From 2e6a648ada94998199dedb1156494571c124c38a Mon Sep 17 00:00:00 2001
From: "sam.hadow" <sam.hadow@inbox.lv>
Date: Thu, 25 May 2023 00:39:59 +0200
Subject: [PATCH] Upload files to 'huffman_py/fonctions'

---
 huffman_py/fonctions/affichage_binaire.py | 25 +++++++
 huffman_py/fonctions/calcul_code.py       | 40 +++++++++++
 huffman_py/fonctions/decode.py            | 81 +++++++++++++++++++++++
 huffman_py/fonctions/encode.py            | 65 ++++++++++++++++++
 huffman_py/fonctions/ifFileGetContent.py  | 27 ++++++++
 5 files changed, 238 insertions(+)
 create mode 100644 huffman_py/fonctions/affichage_binaire.py
 create mode 100644 huffman_py/fonctions/calcul_code.py
 create mode 100644 huffman_py/fonctions/decode.py
 create mode 100644 huffman_py/fonctions/encode.py
 create mode 100644 huffman_py/fonctions/ifFileGetContent.py

diff --git a/huffman_py/fonctions/affichage_binaire.py b/huffman_py/fonctions/affichage_binaire.py
new file mode 100644
index 0000000..7207b82
--- /dev/null
+++ b/huffman_py/fonctions/affichage_binaire.py
@@ -0,0 +1,25 @@
+#
+#     Sam Hadow - Huffman-py
+#     Copyright (C) 2023
+#
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+def affichage_binaire(the_data, coding):
+    """Return the codes of the symbols of ``the_data``, concatenated.
+
+    ``coding`` maps every symbol to its code; each code is converted with
+    ``str`` and the results are joined in input order. A symbol missing
+    from ``coding`` raises KeyError.
+    """
+    return ''.join([str(coding[element]) for element in the_data])
diff --git a/huffman_py/fonctions/calcul_code.py b/huffman_py/fonctions/calcul_code.py
new file mode 100644
index 0000000..f1f27aa
--- /dev/null
+++ b/huffman_py/fonctions/calcul_code.py
@@ -0,0 +1,40 @@
+#
+#     Sam Hadow - Huffman-py
+#     Copyright (C) 2023
+#
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+def calcul_code(node, value = '', les_codes = None):
+    """Build the ``{lettres: code}`` table of the tree rooted at ``node``.
+
+    ``value`` is the code accumulated from the root down to ``node`` and
+    ``les_codes`` the dict being filled; both exist for the recursive
+    calls, the first call creates the dict. Returns that dict.
+    """
+    est_feuille = not node.left and not node.right
+    if les_codes is None:
+        les_codes = {}
+        # A tree reduced to a single node has no edge to read a bit from,
+        # so its only symbol gets the code "0" by default.
+        if est_feuille:
+            les_codes[node.lettres] = "0"
+            return les_codes
+    # Code of the current node: path so far plus this node's own bit.
+    newValue = value + str(node.code)
+    if est_feuille:
+        les_codes[node.lettres] = newValue
+    for enfant in (node.left, node.right):
+        if enfant:
+            calcul_code(enfant, newValue, les_codes)
+    return les_codes
diff --git a/huffman_py/fonctions/decode.py b/huffman_py/fonctions/decode.py
new file mode 100644
index 0000000..679979f
--- /dev/null
+++ b/huffman_py/fonctions/decode.py
@@ -0,0 +1,81 @@
+#
+#     Sam Hadow - Huffman-py
+#     Copyright (C) 2023
+#
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+import re
+
+def huffman_decode(encodedData, noeud_actuel):
+    """Decode the bit string ``encodedData`` by walking a Huffman tree.
+
+    ``noeud_actuel`` is the root of the tree: '0' follows ``left``, '1'
+    follows ``right`` and any other character is ignored. Every leaf reached
+    yields its ``lettres`` and restarts the walk from the root.
+
+    Returns the decoded string. Raises ValueError when the bits leave the
+    tree or stop in the middle of a code.
+    """
+    racine = noeud_actuel
+    erreur = "L'arbre ne correspond pas en texte encodé."
+    # Special case: a tree with a single node (one distinct symbol in the
+    # original text) yields that symbol once per encoded character.
+    if not racine.left and not racine.right:
+        return ''.join([racine.lettres for _ in encodedData])
+
+    decodedOutput = []
+    for bit in encodedData:
+        if bit == '1':
+            noeud_actuel = noeud_actuel.right
+        elif bit == '0':
+            noeud_actuel = noeud_actuel.left
+        if noeud_actuel is None:
+            raise ValueError(erreur)
+        if not noeud_actuel.left and not noeud_actuel.right:
+            decodedOutput.append(noeud_actuel.lettres)
+            noeud_actuel = racine
+    if noeud_actuel is not racine:
+        # Ending on an internal node: the tree does not match the text.
+        raise ValueError(erreur)
+    return ''.join([str(item) for item in decodedOutput])
+
+def decode_from_dico(encodedData, dico):
+    """Decode the binary text ``encodedData`` with a ``{lettre: code}`` dict.
+
+    ``dico`` has the shape printed by the encoder. Raises TypeError when the
+    text holds characters other than 0 and 1, and ValueError when no code
+    matches the remaining text (an empty ``dico`` included).
+    """
+    # Invert to {code: lettre} (codes are assumed unique, like the letters);
+    # empty codes are dropped since they could never consume any text.
+    inverse = {code: lettre for lettre, code in dico.items() if code != ''}
+    texte = str(encodedData)
+    if re.search('[^01]', texte):
+        raise TypeError ('Le texte en entrée doit être en binaire.')
+
+    codes = sorted(inverse, key=len)  # shortest codes are tried first
+    morceaux = []
+    position = 0
+    while position < len(texte):
+        for binaire in codes:
+            if texte[position:position + len(binaire)] == binaire:
+                morceaux.append(inverse[binaire])
+                position += len(binaire)
+                break
+        else:
+            # Nothing matches at this position (or there is no code at all).
+            raise ValueError ('Impossible de  convertir le texte avec le dictionnaire fourni.')
+    return ''.join(morceaux)
+
+
diff --git a/huffman_py/fonctions/encode.py b/huffman_py/fonctions/encode.py
new file mode 100644
index 0000000..f7fab4a
--- /dev/null
+++ b/huffman_py/fonctions/encode.py
@@ -0,0 +1,65 @@
+#
+#     Sam Hadow - Huffman-py
+#     Copyright (C) 2023
+#
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+from huffman_py.fonctions.occurence import *
+from huffman_py.fonctions.calcul_code import *
+from huffman_py.fonctions.affichage_binaire import *
+from huffman_py.Sommets import Sommets
+from huffman_py.Arbre import Arbre
+
+def huffman_encode(the_data):
+    """Huffman-encode ``the_data``.
+
+    Builds one node per key of the mapping returned by ``calcul_occurence``,
+    merges the two nodes with the smallest ``occurrence`` until a single
+    root is left, then derives the codes from that root. The keys, their
+    values and the resulting codes are printed along the way.
+    Returns the tuple ``(encoded text, root node, {symbol: code})``.
+    """
+    lettres_et_occurences = calcul_occurence(the_data)
+
+    # Display the symbols and their counts (sanity check of the counting).
+    les_lettres = lettres_et_occurences.keys()
+    print("lettres: ", les_lettres)
+    print("occurences: ", lettres_et_occurences.values())
+
+    # One node per symbol.
+    les_sommets = [
+        Sommets(lettres_et_occurences.get(lettre), lettre)
+        for lettre in les_lettres
+    ]
+
+    while len(les_sommets) > 1:
+        # Ascending sort: the two smallest nodes come first.
+        les_sommets.sort(key = lambda sommet: sommet.occurrence)
+        left, right = les_sommets[0], les_sommets[1]
+        left.code = 0
+        right.code = 1
+
+        # Replace both nodes by the node that combines them.
+        nouveau_sommet = Sommets(left.occurrence + right.occurrence,
+                                 left.lettres + right.lettres, left, right)
+        les_sommets.remove(left)
+        les_sommets.remove(right)
+        les_sommets.append(nouveau_sommet)
+
+    racine = les_sommets[0]
+    # The Arbre instance is still created even though it is not used here.
+    mon_arbre = Arbre(racine)
+    huffmanEncoding = calcul_code(racine)
+    print("lettres avec codes", huffmanEncoding)
+    return affichage_binaire(the_data, huffmanEncoding), racine, huffmanEncoding
diff --git a/huffman_py/fonctions/ifFileGetContent.py b/huffman_py/fonctions/ifFileGetContent.py
new file mode 100644
index 0000000..54d4816
--- /dev/null
+++ b/huffman_py/fonctions/ifFileGetContent.py
@@ -0,0 +1,27 @@
+#
+#     Sam Hadow - Huffman-py
+#     Copyright (C) 2023
+#
+#     This program is free software: you can redistribute it and/or modify
+#     it under the terms of the GNU General Public License as published by
+#     the Free Software Foundation, either version 3 of the License, or
+#     (at your option) any later version.
+#
+#     This program is distributed in the hope that it will be useful,
+#     but WITHOUT ANY WARRANTY; without even the implied warranty of
+#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#     GNU General Public License for more details.
+#
+#     You should have received a copy of the GNU General Public License
+#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+import os
+
+def ifFileGetContent(string):
+    """Return the text of the file at path ``string``, else ``string`` itself."""
+    # Anything that is not the path of an existing regular file is handed
+    # back unchanged.
+    if not os.path.isfile(string):
+        return string
+    with open(string, 'r') as fd:
+        return fd.read()