From 2e6a648ada94998199dedb1156494571c124c38a Mon Sep 17 00:00:00 2001 From: "sam.hadow" Date: Thu, 25 May 2023 00:39:59 +0200 Subject: [PATCH] Upload files to 'huffman_py/fonctions' --- huffman_py/fonctions/affichage_binaire.py | 25 +++++++ huffman_py/fonctions/calcul_code.py | 40 +++++++++++ huffman_py/fonctions/decode.py | 81 +++++++++++++++++++++++ huffman_py/fonctions/encode.py | 65 ++++++++++++++++++ huffman_py/fonctions/ifFileGetContent.py | 27 ++++++++ 5 files changed, 238 insertions(+) create mode 100644 huffman_py/fonctions/affichage_binaire.py create mode 100644 huffman_py/fonctions/calcul_code.py create mode 100644 huffman_py/fonctions/decode.py create mode 100644 huffman_py/fonctions/encode.py create mode 100644 huffman_py/fonctions/ifFileGetContent.py diff --git a/huffman_py/fonctions/affichage_binaire.py b/huffman_py/fonctions/affichage_binaire.py new file mode 100644 index 0000000..7207b82 --- /dev/null +++ b/huffman_py/fonctions/affichage_binaire.py @@ -0,0 +1,25 @@ +# +# Sam Hadow - Huffman-py +# Copyright (C) 2023 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
def affichage_binaire(the_data, coding):
    """Return the binary string obtained by encoding ``the_data`` with ``coding``.

    Args:
        the_data: Iterable of symbols (typically the characters of a string).
        coding: Mapping from each symbol to its code, e.g. ``{"a": "0"}``.

    Returns:
        The codes of all symbols concatenated in input order. Every code is
        passed through ``str``, so non-string codes are accepted as well.

    Raises:
        KeyError: If ``coding`` is a dict and a symbol of ``the_data`` has no
            entry in it.
    """
    # A single pass: look up each symbol and join the codes directly, without
    # building an intermediate list first.
    return ''.join(str(coding[element]) for element in the_data)
def calcul_code(node, value='', les_codes=None):
    """Compute the code of every leaf in the tree rooted at ``node``.

    The tree is walked recursively. The code of a leaf is the concatenation
    of ``str(n.code)`` for every node ``n`` on the path from the root to that
    leaf (both included).

    Args:
        node: Current node. It must expose ``left``, ``right``, ``lettres``
            and ``code`` attributes.
        value: Code accumulated so far along the path (used by the recursion).
        les_codes: Dictionary being filled (used by the recursion). A new one
            is created when the caller leaves it to ``None``.

    Returns:
        The dictionary mapping each leaf's ``lettres`` to its code string.
    """
    first_call = les_codes is None
    if first_call:
        les_codes = {}

    is_leaf = not node.left and not node.right

    # A tree made of a single node has no edges, so no code can be derived
    # from the path: that lone symbol gets "0" by default. This must only
    # apply to the top-level call, otherwise every leaf reached by the
    # recursion would be assigned "0" as well.
    if first_call and is_leaf:
        les_codes[node.lettres] = "0"
        return les_codes

    # Code accumulated up to and including the current node.
    new_value = value + str(node.code)

    if is_leaf:
        les_codes[node.lettres] = new_value
        return les_codes

    if node.left:
        calcul_code(node.left, new_value, les_codes)
    if node.right:
        calcul_code(node.right, new_value, les_codes)

    return les_codes
import re


def huffman_decode(encodedData, noeud_actuel):
    """Decode a binary string by walking a Huffman tree.

    Args:
        encodedData: Iterable of characters; ``'0'`` moves to the left child
            and ``'1'`` to the right child. Any other character is ignored.
        noeud_actuel: Root of the tree. Nodes must expose ``left``, ``right``
            and ``lettres`` attributes.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the walk leaves the tree or ends on an internal node,
            meaning the tree does not match the encoded text.
    """
    racine = noeud_actuel

    # Special case: the tree is a single node (the original text contained
    # one distinct symbol), so every encoded character stands for it.
    if not racine.left and not racine.right:
        return ''.join([racine.lettres for _ in encodedData])

    decoded_output = []
    for bit in encodedData:
        if bit == '1':
            noeud_actuel = noeud_actuel.right
        elif bit == '0':
            noeud_actuel = noeud_actuel.left

        if noeud_actuel is None:
            # We followed a branch that does not exist in this tree.
            raise ValueError("L'arbre ne correspond pas en texte encodé.")

        # Reaching a leaf yields one symbol; restart from the root.
        if noeud_actuel.left is None and noeud_actuel.right is None:
            decoded_output.append(noeud_actuel.lettres)
            noeud_actuel = racine

    # Leaves always reset the cursor to the root, so ending anywhere else
    # means we stopped in the middle of a code.
    if noeud_actuel is not racine:
        raise ValueError("L'arbre ne correspond pas en texte encodé.")

    return ''.join(str(item) for item in decoded_output)


def decode_from_dico(encodedData, dico):
    """Decode a binary string using a ``{symbol: code}`` dictionary.

    Args:
        encodedData: The encoded text; it is converted with ``str`` and must
            then contain only ``'0'`` and ``'1'``.
        dico: Mapping ``{symbol: code}``, i.e. the same shape as the
            dictionary produced by the encoder. It is inverted internally.

    Returns:
        The decoded text.

    Raises:
        TypeError: If the text contains a character other than ``0``/``1``.
        ValueError: If some part of the text matches no code of ``dico``.
    """
    # Invert the pairs: codes become the lookup keys.
    code_to_symbol = {code: symbol for symbol, code in dico.items()}
    texte = str(encodedData)

    # Make sure the input really is binary text.
    if re.search('[^01]', texte):
        raise TypeError('Le texte en entrée doit être en binaire.')

    longest_code = max(map(len, code_to_symbol), default=0)

    # Grow a buffer one bit at a time: the first time it equals a known code
    # is the shortest code matching at this position. A single pass replaces
    # rescanning the dictionary and re-slicing the text for every symbol.
    decoded = []
    buffer = ''
    for bit in texte:
        buffer += bit
        if buffer in code_to_symbol:
            decoded.append(code_to_symbol[buffer])
            buffer = ''
        elif len(buffer) >= longest_code:
            # No code can match any more (this also covers an empty
            # dictionary, which must fail rather than loop forever).
            raise ValueError('Impossible de convertir le texte avec le dictionnaire fourni.')

    if buffer:
        # The text ended in the middle of a code.
        raise ValueError('Impossible de convertir le texte avec le dictionnaire fourni.')

    return ''.join(decoded)
from huffman_py.fonctions.occurence import *
from huffman_py.fonctions.calcul_code import *
from huffman_py.fonctions.affichage_binaire import *
from huffman_py.Sommets import Sommets
from huffman_py.Arbre import Arbre


def huffman_encode(the_data):
    """Huffman-encode ``the_data``.

    The symbols, their occurrence counts and the resulting codes are printed
    to standard output along the way.

    Args:
        the_data: The data to encode; it is passed to ``calcul_occurence``
            and later to ``affichage_binaire``.

    Returns:
        A 3-tuple ``(encoded, root, codes)``: the encoded output returned by
        ``affichage_binaire``, the root node of the tree that was built, and
        the code dictionary returned by ``calcul_code``.
    """
    occurrences_by_symbol = calcul_occurence(the_data)

    # Show the symbols and their occurrence counts (sanity check).
    print("lettres: ", occurrences_by_symbol.keys())
    print("occurences: ", occurrences_by_symbol.values())

    # One node per distinct symbol.
    pending = [
        Sommets(count, symbol)
        for symbol, count in occurrences_by_symbol.items()
    ]

    while len(pending) > 1:
        # Ascending order of occurrence: the two rarest nodes come first.
        pending.sort(key=lambda sommet: sommet.occurrence)
        left, right = pending[0], pending[1]

        left.code = 0
        right.code = 1

        # Merge the two rarest nodes under a new parent that replaces them.
        merged = Sommets(
            left.occurrence + right.occurrence,
            left.lettres + right.lettres,
            left,
            right,
        )
        del pending[:2]
        pending.append(merged)

    racine = pending[0]

    # The resulting object is not used in this function; the constructor
    # call is kept as it was.
    mon_arbre = Arbre(racine)

    huffmanEncoding = calcul_code(racine)
    print("lettres avec codes", huffmanEncoding)
    encodedOutput = affichage_binaire(the_data, huffmanEncoding)
    return encodedOutput, racine, huffmanEncoding
import os


def ifFileGetContent(string):
    """Return the content of the file at ``string``, or ``string`` itself.

    Args:
        string: Either a path to an existing regular file, or arbitrary text.

    Returns:
        The text content of the file when ``string`` is the path of an
        existing file; otherwise ``string`` unchanged.

    Raises:
        UnicodeDecodeError: If the file exists but is not valid UTF-8.
    """
    if not os.path.isfile(string):
        return string

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(string, 'r', encoding='utf-8') as fd:
        return fd.read()