huffman-py/huffman_py/functions/decode.py

#
#     Sam Hadow - Huffman-py
#     Copyright (C) 2023
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
import re

def _is_leaf(node):
    """Return True when *node* has neither a left nor a right child."""
    return node.left is None and node.right is None


def huffman_decode(encodedData, current_node):
    """Decode a sequence of '0'/'1' symbols by walking a Huffman tree.

    Args:
        encodedData: iterable of symbols. '0' selects the left child and
            '1' the right child; any other symbol is skipped.
        current_node: root of the tree. Nodes must expose ``left``,
            ``right`` and ``char`` attributes; a node without children is
            a leaf whose ``char`` is the decoded symbol.

    Returns:
        The decoded text as a ``str``.

    Raises:
        ValueError: if the data steps onto a missing child or stops in the
            middle of a code, i.e. the tree does not match the data.
    """
    root = current_node

    # A tree made of a single node encodes a text with one unique char:
    # every symbol of the input stands for that char.
    if _is_leaf(root):
        return ''.join([str(root.char) for _ in encodedData])

    decoded_output = []
    for symbol in encodedData:
        if symbol == '1':
            current_node = current_node.right
        elif symbol == '0':
            current_node = current_node.left
        else:
            # Non-binary symbols never moved the cursor; keep skipping them.
            continue

        if current_node is None:
            # The bit points at a child that does not exist in this tree.
            raise ValueError("Tree and binary don't correspond.")

        # Reaching a leaf completes one code: emit it and restart at the root.
        if _is_leaf(current_node):
            decoded_output.append(str(current_node.char))
            current_node = root

    if current_node is not root:
        # Ending on an internal node means the last code is incomplete,
        # so the source tree was not the correct one.
        raise ValueError("Tree and binary don't correspond.")

    return ''.join(decoded_output)

def decode_from_dict(encodedData, dict_):
    """Decode a binary string using a ``{char: code}`` mapping.

    Args:
        encodedData: the encoded text; it is converted with ``str()`` and
            must then consist only of the characters '0' and '1'.
        dict_: mapping from each char to its binary code (a string).
            Codes and chars are expected to be unique.

    Returns:
        The decoded text as a ``str``.

    Raises:
        TypeError: if the input contains anything other than '0' and '1'.
        ValueError: if the input cannot be split into codes of ``dict_``.
    """
    # We receive {char: code}; decoding needs the reverse lookup {code: char}.
    code_to_char = {code: char for char, code in dict_.items()}
    text = str(encodedData)

    # Make sure we really have a binary string.
    if re.search(r'[^01]', text):
        raise TypeError('Input text must be a binary.')

    # No code is longer than this, so a longer pending run can never match.
    longest = max(map(len, code_to_char), default=0)

    pieces = []
    pending = ''
    for bit in text:
        pending += bit
        if pending in code_to_char:
            # Shortest matching code wins, as when scanning codes by length.
            pieces.append(code_to_char[pending])
            pending = ''
        elif len(pending) >= longest:
            # Nothing corresponds, so this is not the correct Dict.
            raise ValueError("Can't convert text with current Dict.")

    if pending:
        # Leftover bits that do not form a complete code.
        raise ValueError("Can't convert text with current Dict.")

    return ''.join(pieces)
Upload files to 'huffman_py/functions' 2023-05-25 02:08:43 +02:00			`#`
			`# Sam Hadow - Huffman-py`
			`# Copyright (C) 2023`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
			`#`
			`import re`

			`def huffman_decode(encodedData, current_node):`
			`root = current_node`
			`decodedOutput = []`

			`# if single node in source tree (unique char in the text)`
			`if (not root.left and not root.right):`
			`string = ''.join([root.char for _ in encodedData])`
			`return string`

			`# else`
			`for x in encodedData:`
			`if x == '1':`
			`current_node = current_node.right`
			`elif x == '0':`
			`current_node = current_node.left`

			`# If internal node we keep going down, else (a leaf) we can decode a part of the binary.`
			`try:`
			`# internal node`
			`if current_node.left.char == None and current_node.right.char == None:`
			`pass`
			`except AttributeError:`
			`# leaf`
			`decodedOutput.append(current_node.char)`
			`current_node = root`

			`if current_node != root and (current_node.right !=None or current_node.left != None):`
			`# If we end on an internal node then source tree wasn't the correct tree.`
			`raise ValueError ("Tree and binary don't correspond.")`

			`string = ''.join([str(item) for item in decodedOutput])`
			`return string`

			`def decode_from_dict(encodedData, dict_):`
			`# we have a Dict like this: {char:code}`
			`# we convert it to a Dict like that: {code:char} (both codes and chars are unique)`
			`dict_ = {value:key for key,value in dict_.items()}`
			`text = str(encodedData)`

			`# we check if we have a binary`
			`invalid_char = re.compile('[^01]')`
			`if invalid_char.search(text):`
			`raise TypeError ('Input text must be a binary.')`

			`decoded = ''`
			`sorted_dict = dict(sorted(dict_.items(), key=lambda x: len(x[0]), reverse=False))`
			`while len(text) > 0:`
			`for i,binary in enumerate(sorted_dict.keys()):`
			`if binary == text[0:len(binary)]:`
			`# If we have this binary part in our Dict we can decode a char`
			`decoded += sorted_dict[binary]`
			`# delete decoded part`
			`text = text[len(binary):]`
			`# next while iteration`
			`break`

			`# if nothing corresponds then it's not the correct Dict`
			`elif i == len(sorted_dict.keys())-1:`
			`raise ValueError ("Can't convert text with current Dict.")`
			`return decoded`