# huffman-py/huffman_py/functions/decode.py

#
#     Sam Hadow - Huffman-py
#     Copyright (C) 2023
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
import re

def huffman_decode(encodedData, current_node):
    """Decode a sequence of '0'/'1' symbols by walking a Huffman tree.

    Args:
        encodedData: Iterable of one-character strings. '0' moves to the left
            child and '1' to the right child; any other symbol is skipped
            without moving in the tree.
        current_node: Root of the tree. Nodes expose ``left``, ``right`` and
            ``char``; a node whose ``left`` and ``right`` are both ``None``
            is treated as a leaf.

    Returns:
        The decoded text as a ``str``.

    Raises:
        ValueError: If the walk steps onto a missing child, or if the data
            ends in the middle of a code (on an internal node other than the
            root). Both mean the tree does not match the data.
    """
    root = current_node

    # A tree reduced to a single leaf (only one distinct character in the
    # text): every symbol of the input stands for that character.
    if root.left is None and root.right is None:
        return ''.join([root.char for _ in encodedData])

    decoded_output = []
    node = root
    for symbol in encodedData:
        if symbol == '1':
            node = node.right
        elif symbol == '0':
            node = node.left
        else:
            # Not a binary digit: leave the position in the tree untouched.
            continue

        if node is None:
            # We followed a branch that does not exist in this tree.
            raise ValueError("Tree and binary don't correspond.")

        if node.left is None and node.right is None:
            # Reached a leaf: emit its character and restart from the root.
            decoded_output.append(node.char)
            node = root

    # Identity (not equality) tells us whether the last code was completed;
    # node classes may define __eq__ on something else, e.g. frequency.
    if node is not root:
        # Ending on an internal node means the tree wasn't the right one.
        raise ValueError("Tree and binary don't correspond.")

    return ''.join([str(item) for item in decoded_output])

def decode_from_dict(encodedData, dict_):
    """Decode a binary string using a ``{char: code}`` dictionary.

    Args:
        encodedData: The encoded data; it is converted with ``str()`` and
            must then consist only of the characters '0' and '1'.
        dict_: Mapping from each character to its binary code (a string of
            '0'/'1'). Empty codes are ignored because they can never consume
            any input.

    Returns:
        The decoded text as a ``str`` (empty if the input is empty).

    Raises:
        TypeError: If the input contains anything other than '0' and '1'.
        ValueError: If some part of the input matches no code in ``dict_``
            (including when ``dict_`` has no usable code at all).
    """
    text = str(encodedData)

    # We check that we really have a binary string.
    if re.search('[^01]', text):
        raise TypeError('Input text must be a binary.')

    # We have a Dict like this: {char: code}; invert it to {code: char}
    # (both codes and chars are expected to be unique).
    code_to_char = {code: char for char, code in dict_.items()}
    # An empty code would match forever without consuming input.
    code_to_char.pop('', None)
    longest_code = max(map(len, code_to_char), default=0)

    decoded_parts = []
    start = 0
    total = len(text)
    while start < total:
        # Grow the candidate prefix one bit at a time: the first hit is the
        # shortest code matching at this position.
        last_end = min(start + longest_code, total)
        for end in range(start + 1, last_end + 1):
            candidate = text[start:end]
            if candidate in code_to_char:
                decoded_parts.append(code_to_char[candidate])
                start = end
                break
        else:
            # Nothing corresponds, so it's not the correct Dict.
            raise ValueError("Can't convert text with current Dict.")

    return ''.join(decoded_parts)