#
#     Sam Hadow - Huffman-py
#     Copyright (C) 2023
#
#     This program is free software: you can redistribute it and/or modify
#     it under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     This program is distributed in the hope that it will be useful,
#     but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#     GNU General Public License for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
import re

def huffman_decode(encodedData, current_node):
    """Decode a sequence of '0'/'1' bits by walking a Huffman tree.

    Args:
        encodedData: iterable of single characters. '0' moves to the left
            child and '1' to the right child; any other character is
            skipped without moving.
        current_node: root of the tree. Every node must expose ``char``,
            ``left`` and ``right`` attributes; a node whose ``left`` and
            ``right`` are both None is treated as a leaf.

    Returns:
        The decoded text as a str (each leaf's ``char`` passed through
        ``str``).

    Raises:
        ValueError: if the bits stop in the middle of a code, or a bit
            leads to a child that does not exist.
    """
    root = current_node

    # Tree reduced to a single leaf (only one distinct symbol in the text):
    # there is nothing to walk, every input element stands for that symbol.
    if root.left is None and root.right is None:
        return ''.join(str(root.char) for _ in encodedData)

    decoded = []
    node = root
    for bit in encodedData:
        if bit == '1':
            node = node.right
        elif bit == '0':
            node = node.left
        else:
            # Not a bit: the position in the tree does not change.
            continue

        if node is None:
            # The bit pointed at a missing child, so this tree cannot have
            # produced the input.
            raise ValueError("Tree and binary don't correspond.")

        if node.left is None and node.right is None:
            # Leaf reached: emit its symbol and restart from the root.
            decoded.append(str(node.char))
            node = root

    if node is not root:
        # Input ended on an internal node, i.e. in the middle of a code.
        raise ValueError("Tree and binary don't correspond.")

    return ''.join(decoded)


def decode_from_dict(encodedData, dict_):
    """Decode a binary string using a ``{char: code}`` mapping.

    Args:
        encodedData: the encoded data; it is converted with ``str`` and
            must then consist only of the characters '0' and '1'.
        dict_: mapping from each symbol to its binary code (a str of
            '0'/'1').

    Returns:
        The decoded text as a str.

    Raises:
        TypeError: if the input contains anything other than '0' and '1'.
        ValueError: if, at some position, no code of ``dict_`` matches
            the remaining input (including when ``dict_`` is empty and the
            input is not).
    """
    # We receive {char: code}; decoding needs the reverse lookup
    # {code: char} (both codes and chars are expected to be unique).
    code_to_char = {code: char for char, code in dict_.items()}
    text = str(encodedData)

    # Reject anything that is not a pure binary string.
    if re.search('[^01]', text):
        raise TypeError('Input text must be a binary.')

    # No candidate longer than the longest code can ever match.
    longest = max(map(len, code_to_char), default=0)

    decoded = []
    start = 0
    end = len(text)
    while start < end:
        # Try candidates from shortest to longest so the shortest matching
        # code wins. Candidates are never empty, so an empty code cannot
        # match and every successful iteration consumes input.
        for stop in range(start + 1, min(start + longest, end) + 1):
            piece = text[start:stop]
            if piece in code_to_char:
                decoded.append(code_to_char[piece])
                start = stop
                break
        else:
            # Nothing matches at this position: wrong mapping for this
            # input.
            raise ValueError("Can't convert text with current Dict.")

    return ''.join(decoded)