# MIT License
#
# Copyright (c) 2018 Jelle Hermsen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# +-------------------------------------+
# |           _                 _       |
# |  __ _  __| |_   _ _ __  ___(_)_ __  |
# | / _` |/ _` | | | | '_ \|_  / | '_ \ |
# || (_| | (_| | |_| | | | |/ /| | |_) ||
# | \__, |\__,_|\__,_|_| |_/___|_| .__/ |
# | |___/                        |_|    |
# |                                     |
# +-------------------------------------+
# | A small GDScript that allows you to |
# | browse and uncompress zip files.    |
# +-------------------------------------+
#
# Example usage:
#
# - put gdunzip.gd somewhere in your Godot project
# - instance this script by using:
#   var gdunzip = load('res://LOCATION_IN_LIBRARY/gdunzip.gd').new()
# - load a zip file:
#   var loaded = gdunzip.load('res://PATH_TO_ZIP/test.zip')
# - if loaded is true you can try to uncompress a file:
#   var uncompressed = gdunzip.uncompress('PATH_TO_FILE_IN_ZIP/test.txt')
# - now you have a PoolByteArray named "uncompressed" with the
#   uncompressed data for the given file
#
# You can iterate over the "files" variable from the gdunzip instance to
# see all the available files:
# - for f in gdunzip.files.values():
#       print(f['file_name'])


# The path of the currently loaded zip file
var path

# A PoolByteArray with the contents of the zip file
var buffer

# The size of the currently loaded buffer
var buffer_size

# A dictionary with the files in the currently loaded zip file.
# It maps full file paths to dictionaries with meta file information:
#
# - compression_method: -1 if the file is stored uncompressed, or
#   File.COMPRESSION_DEFLATE if it is deflate compressed
# - file_name: the full path of the compressed file inside the zip
# - file_header_offset: the byte offset of this file's local file header
#   inside the zip file (the compressed data follows that header)
# - compressed_size
# - uncompressed_size
var files = {}

# The current position we're at inside the buffer
var pos = 0

# An instance of the inner Tinf class, which is a GDScript port of Jørgen
# Ibsen's excellent "tiny inflate library"
var tinf


# Initialize the gdunzip class
func _init():
    self.tinf = Tinf.new()
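
# For illustration: after a successful call to load(), a single entry in
# "files" looks roughly like this (the path and the numbers below are just
# made-up example values):
#
#     {
#         'file_name': 'PATH_TO_FILE_IN_ZIP/test.txt',
#         'compression_method': File.COMPRESSION_DEFLATE,
#         'compressed_size': 123,
#         'uncompressed_size': 456,
#         'file_header_offset': 38
#     }
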
# Load a zip file with the given path. Returns a boolean denoting success.
func load(path):
    if path == null:
        return false

    self.path = path
    self.pos = 0

    var file = File.new()
    if !file.file_exists(path):
        return false

    file.open(path, File.READ)
    var file_length = file.get_len()

    if file.get_32() != 0x04034b50:
        # The buffer doesn't start with a local file header signature, so
        # this isn't a zip file we can handle
        file.close()
        return false

    file.seek(0)
    self.buffer = file.get_buffer(file_length)
    self.buffer_size = self.buffer.size()
    file.close()

    if self.buffer_size < 22:
        # Definitely not a valid zip file
        return false

    # Fill in self.files with all the file data
    return self._get_files()


# Uncompress the given file. Returns false if uncompressing fails, or when the
# file isn't available in the currently loaded zip file.
func uncompress(file_name):
    if !(file_name in self.files):
        return false

    var f = self.files[file_name]
    self.pos = f['file_header_offset']
    self._skip_file_header()
    var uncompressed = self._read(f['compressed_size'])

    if f['compression_method'] == -1:
        # The file was stored without compression
        return uncompressed

    return tinf.tinf_uncompress(f['uncompressed_size'], uncompressed)


# Return a PoolByteArray with the compressed data for the given file.
# Returns false if the file can't be found.
func get_compressed(file_name):
    if !(file_name in self.files):
        return false

    var f = self.files[file_name]
    self.pos = f['file_header_offset']
    self._skip_file_header()
    return self._read(f['compressed_size'])


# Parse the zip file's central directory and fill self.files with all the
# file info.
func _get_files():
    # Locate the end of central directory record, by scanning backwards for
    # its signature (0x06054b50)
    var eocd_offset = buffer.size() - 22
    while (
        eocd_offset > 0
        && !(
            buffer[eocd_offset] == 0x50
            && buffer[eocd_offset + 1] == 0x4b
            && buffer[eocd_offset + 2] == 0x05
            && buffer[eocd_offset + 3] == 0x06
        )
    ):
        eocd_offset -= 1

    # Read the central directory start offset from the end of central
    # directory record
    self.pos = (
        buffer[eocd_offset + 19] << 24
        | buffer[eocd_offset + 18] << 16
        | buffer[eocd_offset + 17] << 8
        | buffer[eocd_offset + 16]
    )

    # Get all central directory records, and fill self.files
    # with all the file information
    while (
        buffer[pos + 3] == 0x02
        && buffer[pos + 2] == 0x01
        && buffer[pos + 1] == 0x4b
        && buffer[pos] == 0x50
    ):
        var raw = _read(46)
        var header = {
            'compression_method': '',
            'file_name': '',
            'compressed_size': 0,
            'uncompressed_size': 0,
            'file_header_offset': -1,
        }

        if raw[10] == 0 && raw[11] == 0:
            header['compression_method'] = -1
        else:
            header['compression_method'] = File.COMPRESSION_DEFLATE

        header['compressed_size'] = (
            raw[23] << 24 | raw[22] << 16 | raw[21] << 8 | raw[20]
        )
        header['uncompressed_size'] = (
            raw[27] << 24 | raw[26] << 16 | raw[25] << 8 | raw[24]
        )
        header['file_header_offset'] = (
            raw[45] << 24 | raw[44] << 16 | raw[43] << 8 | raw[42]
        )

        var file_name_length = raw[29] << 8 | raw[28]
        var extra_field_length = raw[31] << 8 | raw[30]
        var comment_length = raw[33] << 8 | raw[32]

        var raw_end = _read(
            file_name_length + extra_field_length + comment_length
        )
        if !raw_end:
            return false

        header['file_name'] = (
            raw_end.subarray(0, file_name_length - 1).get_string_from_utf8()
        )

        self.files[header['file_name']] = header

    return true


# Read the given number of bytes from the buffer, advancing "pos", and return
# them as a PoolByteArray. Returns false when not enough bytes are available.
func _read(length):
    var result = buffer.subarray(pos, pos + length - 1)
    if result.size() != length:
        return false
    pos = pos + length
    return result


# Skip the given number of bytes in the buffer, advancing "pos".
func _skip(length):
    pos += length
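
# For reference: a zip local file header starts with the signature 0x04034b50
# ("PK\x03\x04") and has a fixed-size part of 30 bytes, with the file name
# length stored at byte offsets 26-27 and the extra field length at offsets
# 28-29. Those are the offsets _skip_file_header below relies on.
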
# Skip the file header information, assuming "pos" points at a local file
# header inside the buffer. Afterwards "pos" points at the file's data.
func _skip_file_header():
    var raw = _read(30)
    if !raw:
        return false
    var file_name_length = raw[27] << 8 | raw[26]
    var extra_field_length = raw[29] << 8 | raw[28]
    _skip(file_name_length + extra_field_length)


# The inner Tinf class is a pretty straight port of Jørgen Ibsen's excellent
# "tiny inflate library". It's written in an imperative style and I have tried
# to stay close to the original. I added two helper functions, and had to make
# some minor additions to support "faux pointer arithmetic".
#
# I have created a TINF_TREE and TINF_DATA dictionary to serve as structs. I
# use "duplicate" to make an instance from these structs.
class Tinf:
    # ----------------------------------------
    # -- GDScript specific helper functions --
    # ----------------------------------------
    func make_pool_int_array(size):
        var pool_int_array = PoolIntArray()
        pool_int_array.resize(size)
        return pool_int_array

    func make_pool_byte_array(size):
        var pool_byte_array = PoolByteArray()
        pool_byte_array.resize(size)
        return pool_byte_array

    # ------------------------------
    # -- internal data structures --
    # ------------------------------
    var TINF_TREE = {
        'table': make_pool_int_array(16),
        'trans': make_pool_int_array(288),
    }

    var TINF_DATA = {
        'source': PoolByteArray(),
        # sourcePtr is an "int" that's used to point at a location in
        # "source". I added this since we don't have pointer arithmetic in
        # GDScript.
        'sourcePtr': 0,
        'tag': 0,
        'bitcount': 0,
        'dest': PoolByteArray(),
        'destLen': 0,
        # "Faux pointer" to dest.
        'destPtr': 0,
        'ltree': TINF_TREE.duplicate(),
        'dtree': TINF_TREE.duplicate()
    }

    const TINF_OK = 0
    const TINF_DATA_ERROR = -3

    # ---------------------------------------------------
    # -- uninitialized global data (static structures) --
    # ---------------------------------------------------
    var sltree = TINF_TREE.duplicate() # fixed length/symbol tree
    var sdtree = TINF_TREE.duplicate() # fixed distance tree

    var base_tables = {
        # extra bits and base tables for length codes
        'length_bits': make_pool_byte_array(30),
        'length_base': make_pool_int_array(30),
        # extra bits and base tables for distance codes
        'dist_bits': make_pool_byte_array(30),
        'dist_base': make_pool_int_array(30)
    }

    var clcidx = PoolByteArray([
        16, 17, 18, 0, 8, 7, 9, 6, 10, 5,
        11, 4, 12, 3, 13, 2, 14, 1, 15
    ])

    # -----------------------
    # -- utility functions --
    # -----------------------

    # build extra bits and base tables
    # target: String, either 'length' or 'dist', selecting which pair of
    #         tables in base_tables gets filled
    # delta: int
    # first: int
    func tinf_build_bits_base(target, delta, first):
        # var i = 0
        var sum = first

        for i in range(0, delta):
            base_tables[target + '_bits'][i] = 0
        for i in range(0, 30 - delta):
            base_tables[target + '_bits'][i + delta] = i / delta

        for i in range(0, 30):
            base_tables[target + '_base'][i] = sum
            sum += 1 << base_tables[target + '_bits'][i]

    # build the fixed huffman trees
    # lt: TINF_TREE
    # dt: TINF_TREE
    func tinf_build_fixed_trees(lt, dt):
        # var i = 0

        for i in range(0, 7):
            lt['table'][i] = 0

        lt['table'][7] = 24
        lt['table'][8] = 152
        lt['table'][9] = 112

        for i in range(0, 24):
            lt['trans'][i] = 256 + i
        for i in range(0, 144):
            lt['trans'][24 + i] = i
        for i in range(0, 8):
            lt['trans'][24 + 144 + i] = 280 + i
        for i in range(0, 112):
            lt['trans'][24 + 144 + 8 + i] = 144 + i

        for i in range(0, 5):
            dt['table'][i] = 0

        dt['table'][5] = 32

        for i in range(0, 32):
            dt['trans'][i] = i

    # given an array of code lengths, build a tree
    # t: TINF_TREE
    # lengths: PoolByteArray
    # num: int
    func tinf_build_tree(t, lengths, num):
        var offs = make_pool_int_array(16)
        # var i = 0
        var sum = 0

        # clear code length count table
        for i in range(0, 16):
            t['table'][i] = 0

        # scan symbol lengths, and sum code length counts
        for i in range(0, num):
            t['table'][lengths[i]] += 1

        t['table'][0] = 0

        # compute offsets into the sorted symbol table
        for i in range(0, 16):
            offs[i] = sum
            sum += t['table'][i]

        # fill in the code to symbol translation table, sorted by code
        for i in range(0, num):
            if lengths[i]:
                t['trans'][offs[lengths[i]]] = i
                offs[lengths[i]] += 1

    # ----------------------
    # -- decode functions --
    # ----------------------

    # get one bit from source stream
    # d: TINF_DATA
    # returns: int
    func tinf_getbit(d):
        var bit = 0

        d['bitcount'] -= 1
        if !(d['bitcount'] + 1):
            # the tag is empty: refill it with the next source byte
            d['tag'] = d['source'][d['sourcePtr']]
            d['sourcePtr'] += 1
            d['bitcount'] = 7

        bit = d['tag'] & 0x01
        d['tag'] >>= 1

        return bit

    # read a num bit value from a stream and add base
    # d: TINF_DATA
    # num: int
    # base: int
    # returns: int
    func tinf_read_bits(d, num, base):
        var val = 0
        if num:
            var limit = 1 << num
            var mask = 1
            while mask < limit:
                if tinf_getbit(d):
                    val += mask
                mask *= 2
        return val + base

    # given a data stream and a tree, decode a symbol
    # d: TINF_DATA
    # t: TINF_TREE
    # returns: int
    func tinf_decode_symbol(d, t):
        var sum = 0
        var cur = 0
        var length = 0

        while true:
            cur = 2 * cur + tinf_getbit(d)
            length += 1
            sum += t['table'][length]
            cur -= t['table'][length]
            if cur < 0:
                break

        return t['trans'][sum + cur]

    # given a data stream, decode dynamic trees from it
    # d: TINF_DATA
    # lt: TINF_TREE
    # dt: TINF_TREE
    func tinf_decode_trees(d, lt, dt):
        var code_tree = TINF_TREE.duplicate()
        var lengths = make_pool_byte_array(288 + 32)
        var hlit = 0
        var hdist = 0
        var hclen = 0
        # var i = 0
        var num = 0
        var length = 0

        # get 5 bits HLIT (257-286)
        hlit = tinf_read_bits(d, 5, 257)

        # get 5 bits HDIST (1-32)
        hdist = tinf_read_bits(d, 5, 1)

        # get 4 bits HCLEN (4-19)
        hclen = tinf_read_bits(d, 4, 4)

        for i in range(0, 19):
            lengths[i] = 0

        # read code lengths for the code length alphabet
        for i in range(0, hclen):
            var clen = tinf_read_bits(d, 3, 0)
            lengths[clcidx[i]] = clen

        # build code length tree
        tinf_build_tree(code_tree, lengths, 19)

        # decode the code lengths for the dynamic length/distance trees
        while num < hlit + hdist:
            var sym = tinf_decode_symbol(d, code_tree)

            match sym:
                16:
                    # copy the previous code length 3-6 times (read 2 bits)
                    var prev = lengths[num - 1]
                    length = tinf_read_bits(d, 2, 3)
                    while length != 0:
                        lengths[num] = prev
                        num += 1
                        length -= 1
                17:
                    # repeat code length 0 for 3-10 times (read 3 bits)
                    length = tinf_read_bits(d, 3, 3)
                    while length != 0:
                        lengths[num] = 0
                        num += 1
                        length -= 1
                18:
                    # repeat code length 0 for 11-138 times (read 7 bits)
                    length = tinf_read_bits(d, 7, 11)
                    while length != 0:
                        lengths[num] = 0
                        num += 1
                        length -= 1
                _:
                    # values 0-15 are the actual code lengths
                    lengths[num] = sym
                    num += 1

        # build dynamic trees
        tinf_build_tree(lt, lengths, hlit)
        tinf_build_tree(dt, lengths.subarray(hlit, lengths.size() - 1), hdist)

    # -----------------------------
    # -- block inflate functions --
    # -----------------------------

    # given a stream and two trees, inflate a block of data
    # d: TINF_DATA
    # lt: TINF_TREE
    # dt: TINF_TREE
    func tinf_inflate_block_data(d, lt, dt):
        var start = d['destPtr']

        while true:
            var sym = tinf_decode_symbol(d, lt)

            # check for end of block
            if sym == 256:
                d['destLen'] += d['destPtr'] - start
                return TINF_OK

            if sym < 256:
                # literal byte
                d['dest'][d['destPtr']] = sym
                d['destPtr'] += 1
            else:
                # back reference: copy "length" bytes from "offs" bytes back
                var length = 0
                var dist = 0
                var offs = 0
                # var i = 0
                var ptr = d['destPtr']

                sym -= 257

                # possibly get more bits from length code
                length = tinf_read_bits(
                    d,
                    base_tables['length_bits'][sym],
                    base_tables['length_base'][sym]
                )

                dist = tinf_decode_symbol(d, dt)

                # possibly get more bits from distance code
                offs = tinf_read_bits(
                    d,
                    base_tables['dist_bits'][dist],
                    base_tables['dist_base'][dist]
                )

                for i in range(0, length):
                    d['dest'][ptr + i] = d['dest'][ptr + (i - offs)]

                d['destPtr'] += length
    # inflate an uncompressed block of data
    # d: TINF_DATA
    func tinf_inflate_uncompressed_block(d):
        var length = 0
        var invlength = 0
        var i = 0

        # get length
        length = d['source'][d['sourcePtr'] + 1]
        length = 256 * length + d['source'][d['sourcePtr']]

        # get one's complement of length
        invlength = d['source'][d['sourcePtr'] + 3]
        invlength = 256 * invlength + d['source'][d['sourcePtr'] + 2]

        if length != ~invlength & 0x0000ffff:
            return TINF_DATA_ERROR

        d['sourcePtr'] += 4

        i = length
        while i:
            d['dest'][d['destPtr']] = d['source'][d['sourcePtr']]
            d['destPtr'] += 1
            d['sourcePtr'] += 1
            i -= 1

        d['bitcount'] = 0
        d['destLen'] += length

        return TINF_OK

    # inflate a block of data compressed with fixed huffman trees
    # d: TINF_DATA
    # returns: int
    func tinf_inflate_fixed_block(d):
        # decode block using fixed trees
        return tinf_inflate_block_data(d, sltree, sdtree)

    # inflate a block of data compressed with dynamic huffman trees
    # d: TINF_DATA
    # returns: int
    func tinf_inflate_dynamic_block(d):
        # decode trees from stream
        tinf_decode_trees(d, d['ltree'], d['dtree'])

        # decode block using decoded trees
        return tinf_inflate_block_data(d, d['ltree'], d['dtree'])

    # ----------------------
    # -- public functions --
    # ----------------------

    func _init():
        # build fixed huffman trees
        tinf_build_fixed_trees(sltree, sdtree)

        # build extra bits and base tables
        tinf_build_bits_base('length', 4, 3)
        tinf_build_bits_base('dist', 2, 1)

        # fix a special case
        base_tables['length_bits'][28] = 0
        base_tables['length_base'][28] = 258

    # inflate stream from source to dest
    func tinf_uncompress(destLen, source):
        var d = TINF_DATA.duplicate()
        var dest = make_pool_byte_array(destLen)
        var sourceSize = source.size()

        d['source'] = source
        d['dest'] = dest
        destLen = 0

        while true:
            var btype = 0
            var res = 0

            # Skip final block flag
            tinf_getbit(d)

            # read block type (2 bits)
            btype = tinf_read_bits(d, 2, 0)

            match btype:
                0:
                    # decompress uncompressed block
                    res = tinf_inflate_uncompressed_block(d)
                1:
                    # decompress block with fixed huffman trees
                    res = tinf_inflate_fixed_block(d)
                2:
                    # decompress block with dynamic huffman trees
                    res = tinf_inflate_dynamic_block(d)
                _:
                    return false

            if res != TINF_OK:
                return false

            # When we have consumed the entire source, we're done
            if d['sourcePtr'] >= sourceSize:
                break

        return d['dest']
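

# Example helper, meant purely as an illustration of the API above and not
# part of the original gdunzip interface (the function name, arguments and
# error handling are just one possible approach): uncompress a single file
# from the currently loaded zip and write it to disk with Godot's File class.
func example_extract_to_disk(file_name, dest_path):
    var data = uncompress(file_name)
    if typeof(data) == TYPE_BOOL:
        # uncompress returns false when the file is missing or inflating fails
        return false

    var out = File.new()
    if out.open(dest_path, File.WRITE) != OK:
        return false

    out.store_buffer(data)
    out.close()
    return true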