function [compressed_data, Kraft_vector, encodedlength4] = huffman_compress( raw_data ) % [compressed_data, Kraft_vector] = huffman_compress( raw_data ) % compresses a list of non-negative integers (raw data) % into a uint8 list of compressed data. % Usage: % plaintext = double(imread('cameraman.tif')); % plaintext = [5 5 5 5 4; 4 0 0 1 5]; % shape = size(plaintext); % [compressed_data, bitlengths] = huffman_compress(plaintext(:)); % % to recover, only need shape, compressed_data, bitlengths. % recovered_data = huffman_uncompress(compressed_data, bitlengths, prod(shape)); % recovered_matrix = reshape(recovered_data, shape); % if( isequal( plaintext, recovered_matrix ) ), % disp('it worked') % else, % error(' I messed up somewhere ... ') % end; % % The "bitlengths" array could be compressed further -- % -- see huff03.m from % http://www.ux.his.no/~karlsk/ % . % The form % [compressed_data, Kraft_vector, encodedlength] = huffman_compress( raw_data ) % also returns the length of the compressed data, in bits, where % length(compressed_data) == ceil(encodedlength4/8). % % See also IMWRITE_COMPRESSED, HUFFMAN_UNCOMPRESS. % Change log: % 1999-06-26:DAV: David Cary started. % huff03.m in huffman.zip % at % http://www.ux.his.no/~karlsk/proj98/huffman.zip % mirrored % ftp://ftp.mathworks.com/pub/contrib/v5/misc/huffman/ % http://www.mathworks.com/ftp/miscv5.shtml % written by % Karl Skretting % Hogskolen in Stavanger (Stavanger University), Norway % karl.skretting@tn.his.no Homepage: http://www.ux.his.no/~karlsk/ % does exactly the same thing as % huffman_compress.m % Like all Huffman compressors, % huff03.m will get the same compressed data length % as huffman_compress, % but huff03.m packs the Kraft_vector % much more tighly than huffman_compress.m does, % and pre-pends it to the compressed data. % (With David Cary's test data, % packing that Kraft_vector down to zero bits % would only improve my compression by 0.031 bpp, % which is relatively insignificant). % On the other hand, huffman_compress.m is much faster % (a factor of 10 in my tests). % Force raw_data into a 1D single list raw_data = raw_data(:); if( min(raw_data) < 0 ) error('Sorry, support for negative numbers not yet implemented.') end; histogram = histo(raw_data); % ASSERT( isequal( length(histogram), 1+max(raw_data) ); disp('getting Kraft vector') drawnow Kraft_vector = unsorted_kraft( histogram ); disp('generating code book') drawnow % ASSERT( isequal( encodedlength1, huffmanlength(plaintext) ) encodedlength1 = Kraft_vector' * histogram; encodedlength2 = huffmanlength(raw_data); if( isequal( encodedlength1, encodedlength2 ) ), else, encodedlength1 encodedlength2 Kraft_vector error('Sorry, internal error') end; max_length = max(Kraft_vector); % If we could guarantee that code lengths % would never be more than 15, % then we could pack 2 values 0..15 % into each byte (0 indicates "never happens"). if( max_length < 15 ) else, disp('maximum code length:') max_length end; % ASSERT( max_length < 53 ) if( max_length < 32 ) else, max_length disp(' ... it *might* work up to') BITMAX disp('bits.') error('Sorry, code lengths greater than 31 bits not yet implemented.') end; % Get the code words: % compactcodeP(Kraft_vector) gives the bogus code "-1" to items that never occur. codebook = compactcodeP(Kraft_vector); if(0) print_bitstrings( codebook ) end; disp('compressing data') drawnow % Finally, actually compress the data. compressed_bits = prefix_encode( codebook, raw_data ); disp('packing data') drawnow encodedlength4 = length(compressed_bits); % ASSERT( isequal( encodedlength3, encodedlength1 ) if( isequal( encodedlength1, encodedlength4 ) ), else, encodedlength1 encodedlength4 print_bitstrings( codebook ) print_without_spaces( compressed_bits ) error('Sorry, internal error') end; % Pack everything into uint8 vectors. % minor compression of code lengths Kraft_vector = uint8(Kraft_vector); % add '0' bits to round up to a multiple of 8 bytes = ceil(encodedlength4/8); stuff_bits = 8*bytes - encodedlength4; if(0 == stuff_bits) else, compressed_bits(8*bytes) = 0; end; bits = reshape(compressed_bits, 8, bytes); % pack first bit into least-significant-bit % of first byte (little-endian) compressed_data = uint8(... 1*bits(1,:) + ... 2*bits(2,:) + ... 4*bits(3,:) + ... 8*bits(4,:) + ... 16*bits(5,:) + ... 32*bits(6,:) + ... 64*bits(7,:) + ... 128*bits(8,:) ... ); % end huffman_compress.m