title lzcomp - file compressor using limpel-ziv algorithm ;Tom Pfau ;Digital Equipment Corporation ;Parsippany, NJ ;Constants clear equ 256 ;Clear code eof equ 257 ;End of file marker first_free equ 258 ;First free code maxmax equ 4096 ;Max code + 1 include macros.mlb ;Hash table entry hash_rec struc first dw ? ; First entry with this value next dw ? ; Next entry along chain char db ? ; Suffix char hash_rec ends ;Declare Segments code segment byte public 'code' code ends stack segment word stack 'data' dw 128 dup (?) stack ends data segment word public 'data' data ends memory segment para public 'memory' hash label hash_rec memory ends ;Start writing code code segment assume cs:code,ds:data,es:data,ss:stack start proc far mov bx,seg hash ;End of program mov ax,ds ;Start of program sub bx,ax ;Program size inc bx ;Make sure setmem bx ;Set our size mov bx,data ;Set up data segment addressability mov es,bx mov ds,bx print input_prompt ;Get input file name input input_file print crlf print output_prompt ;And output input output_file print crlf mov al,input_file+1 ;Terminate file names with nulls xor ah,ah mov si,ax mov input_file+2[si],0 mov al,output_file+1 mov si,ax mov output_file+2[si],0 hopen input_file+2,0 mov input_handle,ax hcreat output_file+2,0 mov output_handle,ax call compress ;Compress file hclose input_handle ;Close in and out hclose output_handle exit ;Done start endp data segment input_prompt db 'Input file: $' output_prompt db 'Output file: $' input_file db 80,0,80 dup (?) output_file db 80,0,80 dup (?) crlf db 13,10,'$' input_handle dw ? output_handle dw ? data ends compress proc near malloc 1280 ;Allocate space for hash table mov hash_seg,ax ;Save segment address l1: call init_table ;Initialize the table and some vars mov ax,clear ;Write a clear code call write_code call read_char ;Read first char l4: xor ah,ah ;Turn char into code l4a: mov prefix_code,ax ;Set prefix code call read_char ;Read next char jc l17 ;Carry means eof mov k,al ;Save char in k mov bx,prefix_code ;Get prefix code call lookup_code ;See if this pair in table jnc l4a ;nc means yes, new code in ax call add_code ;Add pair to table push bx ;Save new code mov ax,prefix_code ;Write old prefix code call write_code pop bx mov al,k ;Get last char cmp bx,max_code ;Exceed code size? jl l4 ;less means no cmp nbits,12 ;Currently less than 12 bits? jl l14 ;yes mov ax,clear ;Write a clear code call write_code call init_table ;Reinit table mov al,k ;get last char jmp l4 ;Start over l14: inc nbits ;Increase number of bits shl max_code,1 ;Double max code size jmp l4 ;Get next char l17: mov ax,prefix_code ;Write last code call write_code mov ax,eof ;Write eof code call write_code mov ax,bit_offset ;Make sure buffer is flushed to file cmp ax,0 je l18 mov cx,8 ;convert bits to bytes xor dx,dx div cx or dx,dx ;If extra bits, make sure they get je l17a ;written inc ax l17a: call flush l18: ret compress endp data segment hash_seg dw ? prefix_code dw ? free_code dw ? max_code dw ? nbits dw ? k db ? data ends init_table proc near mov nbits,9 ;Set code size to 9 mov max_code,512 ;Set max code to 512 push es ;Save seg reg mov es,hash_seg ;Address hash table mov ax,-1 ;Unused flag mov cx,640 ;Clear first 256 entries mov di,offset hash ;Point to first entry rep stosw ;Clear it out pop es ;Restore seg reg mov free_code,first_free ;Set next code to use ret ;done init_table endp write_code proc near push ax ;Save code mov ax,bit_offset ;Get bit offset mov cx,nbits ;Adjust bit offset by code size add bit_offset,cx mov cx,8 ;Convert bit offset to byte offset xor dx,dx div cx cmp ax,1020 ;Approaching end of buffer? jl wc1 ;less means no call flush ;Write the buffer push dx ;dx contains offset within byte add dx,nbits ;adjust by code size mov bit_offset,dx ;new bit offset pop dx ;restore dx add ax,offset output_data ;Point to last byte mov si,ax ;put in si mov al,byte ptr [si] ;move byte to first position mov output_data,al xor ax,ax ;Byte offset of zero wc1: add ax,offset output_data ;Point into buffer mov di,ax ;Destination pop ax ;Restore code mov cx,dx ;offset within byte xor dx,dx ;dx will catch bits rotated out jcxz wc3 ;If offset in byte is zero, skip shift wc2: shl ax,1 ;Rotate code rcl dx,1 loop wc2 or al,byte ptr [di] ;Grab bits currently in buffer wc3: stosw ;Save data mov al,dl ;Grab extra bits stosb ;and save ret write_code endp data segment bit_offset dw ? output_data db 1024 dup (?) data ends flush proc near push ax ;Save all registers push bx ;AX contains number of bytes to write push cx push dx hwrite output_handle,output_data,ax ;write output file pop dx pop cx pop bx pop ax ret flush endp read_char proc near mov di,input_offset ;Anything left in buffer? cmp di,input_size jl rd1 ;less means yes hread input_handle,input_data,1024 ;Read next chunk of input cmp ax,0 ;Anything left? je rd2 ;equal means no, finished mov input_size,ax ;Save bytes read mov input_offset,0 ;Point to beginning of buffer mov di,0 rd1: lea si,input_data[di] ;Point at character lodsb ;Read it in inc input_offset ;Adjust pointer clc ;Success ret rd2: stc ;Nothing left ret read_char endp data segment input_data db 1024 dup (?) input_offset dw 0 input_size dw 0 data ends lookup_code proc near push ds ;Save seg reg mov ds,hash_seg ;point to hash table call index ;convert code to address mov di,0 ;flag cmp [si].first,-1 ;Has this code been used? je gc4 ;equal means no inc di ;set flag mov bx,[si].first ;Get first entry gc2: call index ;convert code to address cmp [si].char,al ;is char the same? jne gc3 ;ne means no clc ;success mov ax,bx ;put found code in ax pop ds ;restore seg reg ret ;done gc3: cmp [si].next,-1 ;More left with this prefix? je gc4 ;equal means no mov bx,[si].next ;get next code jmp gc2 ;try again gc4: stc ;not found pop ds ;restore seg reg ret ;done lookup_code endp index proc near mov si,bx ;si = bx * 5 (5 byte hash entries) shl si,1 ;si = bx * 2 * 2 + bx shl si,1 add si,bx ret index endp add_code proc near mov bx,free_code ;Get code to use push ds ;point to hash table mov ds,hash_seg cmp di,0 ;First use of this prefix? je ac1 ;equal means yes mov [si].next,bx ;point last use to new entry jmp short ac2 ac1: mov [si].first,bx ;Point first use to new entry ac2: cmp bx,maxmax ;Have we reached code limit? je ac3 ;equal means yes, just return call index ;get address of new entry mov [si].first,-1 ;initialize pointers mov [si].next,-1 mov [si].char,al ;save suffix char inc es:free_code ;adjust next code ac3: pop ds ;restore seg reg ret add_code endp code ends end start