#! /usr/bin/env -S gawk -f
#
# Reformat a TEA assembly source into a conventional one-instruction-per-line
# layout using 8080/8085-style mnemonics.
#
# Input records are TAB separated: $1 = label, $2 = opcode/operand, $3 = comment.
# In the input every byte sits on its own line, so the operands of 2- and
# 3-byte instructions are pulled in from the following one or two records.
# Register operands are glued onto the mnemonic (e.g. MOVAB, LXIH, PUSHPSW)
# and are split off again here.
#
# Dispatch order for a record with an opcode field:
#   1-byte opcodes -> 2-byte opcodes -> 3-byte opcodes -> register forms
#   -> pseudo ops (ORG "*", DB, DW) -> fallback (isunknown).

BEGIN {
    # The field logic below needs an empty $1 for unlabeled lines and a $2
    # that may contain spaces, so the separator must be a real tab.
    FS = "\t"

    # Lookup tables used by the try*() dispatchers and the main rule.
    mkset(ONE_BYTE, "CMA CMC DAA DI EI HLT NOP PCHL RAL RAR RC RET RIM RNC RNZ RLC RM RP RPE RPO RRC RZ SIM SPHL STC XCHG XTHL")
    mkset(TWO_BYTE, "ACI ADI ANI CPI IN ORI OUT SBI SUI XRI")
    mkset(THREE_BYTE, "CALL CC CNC CNZ CM CP CPE CPO CZ JC JMP JNC JNZ JM JP JPE JPO JZ LDA LHLD STA SHLD")
    # Keyed by the first three characters of $2 (so PUS covers PUSH...,
    # LDA covers LDAX..., STA covers STAX...).
    mkset(ONE_REG, "ADC ADD ANA CMP DAD DCR DCX INR INX LDA ORA POP PUS SBB SUB STA XRA")

    if (ARGC < 2) {
        printf("Need a file to work on.\n") > "/dev/stderr"
        printf("File needs to use tabs, not spaces (use unexpand -a)\n") > "/dev/stderr"
        printf("and lines ending with lf, not cr or crlf (use sed -i)\n") > "/dev/stderr"
        printf("** watch out for single spaces that should be tabs **\n") > "/dev/stderr"
        # exit in BEGIN still runs the END rule; the flag keeps END from
        # emitting a stray "\tEND" after the usage text.
        NO_INPUT = 1
        exit 1
    }
}

# Fill array `set` with one key per whitespace-separated word in `words`.
function mkset(set, words,    list, n, i) {
    n = split(words, list, " ")
    for (i = 1; i <= n; i++)
        set[list[i]] = 1
}

# Return `s` with a leading "0" added when it ends in a two-digit hex
# number whose first digit is a letter (e.g. "FFH" -> "0FFH").
function fixhex(s) {
    return (s ~ /[A-F][0-9A-F]H$/) ? "0" s : s
}

# Advance to the next input record. Returns 1 on success. At end of input
# (or on a read error) the current record is cleared, so callers see empty
# fields instead of silently re-using the previous record's fields.
function nextline() {
    if ((getline) > 0)
        return 1
    $0 = ""
    return 0
}

# Fallback for anything not recognised as an instruction or pseudo op.
# A non-empty $2 is emitted as a DB when the line is unlabeled, when the
# label ends in ":" (the "," -> ":" label fix has already been applied),
# or when $2 ends in a two-digit hex number. Everything else, including
# lines with an empty opcode field, is printed unchanged.
function isunknown() {
    if ($2 != "" && ($1 == "" || $1 ~ /:$/ || $2 ~ /[0-9A-F][0-9A-F]H$/))
        print $1 "\tDB\t" fixhex($2) "\t" $3
    else
        print $1 "\t" $2 "\t" $3
}

# 1-byte opcode: print the record as is.
function isone() {
    print $1 "\t" $2 "\t" $3
}

# 2-byte instruction: the operand is on the next record.
# The first line's comment stays on the instruction line; a comment on the
# operand line is printed on a line of its own.
function istwo(    label, instr, comment, operand, extra) {
    label = $1
    instr = $2
    comment = $3

    nextline()
    operand = fixhex($2)
    extra = $3
    sub(/\//, "; ", extra)

    print label "\t" instr "\t" operand "\t" comment
    if (extra != "")
        print "\t\t\t\t" extra
}

# 3-byte instruction: the two operand bytes are on the next two records,
# and are printed with the third record's value first.
#   JMP  LABEL
#   CALL 1234H
#   LXI  H, LABEL
# A third record whose value is exactly "0" contributes nothing, so only
# the second record's value is printed. Otherwise the third record's value
# has its trailing radix letter (H, O or B) removed and is placed in front
# of the second record's value. Only the comments of the first two records
# are printed; a comment on the third record is not emitted.
function isthree(    label, instr, comment, regs, nregs, i, found, low, high, extra) {
    label = $1
    instr = $2
    comment = $3

    # LXI carries its register pair glued to the mnemonic; checked in the
    # order SP, B, D, H.
    nregs = split("SP B D H", regs, " ")
    found = 0
    for (i = 1; i <= nregs && !found; i++) {
        if (index(instr, "LXI" regs[i]) > 0) {
            instr = substr($2, 1, 3) "\t" regs[i] ", "
            found = 1
        }
    }
    if (!found)
        instr = instr "\t"

    nextline()
    low = $2
    extra = $3
    nextline()
    high = $2

    if (high != "0") {
        if (high ~ /[HOB]$/)
            high = substr(high, 1, length(high) - 1)
        # A number must not start with a letter.
        if (high ~ /^[A-F]/)
            high = "0" high
    } else
        high = ""

    sub(/\//, "; ", extra)

    print label "\t" instr high low "\t" comment
    if (extra != "")
        print "\t\t\t\t" extra
}

# One register operand glued to the mnemonic: CMPB -> CMP B.
# The operand is normally the last character; PSW and SP are the
# multi-character exceptions (PUSHPSW, POPPSW, INXSP, DCXSP, DADSP).
function isreg1(    len, reglen) {
    len = length($2)
    if ($2 ~ /PSW$/)
        reglen = 3
    else if ($2 ~ /SP$/)
        reglen = 2
    else
        reglen = 1
    print $1 "\t" substr($2, 1, len - reglen) "\t" substr($2, len - reglen + 1) "\t" $3
}

# Two register operands glued to the mnemonic: MOVAB -> MOV A, B.
function isreg2(    len) {
    len = length($2)
    print $1 "\t" substr($2, 1, len - 2) "\t" substr($2, len - 1, 1) ", " substr($2, len, 1) "\t" $3
}

# Register + immediate: MVIA followed by a value record -> MVI A, VALUE.
# A comment on the value record is printed on a line of its own.
function isregi(    len, label, instr, comment, extra) {
    len = length($2)
    label = $1
    instr = substr($2, 1, len - 1) "\t" substr($2, len, 1)
    comment = $3

    nextline()
    instr = instr ", " fixhex($2)
    extra = $3
    sub(/\//, "; ", extra)

    print label "\t" instr "\t" comment
    if (extra != "")
        print "\t\t\t\t" extra
}

# Pseudo ops.
#   "*..."  -> ORG. Assumes "*##H ##H" (the two halves are joined), but
#              "*LABEL" passes through as well. $3 is appended directly
#              after the address, with no separator.
#   DB / DW -> EQU. $2 is space delimited: "DB LABEL VALUE" (3 words) or
#              "DB LABEL VALUE VALUE" (4 words, the two values are joined).
# A DB/DW record with any other word count is printed unchanged rather
# than being dropped.
function ispseudo(    addr, words, n) {
    if (index($2, "*") > 0) {
        addr = $2
        sub(/\*/, "", addr)
        sub(/H /, "", addr)
        # A number must not start with a letter.
        if (addr ~ /[A-F][0-9A-F][0-9A-F][0-9A-F]H$/)
            addr = "0" addr
        print $1 "\tORG\t" addr $3
        return
    }

    if ($2 ~ /DW|DB/) {
        n = split($2, words, " ")
        if (words[3] ~ /^[A-F][0-9A-F]H/)
            words[3] = "0" words[3]
        if (n == 3) {
            print words[2] "\tEQU\t" words[3] "\t" $3
            return
        }
        if (n == 4) {
            sub(/H/, "", words[3])
            print words[2] "\tEQU\t" words[3] words[4] "\t" $3
            return
        }
    }

    print $1 "\t" $2 "\t" $3
}

# ORG ("*" anywhere in $2) or DB/DW prefix -> ispseudo(), else isunknown().
function trypseudo(    prefix) {
    prefix = substr($2, 1, 2)
    if (index($2, "*") > 0 || prefix == "DB" || prefix == "DW")
        ispseudo()
    else
        isunknown()
}

# Register instructions, selected by the first three characters of $2.
function tryreg(    prefix) {
    prefix = substr($2, 1, 3)
    if (prefix in ONE_REG)
        isreg1()
    else if (prefix == "LXI")
        isthree()
    else if (prefix == "MOV")
        isreg2()
    else if (prefix == "MVI")
        isregi()
    else
        trypseudo()
}

# 3-byte instructions, selected by an exact match on $2.
function trythree() {
    if ($2 in THREE_BYTE)
        isthree()
    else
        tryreg()
}

# 2-byte instructions, selected by an exact match on $2.
function trytwo() {
    if ($2 in TWO_BYTE)
        istwo()
    else
        trythree()
}

{
    # fix comments: the first "/" in the record becomes "; "
    sub(/\//, "; ")

    # fix labels: a trailing "," on the label becomes ":"
    if ($1 ~ /,$/)
        sub(/,$/, ":", $1)

    if (NF < 2)
        # just output a line with no opcode field
        print $0
    else if ($2 in ONE_BYTE)
        isone()
    else
        trytwo()
}

END {
    # Nothing was converted when BEGIN bailed out with the usage message.
    if (NO_INPUT)
        exit 1
    # tea treats "end" as a label, so
    print "\tEND"
}