#! /usr/bin/python
# asm1802.py
# Assembler for CDP1802 processor
# D. Hunter 9/7/14
# version 1.5 01/11/15 DH handle symbols to I/O instructions
# version 1.6 08/12/17 DH fix mif file generation bug
# version 1.7 08/20/17 DH If END directive missing, flag error and abort
VERSION = 1.7

# uses the standard mnemonics for the CDP1802 as defined by
# RCA MPM-201A User Manual for the CDP1802 COSMAC Microprocessor
# in addition, the standard call and return is provided through
# mnemonics: 'CALL' and 'EXIT'
# note the CDP1802 is a 'big-endian' processor, so the high byte is at the
# lower address

import getopt
import os
import string
import sys

# NOTE: the output generators (module 'output') are imported inside main()
# so that this module can be imported without the output back end present.

PAD_VALUE = 16              # size of the first field in the list file

# command line
#   asm1802 -f FORMAT -s SOURCE -o OUTPUT [-z]
# output formats:   'bin'       - default
#                   'hex'       - Intel hex
#                   'verilog'   - Verilog memh format
#                   'altera'    - Altera MIF format

# assembler directives and modifiers:
#   ..              comment
#   ORG             origin
#   EQU or =        set symbol value
#   END             end of assembly
#   * or $          current address pointer
#   ,A(symbol)      store 16 bit value (MSB first)
#   ,#hh            store 8 bit value
#   ,T'text'        store string text (ASCII values)
#   A.0(symbol)     low byte of address
#   A.1(symbol)     high byte of address
#   #               hex identifier
# note: having labels end with a colon is optional since the assembler
#       breaks up lines based on whitespace

# assembler directives
directives = {
    'ORG' :'ORIGIN',
    'END' :'FINISH'
}

# each instruction mnemonic contains a tuple (opcode, type)
# the opcode is the hex value of the instruction
# the type indicates the type of opcode
# types:
#   ctl     1 byte instruction
#   io      1 byte instruction with parameter
#   reg     1 byte instruction with register parameter
#   imm     2 byte instruction with immediate value following opcode
#   br      2 byte instruction with branch address low byte following opcode
#   lbr     3 byte instruction with full branch address following opcode
# note, alternate forms of instructions are included based on the
# RCA MPM-201A User Manual for the CDP1802 COSMAC Microprocessor

# instruction types
TYPE_CTL    = 0
TYPE_IO     = 1
TYPE_REG    = 2
TYPE_IMM    = 3
TYPE_BR     = 4
TYPE_LBR    = 5
TYPE_CALL   = 6

# opcode dictionary
ops = {
    'IDL' :(0x00,TYPE_CTL),
    'LDN' :(0x00,TYPE_REG),
    'INC' :(0x10,TYPE_REG),
    'DEC' :(0x20,TYPE_REG),
    'BR'  :(0x30,TYPE_BR),
    'BQ'  :(0x31,TYPE_BR),
    'BZ'  :(0x32,TYPE_BR),
    'BDF' :(0x33,TYPE_BR),
    'BPZ' :(0x33,TYPE_BR),
    'BGE' :(0x33,TYPE_BR),
    'B1'  :(0x34,TYPE_BR),
    'B2'  :(0x35,TYPE_BR),
    'B3'  :(0x36,TYPE_BR),
    'B4'  :(0x37,TYPE_BR),
    'NBR' :(0x38,TYPE_BR),
    'SKP' :(0x38,TYPE_CTL),
    'BNQ' :(0x39,TYPE_BR),
    'BNZ' :(0x3A,TYPE_BR),
    'BNF' :(0x3B,TYPE_BR),
    'BM'  :(0x3B,TYPE_BR),
    'BL'  :(0x3B,TYPE_BR),
    'BN1' :(0x3C,TYPE_BR),
    'BN2' :(0x3D,TYPE_BR),
    'BN3' :(0x3E,TYPE_BR),
    'BN4' :(0x3F,TYPE_BR),
    'LDA' :(0x40,TYPE_REG),
    'STR' :(0x50,TYPE_REG),
    'IRX' :(0x60,TYPE_CTL),
    'OUT' :(0x60,TYPE_IO),
    'OUT1':(0x61,TYPE_CTL),
    'OUT2':(0x62,TYPE_CTL),
    'OUT3':(0x63,TYPE_CTL),
    'OUT4':(0x64,TYPE_CTL),
    'OUT5':(0x65,TYPE_CTL),
    'OUT6':(0x66,TYPE_CTL),
    'OUT7':(0x67,TYPE_CTL),
    'INP' :(0x68,TYPE_IO),
    'INP1':(0x69,TYPE_CTL),
    'INP2':(0x6A,TYPE_CTL),
    'INP3':(0x6B,TYPE_CTL),
    'INP4':(0x6C,TYPE_CTL),
    'INP5':(0x6D,TYPE_CTL),
    'INP6':(0x6E,TYPE_CTL),
    'INP7':(0x6F,TYPE_CTL),
    'RET' :(0x70,TYPE_CTL),
    'DIS' :(0x71,TYPE_CTL),
    'LDXA':(0x72,TYPE_CTL),
    'STXD':(0x73,TYPE_CTL),
    'ADC' :(0x74,TYPE_CTL),
    'SDB' :(0x75,TYPE_CTL),
    'SHRC':(0x76,TYPE_CTL),
    'RSHR':(0x76,TYPE_CTL),
    'SMB' :(0x77,TYPE_CTL),
    'SAV' :(0x78,TYPE_CTL),
    'MARK':(0x79,TYPE_CTL),
    'REQ' :(0x7A,TYPE_CTL),
    'SEQ' :(0x7B,TYPE_CTL),
    'ADCI':(0x7C,TYPE_IMM),     # manual's mnemonic for opcode 7C
    'ADDI':(0x7C,TYPE_IMM),     # historical spelling, kept for old sources
    'SDBI':(0x7D,TYPE_IMM),
    'SHLC':(0x7E,TYPE_CTL),
    'RSHL':(0x7E,TYPE_CTL),
    'SMBI':(0x7F,TYPE_IMM),
    'GLO' :(0x80,TYPE_REG),
    'GHI' :(0x90,TYPE_REG),
    'PLO' :(0xA0,TYPE_REG),
    'PHI' :(0xB0,TYPE_REG),
    'LBR' :(0xC0,TYPE_LBR),
    'LBQ' :(0xC1,TYPE_LBR),
    'LBZ' :(0xC2,TYPE_LBR),
    'LBDF':(0xC3,TYPE_LBR),
    'NOP' :(0xC4,TYPE_CTL),
    'LSNQ':(0xC5,TYPE_CTL),
    'LSNZ':(0xC6,TYPE_CTL),
    'LSNF':(0xC7,TYPE_CTL),
    'LSKP':(0xC8,TYPE_CTL),
    'NLBR':(0xC8,TYPE_LBR),
    'LBNQ':(0xC9,TYPE_LBR),
    'LBNZ':(0xCA,TYPE_LBR),
    'LBNF':(0xCB,TYPE_LBR),
    'LSIE':(0xCC,TYPE_CTL),
    'LSQ' :(0xCD,TYPE_CTL),
    'LSZ' :(0xCE,TYPE_CTL),
    'LSDF':(0xCF,TYPE_CTL),
    'SEP' :(0xD0,TYPE_REG),
    'SEX' :(0xE0,TYPE_REG),
    'LDX' :(0xF0,TYPE_CTL),
    'OR'  :(0xF1,TYPE_CTL),
    'AND' :(0xF2,TYPE_CTL),
    'XOR' :(0xF3,TYPE_CTL),
    'ADD' :(0xF4,TYPE_CTL),
    'SD'  :(0xF5,TYPE_CTL),
    'SHR' :(0xF6,TYPE_CTL),
    'SM'  :(0xF7,TYPE_CTL),
    'LDI' :(0xF8,TYPE_IMM),
    'ORI' :(0xF9,TYPE_IMM),
    'ANI' :(0xFA,TYPE_IMM),
    'XRI' :(0xFB,TYPE_IMM),
    'ADI' :(0xFC,TYPE_IMM),
    'SDI' :(0xFD,TYPE_IMM),
    'SHL' :(0xFE,TYPE_CTL),
    'SMI' :(0xFF,TYPE_IMM),
    'CALL':(0xD4,TYPE_LBR),     # standard call
    'EXIT':(0xD5,TYPE_CTL)      # standard return
}

# number of object bytes generated by each instruction type
_INST_SIZES = {
    TYPE_CTL: 1,
    TYPE_IO:  1,
    TYPE_REG: 1,
    TYPE_IMM: 2,
    TYPE_BR:  2,
    TYPE_LBR: 3
}

# names of registers
register_names = {
    'R0': 0x00, 'R1': 0x01, 'R2': 0x02, 'R3': 0x03,
    'R4': 0x04, 'R5': 0x05, 'R6': 0x06, 'R7': 0x07,
    'R8': 0x08, 'R9': 0x09, 'RA': 0x0A, 'RB': 0x0B,
    'RC': 0x0C, 'RD': 0x0D, 'RE': 0x0E, 'RF': 0x0F
}

# I/O port names
port_names = {
    '1': 0x01, '2': 0x02, '3': 0x03, '4': 0x04,
    '5': 0x05, '6': 0x06, '7': 0x07
}

# global variables
symtab = {}         # symbol table (label, address)

#-------------------------------------------------------------------------------
# string parsing routines

# convert a set of fields to a string ignoring comments
# the fields are concatenated without separators; the first field that
# starts with '..' and everything after it is dropped
def fieldsToString(flds):
    kept = []
    for f in flds:
        if f[0:2] == '..':
            break                       # quit at first comment
        kept.append(f)
    return ''.join(kept)

# parse a value (symbol, hexadecimal or decimal)
# return integer or -1 if invalid
# the text is upper-cased first, so symbol names and hex digits are case
# insensitive; an empty string is invalid
def parseValue(valStr):
    valStr = valStr.upper()
    if valStr in symtab:                # symbol?
        return symtab[valStr]
    if not valStr:                      # nothing to parse
        return -1
    try:
        if valStr[0] == '#':            # hex value?
            return int(valStr[1:], 16)
        return int(valStr)              # decimal value
    except ValueError:
        return -1

# parse a byte value
# return integer representing the byte value or a -1 if parsing fails
# possible inputs:
#   A.0(symbol)     ( low byte of address )
#   A.1(symbol)     ( high byte of address )
#   'c              ( ASCII character )
#   '\c             ( control character, e.g. '\A is 1; '\\ is a backslash )
#   #dd             ( hex value )
#   dd              ( decimal value )
#   symbol          ( symbol whose value fits in a byte )
def parseByteValue(byteStr):
    if not byteStr:
        return -1
    prefix = byteStr[0:4]
    if prefix == 'A.0(' or prefix == 'A.1(':        # address directive?
        name = parseString(byteStr[3:], ')').upper()    # get symbol name
        if name not in symtab:
            return -1
        if prefix == 'A.0(':
            value = symtab[name] % 256              # get low byte
        else:
            value = symtab[name] >> 8               # get high byte
    elif byteStr[0] == '\'':                        # ASCII value?
        if len(byteStr) < 2:                        # quote with no character
            value = -1
        elif byteStr[1] != '\\':
            value = ord(byteStr[1])
        elif len(byteStr) < 3:                      # escape with no character
            value = -1
        elif byteStr[2] == '\\':                    # \\? e.g slant
            value = ord('\\')
        else:
            # control character: distance of the upper-cased character from '@'
            value = ord(byteStr[2].upper()) - ord('@')
            if value < 0:
                value = -1
    else:
        # symbols (including those starting with 'A'), hex and decimal values
        value = parseValue(byteStr)
    if value > 255:         # check if parsed into a byte value
        value = -1          # otherwise return an error
    return value

# parse an address value
# return an integer representing the address or -1 if parsing fails
# possible inputs:
#   * or $              ( current address, taken from pctr )
#   *+value, *-value    ( current address with offset; $ works the same )
#   symbol+value        ( symbol with positive offset )
#   symbol-value        ( symbol with negative offset )
#   symbol, #hhhh, dddd ( anything parseValue accepts )
# NOTE(review): the opening lines of this routine were unreadable in the copy
# that was reviewed; the signature and the '*'/'$' test are reconstructed from
# the call sites (aStr, pctr) and the directive list at the top of the file.
# Confirm against the original source.
def parseAddressValue(addrStr, pctr):
    if not addrStr:                             # missing operand
        return -1
    if addrStr[0] == '*' or addrStr[0] == '$':  # current address pointer?
        addr = pctr
        if len(addrStr) > 1:
            op = addrStr[1]                     # get the operator (+ or -)
            value = parseValue(addrStr[2:])     # parse the value
            if value == -1:
                addr = -1
            elif op == '+':
                addr = addr + value
            elif op == '-':
                addr = addr - value
            else:
                addr = -1
    elif '+' in addrStr:                        # symbol with positive offset?
        flds = addrStr.split('+')               # separate symbol and value
        addr = _applyOffset(parseValue(flds[0]), parseValue(flds[1]), 1)
    elif '-' in addrStr:                        # symbol with negative offset?
        flds = addrStr.split('-')               # separate symbol and value
        addr = _applyOffset(parseValue(flds[0]), parseValue(flds[1]), -1)
    else:
        addr = parseValue(addrStr)
    return addr

# combine a parsed base and offset; -1 if either one failed to parse so that
# a bad operand is reported instead of producing a wrong address
def _applyOffset(base, offset, sign):
    if base == -1 or offset == -1:
        return -1
    return base + sign * offset

# parse an inline data value, return a string of bytes
# possible inputs (text following the ',' of a storage directive):
#   A(symbol)       two bytes, high byte first; nulls if symbol not yet known
#   A.0(symbol)     low byte of symbol; null if symbol not yet known
#   A.1(symbol)     high byte of symbol; null if symbol not yet known
#   T'text'         the characters of the string
#   value           symbol, hex or decimal; two bytes when the value is
#                   greater than 255, otherwise one byte
# NOTE: a value that cannot be parsed comes back from parseValue as -1 and is
# stored as the single byte FF (only the first whitespace-separated field of
# a plain value is used).
def parseDataValue(valStr):
    if valStr[0:2] == 'A(':                         # address reference
        name = parseString(valStr[1:], ')').upper()     # get symbol name
        if name not in symtab:
            return '\x00\x00'       # put in nulls for forward reference
        d = symtab[name]
        return chr(d >> 8) + chr(d & 0x00FF)        # upper byte, lower byte
    if valStr[0:4] == 'A.0(' or valStr[0:4] == 'A.1(':  # byte reference
        name = parseString(valStr[3:], ')').upper()     # get symbol name
        if name not in symtab:
            return '\x00'           # put in null for forward reference
        d = symtab[name]
        if valStr[2] == '0':
            return chr(d & 0x00FF)                  # low byte of symbol
        return chr(d >> 8)                          # high byte of symbol
    if valStr[0:2] == 'T\'':                        # string value
        return parseString(valStr[1:])
    values = valStr.split()
    if not values:                                  # nothing after the comma
        return ''
    d = parseValue(values[0])                       # only use 1st field
    if d > 255:
        return chr(d >> 8) + chr(d & 0x00FF)        # upper byte, lower byte
    return chr(d & 0x00FF)                          # one byte

# parse a string delimited by a single character allowing for an escape char
# the first character of valStr is taken to be the opening delimiter and is
# dropped; a backslash makes the next character literal
# all characters after the trailing delimiter are ignored
def parseString(valStr, delimiter='\''):
    chars = []
    escaped = False
    for c in valStr[1:]:                            # drop leading delimiter
        if c == '\\' and not escaped:               # escape char
            escaped = True                          # take next char literally
        elif c == delimiter and not escaped:        # end of string
            break
        else:
            escaped = False
            chars.append(c)
    return ''.join(chars)

#-------------------------------------------------------------------------------
# pass1 reads program and creates the symbol table

# return the number of object bytes for a mnemonic, 0 if it is not an op code
def getInstBytes(code):
    if code not in ops:
        return 0
    return _INST_SIZES.get(ops[code][1], 0)

# split a line that starts in the first column into its label and the rest
# returns (label, remaining fields, remaining text); the label is upper-cased
# with any colons removed, the remaining text has leading white space dropped
# flds must be the non-empty result of line.split()
def _splitLabel(line, flds):
    label = flds[0].upper().replace(':', '')
    rest = line[len(flds[0]):].lstrip()
    return (label, flds[1:], rest)

# first pass: walk the program (list of source lines) tracking the program
# counter and recording every label in symtab
# returns True if the END directive was found
def pass1(program):
    pctr = 0            # start program counter at 0
    lnum = 1
    retVal = False      # return True if finish directive found
    for line in program:
        flds = line.split()         # break line into fields
        lineStr = line.lstrip()     # drop white space to check for a comment
        if flds and lineStr[0:2] != '..':   # skip blank and commented lines
            if line[0] > ' ':       # a label (character in first position)
                (symb, flds, lineStr) = _splitLabel(line, flds)
                if flds and (flds[0] == 'EQU' or flds[0] == '='):
                    if len(flds) > 1:
                        symtab[symb] = parseValue(flds[1])
                    else:
                        print('ERROR in line %d - missing value for EQU' % (lnum))
                    flds = []       # nothing else to size on this line
                else:
                    symtab[symb] = pctr
            # size whatever follows the label exactly as pass 2 will emit it
            if not flds or lineStr[0:2] == '..':
                pass                # label alone or label with comment
            elif flds[0] in directives:
                kind = directives[flds[0]]
                if kind == 'ORIGIN':
                    pctr = parseAddressValue(fieldsToString(flds[1:]), pctr)
                elif kind == 'FINISH':
                    print('End of Pass 1')
                    retVal = True   # indicate directive found
                    break
                else:
                    print('ERROR in line %d - invalid directive' % (lnum))
            elif lineStr[0] == ',':                 # storage directive
                pctr = pctr + len(parseDataValue(lineStr[1:]))
            else:                                   # normal line
                pctr = pctr + getInstBytes(flds[0])
        lnum = lnum + 1
    return retVal

#-------------------------------------------------------------------------------
# pass 2 does the assembly process generating machine code

# assemble one instruction
#   lnum    source line number, used in error messages
#   pctr    address of the instruction, used for '*' / '$' operands
#   flds    fields of the line with any label removed; flds[0] is the mnemonic
# returns the object bytes as a string; an invalid op code returns an empty
# string, an invalid operand is reported and assembled as zero
def assemble(lnum, pctr, flds):
    code = flds[0]
    if code not in ops:
        print('ERROR in line %d - invalid op code' % (lnum))
        return ''
    (opcode, kind) = ops[code]              # look up opcode value
    operand = ''
    if len(flds) > 1:
        operand = flds[1]
    symbol = operand.upper()                # symbols are stored upper case

    if kind == TYPE_CTL:                    # control byte, just the value
        return chr(opcode)

    if kind == TYPE_IO:                     # I/O instruction, add the port
        if symbol in symtab:
            port = symtab[symbol]
        else:
            port = port_names.get(operand, -1)
        if port < 1 or port > 7:
            print('ERROR in line %d - invalid port value' % (lnum))
            return '\0'
        return chr(opcode + port)

    if kind == TYPE_REG:                    # register instruction
        if symbol in register_names:
            regval = register_names[symbol]
        else:
            regval = parseValue(operand)
        if regval < 0 or regval > 15:
            print('ERROR in line %d - invalid register value' % (lnum))
            return '\0'
        return chr(opcode + regval)

    if kind == TYPE_IMM:                    # immediate instruction
        if symbol in symtab:                # symbolic value, substitute it
            byte = symtab[symbol]
        else:
            byte = parseByteValue(operand)
        if byte < 0 or byte > 255:
            print('ERROR in line %d - invalid immediate value' % (lnum))
            byte = 0
        return chr(opcode) + chr(byte)

    if kind == TYPE_BR:                     # short branch instruction
        addr = parseAddressValue(fieldsToString(flds[1:]), pctr)
        if addr == -1:
            print('ERROR in line %d - invalid address value' % (lnum))
            addr = 0
        return chr(opcode) + chr(addr & 0x00FF)     # low byte of the address

    if kind == TYPE_LBR:                    # long branch instruction
        addr = parseAddressValue(fieldsToString(flds[1:]), pctr)
        if addr < 0 or addr > 0xFFFF:
            print('ERROR in line %d - invalid address value' % (lnum))
            addr = 0
        return chr(opcode) + chr(addr >> 8) + chr(addr & 0x00FF)

    print('ERROR in line %d - invalid operation type' % (lnum))
    return ''

# format list file line
# format is:
#   addr [byteStr] ; line text
# the address/bytes field is padded to PAD_VALUE characters; when it is longer
# (e.g. a string) the source text is put on a new line under the same column
# tabs in the text are converted by left-justifying every tab-separated piece
# in a field of 8 characters
def genListLine(addr, line, byteStr, text):
    retStr = '%04X %s;' % (addr, byteStr)       # address and object bytes
    retStr = retStr.ljust(PAD_VALUE)            # pad out characters
    if len(retStr) > PAD_VALUE:                 # bytes too long, break line
        retStr = retStr + '\n' + ' ' * PAD_VALUE
    newText = ''.join('%-8s' % item for item in text.split('\t'))
    return retStr + '%04d %s\n' % (line, newText)

# second pass: generate the object code and the listing
# returns (records, listing) where records is a list of tuples
# (start address, string of object bytes), a new record being started at
# every ORG, and listing is the text of the list file
def pass2(program):
    records = []        # list of record tuples (startAddr, binary data string)
    binData = ''        # binary data of the current record
    listing = ['!M\n']  # list file output (RCA standard start)
    startAddr = 0       # initial start address at 0
    pctr = 0            # start program counter at 0
    lnum = 1
    for line in program:
        flds = line.split()         # break line into fields
        lineStr = line.lstrip()     # drop white space to check for a comment
        byteStr = ''                # hex text of the bytes for the listing
        lineAddr = pctr             # address at start of the line
        if flds and line[0] > ' ' and line[0:2] != '..':
            # drop the label; the colon is optional so the label is simply
            # the first field of the line
            (symb, flds, lineStr) = _splitLabel(line, flds)
        data = ''
        if not flds:                        # ignore blank line
            pass
        elif lineStr[0:2] == '..':          # ignore commented line
            pass
        elif flds[0] in directives:         # assembler directive
            kind = directives[flds[0]]
            if kind == 'ORIGIN':
                pctr = parseAddressValue(fieldsToString(flds[1:]), pctr)
                if len(binData) > 0:        # close the current record
                    records.append((startAddr, binData))
                    binData = ''
                startAddr = pctr
            elif kind == 'FINISH':
                # add last line to output file
                listing.append(genListLine(pctr, lnum, byteStr, line.rstrip()))
                if len(binData) > 0:        # close the current record
                    records.append((startAddr, binData))
                print('End of Pass 2')
                break
            else:
                print('ERROR in line %d - invalid directive' % (lnum))
        elif flds[0] == 'EQU' or flds[0] == '=':    # equate, done in pass 1
            pass
        elif lineStr[0] == ',':             # storage directive
            data = parseDataValue(lineStr[1:])
        else:
            data = assemble(lnum, pctr, flds)
        if data:
            binData = binData + data
            byteStr = ''.join('%02X' % ord(b) for b in data)
            pctr = pctr + len(data)
        # add line to output file
        listing.append(genListLine(lineAddr, lnum, byteStr, line.rstrip()))
        lnum = lnum + 1
    listing.append('0000\n')                # end of list file indicator
    return (records, ''.join(listing))

#-------------------------------------------------------------------------------
# write symbol table
# one line per symbol in sorted order; srcname is only used in the heading
def writeSymFile(fname, srcname):
    with open(fname, 'w') as symfile:
        symfile.write('Symbol table for %s\n' % (srcname))
        for s in sorted(symtab):
            symfile.write('%-16s = %04X\n' % (s, symtab[s]))

# command line entry point
# argv defaults to the arguments of the running program
# error conditions end the program with exit status 1
def main(argv=None):
    from output import genBinFile, genHexFile, genVerilogFile, genAlteraFile

    if argv is None:
        argv = sys.argv[1:]
    format = 'bin'
    source = ''
    output = ''
    zeroFlg = False     # offset of zero?

    try:
        (options, arguments) = getopt.getopt(argv, 'f:s:o:z')
    except getopt.GetoptError as err:
        print('ERROR: %s' % (err))
        options = []
    if len(options) == 0:
        print('usage: asm1802 -f [bin | hex | verilog | altera] -s -o ')
        print('-z = output file has an offset of zero (for PROMs)')
        sys.exit()

    for (op, val) in options:
        if op == '-f':
            format = val
        elif op == '-s':
            source = val
        elif op == '-o':
            output = val
        elif op == '-z':        # if zero op
            zeroFlg = True
        else:
            print('ERROR: invalid option')

    print('asm1802 assembler (Ver %.1f)' % (VERSION))
    try:
        with open(source, 'r') as srcfile:
            program = srcfile.readlines()
    except IOError:
        print('ERROR: unable to open file %s' % (source))
        sys.exit(1)

    print('Starting pass 1')
    if not pass1(program):
        print('ERROR: END directive not found')
        sys.exit(1)

    print('Starting pass 2')
    (records, lstfile) = pass2(program)

    print('writing output file %s' % (output))
    if format == 'bin':
        genBinFile(output, records)
    elif format == 'hex':
        genHexFile(output, records, zeroFlg)
    elif format == 'verilog':
        genVerilogFile(output, records, zeroFlg)
    elif format == 'altera':
        genAlteraFile(output, records, zeroFlg)
    else:
        print('invalid output file format')
        sys.exit(1)

    # make list file and symbol table file names by replacing the extension
    # of the source name (dots in directory names are left alone)
    base = os.path.splitext(source)[0]
    listfile = base + '.lst'
    symfile = base + '.sym'

    print('writing list file %s' % (listfile))
    with open(listfile, 'w') as lst:
        lst.write(lstfile)

    # print symbol table
    print('writing symbol table file %s' % (symfile))
    writeSymFile(symfile, source)
    print('done')

if __name__ == '__main__':
    main()