# MIT License # # Copyright (c) 2023 Neil Webber # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # FUNCTIONALITY DISCLAIMER: # This is NOT meant to recreate the entire idea of a PDP-11 assembler. # Rather, it is meant as an ad-hoc assistance for creating and # debugging small test programs, of the sort that are found in pdptest. # As such, the methods here are written on an "as-needed" basis and # are focused around helping to create hand-constructed test code. 
#

from branches import BRANCH_CODES
from collections import defaultdict
from functools import partial


class PDP11InstructionAssembler:
    """Assemble individual PDP-11 instructions into lists of 16-bit words.

    Each instruction method (mov, clr, jsr, ...) parses its operands,
    builds the instruction word plus any additional operand words, and
    returns the result of passing that list through _seqwords().
    """

    # B6MODES maps every operand spelling that needs no additional word
    # (upper case) to its 6-bit mode/register code.
    B6MODES = {}
    _rnames = [(f"R{_i}", _i) for _i in range(8)] + [("SP", 6), ("PC", 7)]
    for _rn, _i in _rnames:
        B6MODES[f"{_rn}"] = _i                    # register direct
        B6MODES[f"({_rn})"] = 0o10 | _i           # register indirect
        B6MODES[f"({_rn})+"] = 0o20 | _i          # autoincrement
        B6MODES[f"@({_rn})+"] = 0o30 | _i         # autoincr deferred
        B6MODES[f"-({_rn})"] = 0o40 | _i          # autodecrement
        B6MODES[f"@-({_rn})"] = 0o50 | _i         # autodecr deferred
    del _i, _rn, _rnames

    def __iter__(self):
        """Iterate over the accumulated instruction words.

        Reads self._fwdrefs and self._instblock, which this class itself
        never creates; they must be supplied by a subclass.

        Raises ValueError if any forward references are still unresolved.
        """
        if self._fwdrefs:
            raise ValueError(f"unresolved refs: "
                             f"{list(self._fwdrefs)}")
        return iter(self._instblock)

    def immediate_value(self, s):
        """Convert a numeric string into an unsigned 16-bit integer.

        s is octal unless it ends with '.', which makes it decimal.
        One leading '$' is ignored. Values -32768 .. -1 are converted to
        their 16-bit two's complement equivalent.

        Raises ValueError if s is not a number (including an empty
        string) or if the value does not fit in 16 bits.
        """
        # called in various contexts which may or may not
        # require a '$' on immediate constants; skip it if present.
        # Slices (not indexing) so an empty string falls through to
        # int() and produces ValueError rather than IndexError.
        if s[:1] == '$':
            s = s[1:]

        # default octal unless number terminates with '.'
        base = 8
        if s[-1:] == '.':
            base = 10
            s = s[:-1]

        val = int(s, base)

        # as a convenience, allow negative values and convert them
        if val < 0 and val >= -32768:
            val += 65536

        if val > 65535 or val < 0:
            raise ValueError(f"illegal value '{s}' = {val}")
        return val

    # this is a notational convenience to create a f'*${i}.' string
    # for an operand that is an immediate deferred (i.e., numeric pointer)
    def ptr(self, i):
        """Return the operand string '*$<i>.' (decimal pointer literal)."""
        return f'*${i}.'

    def operand_parser(self, operand_token, /):
        """Parse operand_token ('r1', '-(sp)', '4(r5)', $177776, etc).

        Returns: sequence: [6 bit code, additional words ...]

        Raises ValueError for syntax errors.

        Literals that should become (pc)+ (mode 0o27) must start with '$'
        They will be octal unless they end with a '.'

        Literals that are pointers and should become @(pc)+ must
        start with '*$' and will be octal unless they end with a '.'

        An integer, i, can be passed in directly.
        """

        # for convenience
        cannotparse = ValueError(f"cannot parse '{operand_token}'")

        # normalize the operand, upper case for strings, turn ints back
        # into their corresponding string (roundabout, but easiest)
        try:
            operand = operand_token.upper()
        except AttributeError:
            operand = f"${operand_token}."

        # bail out if spaces in middle (or nothing there at all),
        # and remove spaces at ends
        pieces = operand.split()
        if len(pieces) != 1:
            raise cannotparse
        operand = pieces[0]

        # operand now fully normalized: upper case, no spaces.

        # the first/easiest to try is to see if it is an immediate.
        # It will (must) start with either '$', or '*$' if so.
        try:
            if operand[0] == '$':
                return [0o27, self.immediate_value(operand)]
            elif operand.startswith('*$'):
                return [0o37, self.immediate_value(operand[1:])]
        except ValueError:
            raise cannotparse from None

        # wasn't immediate, see if it matches the precomputed modes
        try:
            return [self.B6MODES[operand]]
        except KeyError:
            pass

        # last chance: X(Rn) and @X(rn)
        # see if X(Rn) or @X(Rn)...
        if operand[0] == '@':
            mode = 0o70
            operand = operand[1:]
        else:
            mode = 0o60

        # for starters, it must contain one '(' so should split to 2
        pieces = operand.split('(')
        if len(pieces) != 2:
            raise cannotparse

        # the index must be a legal number; a missing or malformed
        # index is a syntax error like any other
        try:
            idxval = self.immediate_value(pieces[0])
        except ValueError:
            raise cannotparse from None

        # the back end of this, with the '(' put back on,
        # must end with ')' and must parse
        if not pieces[1].endswith(')'):
            raise cannotparse
        try:
            b6 = self.B6MODES['(' + pieces[1]]
        except KeyError:
            raise cannotparse from None
        return [mode | (b6 & 0o07), idxval]

    def register_parser(self, operand_token, /):
        """Like operand_parser but token MUST be register direct.

        Returns the register number; raises ValueError otherwise.
        """
        seq = self.operand_parser(operand_token)
        if len(seq) > 1 or seq[0] > 0o07:
            raise ValueError(f"{operand_token} must be register-direct")
        return seq[0]

    # gets overridden in InstructionBlock to track generated instructions
    def _seqwords(self, seq):
        """Return seq (the words of one instruction) unchanged."""
        return seq

    # All 2 operand instructions end up here eventually
    def _2op(self, operation, src, dst):
        """Assemble a two-operand instruction.

        Returns [instruction word, src extra words..., dst extra words...]
        as passed through _seqwords().
        """
        src6, *src_i = self.operand_parser(src)
        dst6, *dst_i = self.operand_parser(dst)
        return self._seqwords([operation | src6 << 6 | dst6, *src_i, *dst_i])

    # All 1 operand instructions end up here eventually
    # This also supports 0 operand "literals" (which are typically
    # instructions that have been hand-assembled another way)
    def _1op(self, operation, dst):
        """dst can be None for, essentially, a _0op."""
        if dst is None:
            return self._seqwords([operation])
        else:
            dst6, *dst_i = self.operand_parser(dst)
            return self._seqwords([operation | dst6, *dst_i])

    # some instructions only operate on registers not fully-general operands
    def _regdirect(self, operation, regspec):
        """Assemble an instruction whose only operand is a register."""
        regnum = self.register_parser(regspec)
        return self._seqwords([operation | regnum])

    # XXX the instructions are not complete, this is being developed
    #     as needed for pdptests.py
    # ALSO: see InstructionBlock for branch support
    def mov(self, src, dst):
        return self._2op(0o010000, src, dst)

    def movb(self, src, dst):
        return self._2op(0o110000, src, dst)

    def cmp(self, src, dst):
        return self._2op(0o020000, src, dst)

    def bit(self, src, dst):
        return self._2op(0o030000, src, dst)

    def bic(self, src, dst):
        return self._2op(0o040000, src, dst)

    def bis(self, src, dst):
        return self._2op(0o050000, src, dst)

    def add(self, src, dst):
        return self._2op(0o060000, src, dst)

    def sub(self, src, dst):
        return self._2op(0o160000, src, dst)

    # note: gets overridden in InstructionBlock to add label support
    def jmp(self, dst):
        return self._1op(0o000100, dst)

    # note: gets overridden in InstructionBlock to add label support
    def br(self, offs):
        """offs is used directly as the 8-bit offset field of the word."""
        return self.literal(0o000400 | (offs & 0o377))

    # note: gets overridden in InstructionBlock to add label support
    def jsr(self, reg, dst):
        return self._1op(0o004000 | (self.register_parser(reg) << 6), dst)

    def rts(self, reg):
        return self.literal(0o000200 | self.register_parser(reg))

    def clr(self, dst):
        return self._1op(0o005000, dst)

    def inc(self, dst):
        return self._1op(0o005200, dst)

    def dec(self, dst):
        return self._1op(0o005300, dst)

    def neg(self, dst):
        return self._1op(0o005400, dst)

    def negb(self, dst):
        return self._1op(0o105400, dst)

    def tst(self, dst):
        return self._1op(0o005700, dst)

    def swab(self, dst):
        return self._1op(0o000300, dst)

    def asl(self, dst):
        return self._1op(0o006300, dst)

    def asrb(self, dst):
        return self._1op(0o106200, dst)

    def rorb(self, dst):
        return self._1op(0o106000, dst)

    def ash(self, cnt, dst):
        """dst must be register-direct; cnt is a general operand."""
        dstreg = self.register_parser(dst)
        return self.literal(0o072000 | dstreg << 6, cnt)

    def halt(self):
        return self.literal(0)

    def rtt(self):
        return self.literal(6)

    def rti(self):
        return self.literal(2)

    def nop(self):
        return self.literal(0o000240)

    def clc(self):
        """Clear Carry"""
        return self.literal(0o000241)

    def wait(self):
        return self.literal(1)

    def mtpi(self, dst):
        return self._1op(0o006600, dst)

    def mfpi(self, src):
        return self._1op(0o006500, src)

    def mtpd(self, dst):
        return self._1op(0o106600, dst)

    def mfpd(self, src):
        return self._1op(0o106500, src)

    def trap(self, tnum):
        # NOTE(review): tnum is OR'd in without a range check; values
        # above 0o377 would alter the opcode bits -- confirm callers.
        return self.literal(0o104400 | tnum)

    def literal(self, inst, oprnd=None, /):
        """For hand-assembled instructions. Also allows 1 operand.

        Raises ValueError if inst is not in the range 0 .. 65535.
        """

        # prevent bad values from leaking into physical memory this way
        if inst < 0 or inst > 65535:
            raise ValueError(f"Bad instruction literal: {inst}")
        return self._1op(inst, oprnd)


class FwdRef:
    """Values determined by a not-yet-seen label() definition."""

    def __init__(self, name, block, *, idxrel=False, idxadj=0):
        """Record a pending reference to label 'name' and register it.

        The reference remembers len(block) at creation time as its
        location. If idxrel is True the eventual value is made relative
        to (2 * that location) + idxadj; otherwise it is used as-is.
        The new object is appended to block._fwdrefs[name].
        """
        self.loc = len(block)
        self.name = name
        self.block = block
        if idxrel:
            self.adjust = (2 * self.loc) + idxadj
        else:
            self.adjust = 0
        block._fwdrefs[name].append(self)

    def __call__(self):
        """Patch the resolved value into the block in place of self.

        Raises ValueError if this object is not found in the block at
        its recorded location or within the two words after it.
        """
        insts = self.block._instblock

        # the location to be patched is in one of three places, look for it
        # (never looking past the end of what has been assembled so far):
        for idx in range(self.loc, min(self.loc + 3, len(insts))):
            if insts[idx] is self:
                insts[idx] = self.transform()
                break
        else:
            raise ValueError(f"could not find FwdRef {self}")

    def __iter__(self):
        # lets a FwdRef be unpacked like an operand_parser result:
        # mode 0o27 with the FwdRef itself as the additional word
        return iter([0o27, self])

    def __repr__(self):
        s = f"{self.__class__.__name__}<"
        s += f"{str(self.name)}, block"
        s += f", loc={self.loc}>"
        return s

    def transform(self):
        """Return the 16-bit value to patch in, once the label is known."""
        return self.block._neg16(
            self.block.getlabel(self.name) - self.adjust)


class BranchTarget(FwdRef):
    """A FwdRef that resolves to a complete branch instruction word."""

    def __init__(self, brcode, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__brcode = brcode

    @staticmethod
    def branchencode(brcode, offs, name):
        """The guts of encoding/checking a branch offset.

        Returns brcode combined with the 8-bit offset field.
        Raises ValueError (mentioning name) if the target is too far.
        """

        # offs is in 16-bit form as a byte offset; convert it to
        # 8-bit branch (word offset) form and make sure not too big
        if offs > 254 and offs < (65536 - 256):
            raise ValueError(f"branch target ('{name}') too far.")

        offs >>= 1
        return brcode | (offs & 0o377)

    def transform(self):
        """Called when a forward branch ref is ready to be resolved."""
        # offset is relative to the word following the branch itself
        offs = self.block.getlabel(self.name) - (2 * (self.loc + 1))
        return self.branchencode(self.__brcode, offs, self.name)


# An InstructionBlock is a thin layer on just accumulating a sequence
# of results from calling the instruction methods.
#
# Instead of:
#     a = PDP11InstructionAssembler()
#     insts = (
#         a.mov('r1', 'r2'),
#         a.clr('r0'),
#         etc ...
#     )
#
# An InstructionBlock can be used this way:
#
#     a = InstructionBlock()
#     a.mov('r1', 'r2')
#     a.clr('r0')
#     etc ...
#
# Each call to an instruction method appends that instruction to the block.
# Subject to opinion, this may be notationally cleaner/clearer and also
# opens the possibility of if/for/etc full programming constructs in
# forming the instruction sequence itself.
#
# An InstructionBlock adds simple label support as well, useful for branching.
#
# If care is taken to write position-independent code, an InstructionBlock
# can eventually be placed at any arbitrary location in memory. When labels
# are used in jmp or jsr instructions, they are automatically assembled
# as PC-relative offsets (mode = 0o67) rather than absolute values.
#
# The current list of instruction words is available by using the
# instruction block as an iterable. For example:
#
#     instlist = list(a)
#
# or perhaps 'for x in a:'
#
# NOTE: If there are dangling forward references asking for the instructions
#       raises a ValueError. This can be suppressed (usually only useful
#       for debugging) by requesting a._instructions()
#

class InstructionBlock(PDP11InstructionAssembler):
    """Accumulates assembled instructions and supports labels."""

    def __init__(self):
        super().__init__()
        self._instblock = []                    # words (or FwdRef objects)
        self._labels = {}                       # name -> value
        self._fwdrefs = defaultdict(list)       # name -> pending FwdRefs

    def _seqwords(self, seq):
        """Append the words in seq to the block; return seq."""
        self._instblock += seq
        return seq

    # Extend base operand_parser with ability to handle labels,
    # including forward references
    def operand_parser(self, operand_token, *args, **kwargs):
        """Parse an operand, additionally accepting labels and FwdRefs.

        A label becomes mode 0o27 with the label value (or a FwdRef).
        A label prefixed with '+' becomes mode 0o67 (PC-relative).
        Raises ValueError if the token is neither a valid operand nor
        an allowable label.
        """

        # it's possible to get here with operand_token already
        # being a forward ref (e.g., if getlabel was used)
        if isinstance(operand_token, FwdRef):
            return operand_token
        else:
            try:
                return super().operand_parser(operand_token, *args, **kwargs)
            except ValueError:
                if not self._allowable_label(operand_token):
                    raise

        # falling through to here means it is a label or forward reference
        # IF it starts with '+' it means use PC-relative addr mode
        # which will require some fussing around...
        if operand_token[0] == '+':
            return [0o67,
                    self.getlabel(operand_token[1:], idxrel=True, idxadj=4)]
        else:
            return [0o27, self.getlabel(operand_token)]

    def __len__(self):
        """Returns the length of the sequence in WORDS"""
        return len(self._instblock)

    def __dotandnumbers(self, w):
        """Turn '.' into 2x current offset, turn numbers into integers"""
        if isinstance(w, int):          # already an integer
            return w
        elif w in '+-':                 # operators pass through untouched
            return w
        elif w == '.':
            return len(self) * 2
        elif w[-1] == '.':              # 12345. for example
            return int(w[:-1])
        elif self._allowable_label(w):
            return self.getlabel(w)
        else:
            return int(w, 8)

    def _allowable_label(self, s):
        """Return True if s is a string that is acceptable as a label.

        One leading '+' is ignored. The remainder must be non-empty,
        must start with a letter or '_', and must not match (ignoring
        case) any operand spelling in B6MODES.
        """
        if not hasattr(s, 'isalpha'):
            return False
        if s[:1] == '+':
            s = s[1:]
        if not s:                       # '' or a bare '+' is not a label
            return False
        return ((s.upper() not in self.B6MODES) and
                (s[0].isalpha() or s[0] == '_'))

    def label(self, name, *, value='.'):
        """Record the current position, or 'value', as 'name'.

        If no value specified, it defaults to '.' which means the
        current position index, multiplied by 2 so that it is suitable
        to add to a base address.

        Otherwise the value is taken as-is, or with a trivial amount of
        arithmetic: 'a + b' or 'a - b' with spaces around the operator.

        Labels must start with a .isalpha() character and must not match
        (ignoring case) any of the tokens in B6MODES

        Returns the recorded value. Raises ValueError if value cannot
        be parsed.
        """

        try:
            value_tokens = value.split()
        except AttributeError:
            value_tokens = [value]

        value_tokens = [self.__dotandnumbers(w) for w in value_tokens]

        if len(value_tokens) == 3:
            if value_tokens[1] == '+':
                value_tokens = [value_tokens[0] + value_tokens[2]]
            elif value_tokens[1] == '-':
                value_tokens = [value_tokens[0] - value_tokens[2]]
        if len(value_tokens) != 1:
            raise ValueError(f"cannot parse '{value}'")

        self._labels[name] = value_tokens[0]

        # if there were any forward references to this name, process them
        for fref in self._fwdrefs[name]:
            fref()
        del self._fwdrefs[name]

        return self._labels[name]

    def getlabel(self, name, *, fwdfactory=FwdRef, idxrel=False, idxadj=0):
        """Return value (loc) of name, which may be a FwdRef object.

        Label values are offsets relative to the start of the block.

        If the label is a forward reference, the fwdfactory argument
        (default=FwdRef) will be used to create a FwdRef object placed
        into the instruction stream until resolved later. The default
        FwdRef class patches in a 16-bit value once known.

        Branch (and other) instructions supply FwdRef subclasses via
        fwdfactory for customized encoding/processing of resolved references.

        If fwdfactory is passed in as None (default is FwdRef), forward
        references raise a TypeError

        If idxrel is True, an already-known label is returned relative
        to (2 * current length) + idxadj, in 16-bit two's complement.
        """

        try:
            x = self._labels[name]
        except KeyError:
            return fwdfactory(name=name, block=self,
                              idxrel=idxrel, idxadj=idxadj)
        else:
            if idxrel:
                x = self._neg16(x - ((2 * len(self)) + idxadj))
        return x

    @staticmethod
    def _neg16(x):
        """convert negative numbers in 16-bit two's complement.

        Raises ValueError if x is outside -32768 .. 65535.
        """
        origx = x
        if x < 0 and x >= -32768:
            x += 65536

        if x < 0 or x > 65535:
            raise ValueError(f"offset '{origx}' out of 16-bit range")
        return x

    def _branchcommon(self, target, *, fwdfactory=None):
        """Common logic for bne, bgt, etc including unconditional br.

        Returns either a 16-bit byte offset or, for a label that has
        not been defined yet, the object created by fwdfactory.
        """

        # target at this point can be:
        #    An integer -- treat directly as an offset value
        #    A string representing a direct offset - parse/use
        #    A label -- possibly forward reference or not
        try:
            if target.startswith('$'):
                x = self.immediate_value(target)
            else:
                # it's a label, which may or may not be forward ref
                x = self.getlabel(target, fwdfactory=fwdfactory)
                try:
                    # relative to the word after this instruction
                    x -= (2 * (len(self) + 1))
                except TypeError:        # a forward reference
                    pass
        except AttributeError:
            # it's not a string, assume it is an offset
            x = target

        # At this point it's either a number or a forward ref.
        # For numbers, complete everything now.
        # For forward refs, that work is deferred.
        try:
            x = self._neg16(x)
        except TypeError:
            pass
        return x

    # dynamically construct the methods for all the Bxx branches
    # This makes methods: beq, bne, bgt, etc
    # NOTE: setattr attaches them to PDP11InstructionAssembler (the base
    # class), although they rely on _branchcommon which is defined here.
    for _bname, _code in BRANCH_CODES.items():
        def branchxx(self, target, code=_code):
            bxxfactory = partial(BranchTarget, code)
            w = self._branchcommon(target, fwdfactory=bxxfactory)
            # it's either an integer offset or a forward reference.
            # Encode integers now; forward references are encoded later
            if isinstance(w, int):
                w = BranchTarget.branchencode(code, offs=w, name=target)
            return self._seqwords([w])
        branchxx.__name__ = _bname
        setattr(PDP11InstructionAssembler, _bname, branchxx)
    del _bname, _code, branchxx

    # override JSR to provide reference/label support (like branches)
    def jsr(self, reg, dst):
        # anything not a label handled by the regular jsr method:
        if not self._allowable_label(dst):
            return super().jsr(reg, dst)

        # labels become operand mode 0o67 ... PC-relative w/offset
        inst = 0o004067 | (self.register_parser(reg) << 6)
        offs = self.getlabel(dst, idxrel=True, idxadj=4)
        return self._seqwords([inst, offs])

    def jmp(self, dst):
        # anything not a label handled by the regular jmp method:
        if not self._allowable_label(dst):
            return super().jmp(dst)

        # labels become operand mode 0o67 ... PC-relative w/offset
        inst = 0o000167
        return self._seqwords(
            [inst, self.getlabel(dst, idxrel=True, idxadj=4)])

    def sob(self, reg, target):
        """Assemble SOB reg, target.

        reg is a register number 0 .. 7 or a register-direct name
        such as 'r3'. target must already be defined (no forward
        references) and be at most 126 bytes back.

        Raises ValueError for an illegal register or target.
        """
        # the register can be a naked integer or a register name;
        # names are looked up in B6MODES, where codes 0 .. 7 are
        # exactly the register-direct spellings.
        try:
            regname = reg.upper()
        except AttributeError:
            regnum = reg
        else:
            regnum = self.B6MODES.get(regname, -1)

        # anything outside 0 .. 7 would corrupt the opcode bits
        if not (0 <= regnum <= 0o7):
            raise ValueError(f"sob '{reg}' illegal register")

        # NOTE: forward references illegal; no fwdfactory given
        try:
            x = self._branchcommon(target)
        except (ValueError, TypeError):
            raise ValueError(f"sob '{target}' illegal target") from None

        # stricter limits on the offset size for sob:
        #  Must be between 0 and -126
        if x < 0o177602:           # (65536-126)
            raise ValueError(f"sob target ({x}) too far")

        return self.literal(0o077000 | (regnum << 6) | (((-x) >> 1) & 0o77))

    def _instructions(self):
        # By default, it is an error to request the instructions if there
        # are unresolved forward references. This is a way around that.
        return list(self._instblock)

    def simh(self, *, startaddr=0o10000):
        """Generate lines of SIMH deposit commands."""

        for offs, w in enumerate(self):
            yield f"D {oct(startaddr + (2 * offs))[2:]} {oct(w)[2:]}\n"

    # This method shows one typical way to use the simh generator
    #
    # A .ini file full of deposit ('D') commands starting at startaddr
    # will be created from the instructions in the InstructionBlock
    def export_to_simh_ini(self, outfilename, /, *, startaddr=0o10000):
        """Write the block to outfilename as SIMH deposit commands."""
        with open(outfilename, 'w') as f:
            for s in self.simh(startaddr=startaddr):
                f.write(s)

            # and set the PC to the start address
            f.write(f"D PC {oct(startaddr)[2:]}\n")


if __name__ == "__main__":
    import unittest

    ASM = PDP11InstructionAssembler

    # NOTE: these are tests of instruction ASSEMBLY not execution.
    class TestMethods(unittest.TestCase):

        def test_bne_label_distance(self):
            # this should just execute without any issue
            for i in range(127):
                a = InstructionBlock()
                a.label('foo')
                for _ in range(i):
                    a.mov('r0', 'r0')
                a.bne('foo')

            # but this should ValueError ... branch too far
            a = InstructionBlock()
            a.label('foo')
            for _ in range(128):
                a.mov('r0', 'r0')
            with self.assertRaises(ValueError):
                a.bne('foo')

        def test_backlab(self):
            a = InstructionBlock()
            a.mov('bozo', 'r0')
            a.clr('r1')
            a.label('bozo')
            a.mov('bozo', 'r1')
            insts = list(a)
            self.assertEqual(insts, [0o012700, 6, 0o005001, 0o012701, 6])

        def test_labelmath_dot(self):
            a = InstructionBlock()
            a.mov('bozo', 'r0')
            a.label('B')
            a.label('BP2', value='. + 2')
            a.clr('r0')
            a.label('bozo')
            self.assertEqual(a.getlabel('B'), 4)
            self.assertEqual(a.getlabel('BP2'), 6)
            self.assertEqual(a.getlabel('BP2'), a.getlabel('bozo'))
            self.assertEqual(list(a)[1], 6)

        def test_labelmath_plus(self):
            a = InstructionBlock()
            a.label('L1', value=17)
            a.label('L2', value='L1 + 25.')
            self.assertEqual(a.getlabel('L2'), 42)

        def test_labelmath_minus(self):
            a = InstructionBlock()
            a.label('L1')
            a.clr('r0')
            a.label('L2', value='. - L1')
            self.assertEqual(a.getlabel('L2'), 2)

        def test_unresolved(self):
            a = InstructionBlock()
            a.br('bozo')
            a.clr('r0')
            a.mov(a.getlabel('xyzzy'), 'r0')
            with self.assertRaises(ValueError):
                list(a)

        def test_identity(self):
            a = InstructionBlock()
            a.mov('r0', 'r1')
            a.br('bozo')
            a.mov('r1', 'r2')
            a.label('bozo')
            a.mov('r2', 'r3')

            b = InstructionBlock()
            b.mov('r0', 'r1')
            b.br('bozo')
            b.mov('r1', 'r2')
            b.label('bozo')
            b.mov('r2', 'r3')

            self.assertEqual(list(a), list(b))

        def test_sob(self):
            for i in range(63):    # 0..62 because the sob also counts
                with self.subTest(i=i):
                    a = InstructionBlock()
                    a.label('foosob')
                    for _ in range(i):
                        a.mov('r0', 'r0')
                    inst = a.sob(0, 'foosob')
                    self.assertEqual(len(inst), 1)
                    self.assertEqual(inst[0] & 0o77, i+1)

        def test_sob_bad_register(self):
            # registers that are not 0..7 / register-direct names
            for badreg in (8, -1, 'r8', 'bozo', '(r1)'):
                with self.subTest(badreg=badreg):
                    a = InstructionBlock()
                    a.label('foosob')
                    with self.assertRaises(ValueError):
                        a.sob(badreg, 'foosob')

    unittest.main()