enhancements for performance tests

This commit is contained in:
Neil Webber 2024-05-18 17:25:52 -05:00
parent ddd800afb6
commit d4b8e8d896

View file

@ -40,7 +40,7 @@ from pdpasmhelper import InstructionBlock
class TestMethods(unittest.TestCase): class TestMethods(unittest.TestCase):
PDPLOGLEVEL = 'DEBUG' PDPLOGLEVEL = 'INFO'
# used to create various instances, collects all the options # used to create various instances, collects all the options
# detail into this one place... mostly this is about loglevel # detail into this one place... mostly this is about loglevel
@ -2706,64 +2706,74 @@ class TestMethods(unittest.TestCase):
# are all just dummied up right now # are all just dummied up right now
# this is not a unit test, invoke it using timeit etc # this is not a unit test, invoke it using timeit etc
def speed_test_setup(self, *, loopcount=200, mmu=True, inst=None): def speed_test_setup(self, instwords, /, *, loopcount=200, mmu=True):
"""Set up a test run of 50*loopcount inst instructions. """Set up a test run of the instwords (1 word or 2)
Returns tuple: p, pc, per .. see speed_test_run()
Returns tuple: p, pc If len(instwords) is 1, make a loop of 49 instructions and one SOB
If len == 2 (i.e., an instruction + operand), 24 instructions + SOB
""" """
p, pc = self.simplemapped_pdp() p, pc = self.u64mapped_pdp()
# the returned pdp is loaded with instructions for setting up # the returned pdp is loaded with instructions for setting up
# the mmu; only do them if that's what is wanted # the mmu; only do them if that's what is wanted
# #
# NOTE: the test code is run in USER mode Because Reasons # NOTE: the test code is run in USER mode Because Reasons
# (was experimenting with virtual caches and this was helpful). # (was experimenting with virtual caches and this was helpful).
# The test code will run at (virtual/user) 0 when the MMU is
# enabled, or its physical location (0o20000) when off.
user_physloc = 0o20000
if mmu: if mmu:
p.run(pc=pc) # set up all those mappings p.run(pc=pc) # set up all those mappings
usermode_base = 0 # physical 0o20000 maps here in USER mode usermode_base = 0o10000 # phys 0o210000 maps here in USER mode
user_physloc = 0o210000
else: else:
user_physloc = 0o20000
usermode_base = user_physloc usermode_base = user_physloc
# by default the instruction being timed will be MOV R1,R0
# but other instructions could be used. MUST ONLY BE ONE WORD
if inst is None:
inst = 0o010100
# this is the tiny kernel code used to set up and start # this is the tiny kernel code used to set up and start
# each iteration of the user mode timing code. It slightly # each major iteration of the user mode timing code. The one-time
# distorts the per-instruction overhead of course. C'est la vie. # overhead of these few instructions is irrelevant in the timing test.
k = InstructionBlock() k = InstructionBlock()
k.mov(0o20000, 'sp') # establish proper kernel stack k.mov(0o20000, 'sp') # establish proper kernel stack
k.mov(0o140340, '-(sp)') # USER mode, no interrupts k.mov(0o140340, '-(sp)') # USER mode, no interrupts
k.mov(usermode_base, '-(sp)') # pc start for loop/USER code k.mov(usermode_base, '-(sp)') # pc start for loop/USER code
# these environmental "knowns" are available for the test inst
k.mov(0o1000, 'r5') # usable writeable addr
k.clr('r0')
k.clr('r1')
k.rtt() # off to the races! k.rtt() # off to the races!
kloc = 0o4000 kloc = 0o4000
for a2, w in enumerate(k): for a2, w in enumerate(k):
p.mmu.wordRW(kloc + (2 * a2), w) p.mmu.wordRW(kloc + (2 * a2), w)
# The test timing loop... 49 "inst" instructions if len(instwords) == 1:
# and an SOB for looping (so 50 overall instructions per loop) per = 49
elif len(instwords) == 2:
per = 24
else:
raise ValueError("instwords must be a list of length 1 or 2")
# The test timing loop: N instructions and an SOB
# NOTE: The instructions must not exceed SOB branch reach
a = InstructionBlock() a = InstructionBlock()
a.mov(loopcount, 'r4') a.mov(loopcount, 'r4')
a.label('LOOP') a.label('LOOP')
for i in range(49): for i in range(per):
a.literal(inst) for w in instwords:
a.literal(w)
a.sob('r4', 'LOOP') a.sob('r4', 'LOOP')
a.halt() a.halt()
for a2, w in enumerate(a): for a2, w in enumerate(a):
p.physRW(user_physloc + (2 * a2), w) p.physRW(user_physloc + (2 * a2), w)
return p, kloc # per+1 will be 50 (len(instwords) == 1) or 25 ( == 2)
return p, kloc, per+1
def speed_test_run(self, p, instloc): def speed_test_run(self, p, instloc):
"""See speed_test_setup""" """See speed_test_setup"""
p.psw = 0
p.run(pc=instloc) p.run(pc=instloc)
@ -2771,42 +2781,78 @@ if __name__ == "__main__":
import argparse import argparse
import timeit import timeit
def asminst(s):
    """Convert a command-line instruction argument into a list of words.

    Intended for use as an argparse ``type=`` converter. Two forms are
    accepted:

      * a numeric literal understood by asmint() (e.g. '010001');
        the result is a one-element list holding that value.
      * a mnemonic, optionally followed by comma-separated operands
        (e.g. 'MOV r0,r1', 'MOV r0, r1', 'HALT'). The lowercased
        mnemonic is looked up as a method on a fresh InstructionBlock
        and called with the operand strings; the words of the resulting
        block are returned as a list.

    Raises ValueError for an empty argument, an unknown mnemonic, or an
    empty operand. argparse reports ValueError/TypeError from a type
    converter as a rejected argument rather than as a traceback, so
    every malformed-input path here is funneled into ValueError.
    """
    try:
        return [asmint(s)]
    except (ValueError, IndexError):
        # Not a number (e.g. 'MOV r0,r1' instead of '010001').
        # IndexError is what asmint raises for an empty string.
        pass

    # Split off the mnemonic; everything after it is the operand list.
    parts = s.split(None, 1)
    if not parts:
        raise ValueError("empty instruction")
    mnem = parts[0].lower()

    operands = []
    if len(parts) == 2:
        operands = [op.strip() for op in parts[1].split(',')]
        if not all(operands):
            raise ValueError(f"empty operand in instruction {s!r}")

    a = InstructionBlock()

    # Never reach private/dunder attributes from user input, and turn a
    # missing method into ValueError (AttributeError would escape argparse).
    asmmeth = None if mnem.startswith('_') else getattr(a, mnem, None)
    if not callable(asmmeth):
        raise ValueError(f"unknown instruction mnemonic {parts[0]!r}")

    # NOTE(review): operands are passed through as strings, exactly as
    # before; numeric operands presumably need InstructionBlock to accept
    # string forms -- confirm against pdpasmhelper.
    asmmeth(*operands)
    return list(a)
def asmint(s):
    """Convert an integer literal in assembler-style notation to an int.

    Accepted forms:
        0o<digits>   octal
        0x<digits>   hexadecimal (digit letters are case-insensitive)
        <digits>.    decimal (trailing period)
        <digits>     octal (the default when no prefix/suffix is given)

    The '0o' and '0x' prefixes themselves are case-sensitive. No sign,
    whitespace, or underscore is accepted.

    Raises ValueError if there are no digits at all (including the empty
    string, a bare prefix, or a bare '.'), or if any character is not a
    valid digit in the selected base.
    """
    if s.startswith('0o'):
        base, body = 8, s[2:]
    elif s.startswith('0x'):
        base, body = 16, s[2:]
    elif s.endswith('.'):
        base, body = 10, s[:-1]
    else:
        base, body = 8, s

    if not body:
        raise ValueError(f"no digits in integer literal {s!r}")

    # Only the first 'base' characters are legal digits; this is what
    # rejects '8' and '9' in octal (prefixed or not).
    digits = '0123456789abcdef'[:base]

    v = 0
    for ch in body.lower():
        dv = digits.find(ch)
        if dv < 0:
            raise ValueError(
                f"invalid base-{base} digit {ch!r} in integer literal {s!r}")
        v = (v * base) + dv
    return v
movr1r0 = [0o010100]
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-p', '--performance', action="store_true") parser.add_argument('-p', '--performance', action="store_true")
parser.add_argument('-i', '--instruction', default=movr1r0, type=int) parser.add_argument('-i', '--instruction', default=movr1r0, type=asminst,
help="Test instruction, in octal. E.g.: 010203")
parser.add_argument('--nommu', action="store_true") parser.add_argument('--nommu', action="store_true")
parser.add_argument('--clr', action="store_true")
parser.add_argument('tests', nargs="*") parser.add_argument('tests', nargs="*")
args = parser.parse_args() args = parser.parse_args()
if args.performance: if args.performance:
# the goal is to execute inst 1M times. The loop executes 49 inst # If the instruction is 1 word the loop will be 49 instructions
# instructions and 1 sob (which taken together are considered as 50). # and one SOB, considered as 50 instructions.
# Want to drive "number=" up more than loopcount, so use # If the instruction is 2 words it will be 24 instructions + SOB,
# loopcount=20 ... means "1000" inst instructions # considered as 25 instructions.
# number=1000 ... do that 1000 times, for 1M instructions
# simple way to test CLR instruction vs default MOV. # the goal is to execute inst 1M times. The loop executes 'per'
# The CLR instruction is not optimized the way MOV is so # instructions and 1 sob; Want to drive "number=" up more than
# this shows the difference. # loopcount, so use
if args.clr: # loopcount=20 ... means 20*per instructions
args.instruction = 0o005000 # number=1000000/(loopcount*per)
#
# number will be 1000 for 1 instruction word, or 2000 for 2.
t = TestMethods() t = TestMethods()
mmu = not args.nommu mmu = not args.nommu
inst = args.instruction instwords = args.instruction
p, pc = t.speed_test_setup(loopcount=20, inst=inst, mmu=mmu) loopcount = 20
p, pc, per = t.speed_test_setup(
instwords, loopcount=loopcount, mmu=mmu)
number = 1000000 // (loopcount * per)
ta = timeit.repeat(stmt='t.speed_test_run(p, pc)', ta = timeit.repeat(stmt='t.speed_test_run(p, pc)',
number=1000, globals=globals(), repeat=50) number=number, globals=globals(), repeat=50)
tnsec = round(1000 * min(*ta), 1) tnsec = round(1000 * min(*ta), 1)
if args.instruction == movr1r0: ws = list(map(lambda w: f"{oct(w)[2:]:0>6s}", args.instruction))
instr = 'MOV R1,R0' print(f"Instruction {ws} took {tnsec} nsecs")
elif (args.instruction & 0o177770) == 0o005000:
instr = f'CLR R{args.instruction & 7}'
else:
instr = oct(args.instruction)
print(f"Instruction {instr} took {tnsec} nsecs")
else: else:
unittest.main() unittest.main()