/* disasm.c   where all the _work_ gets done in the Netwide Disassembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */

#include <stdio.h>
#include <string.h>

#include "nasm.h"
#include "insns.h"

/* names.c   included source file defining instruction and register
 *           names for the Netwide [Dis]Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 */

static const char *conditions[] = {    /* condition code names */
    "a", "ae", "b", "be", "c", "e", "g", "ge", "l", "le", "na", "nae",
    "nb", "nbe", "nc", "ne", "ng", "nge", "nl", "nle", "no", "np",
    "ns", "nz", "o", "p", "pe", "po", "s", "z"
};

/* Register names automatically generated from regs.dat */
/* automatically generated from ./regs.dat - do not edit */
static const char *reg_names[] = {
    "ah", "al", "ax", "bh", "bl", "bp", "bx", "ch", "cl",
    "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",
    "cs", "cx", "dh", "di", "dl",
    "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7",
    "ds", "dx", "eax", "ebp", "ebx", "ecx", "edi", "edx", "es", "esi", "esp",
    "fs", "gs",
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
    "segr6", "segr7", "si", "sp", "ss",
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7",
    "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};

/* Instruction names automatically generated from insns.dat */
/* This file is auto-generated from insns.dat by insns.pl - don't edit it */
/* This file in included by names.c */
static const char *insn_names[] = {
    "aaa", "aad", "aam", "aas", "adc", "add", "addpd", "addps",
    "addsd", "addss", "addsubpd", "addsubps", "and", "andnpd",
"andnps", "andpd", "andps", "arpl", "bound", "bsf", "bsr", "bswap",
    "bt", "btc", "btr", "bts", "call", "cbw", "cdq", "clc", "cld",
    "clflush", "cli", "clts", "cmc", "cmp", "cmpeqpd", "cmpeqps",
    "cmpeqsd", "cmpeqss", "cmplepd", "cmpleps", "cmplesd", "cmpless",
    "cmpltpd", "cmpltps", "cmpltsd", "cmpltss", "cmpneqpd", "cmpneqps",
    "cmpneqsd", "cmpneqss", "cmpnlepd", "cmpnleps", "cmpnlesd", "cmpnless",
    "cmpnltpd", "cmpnltps", "cmpnltsd", "cmpnltss", "cmpordpd", "cmpordps",
    "cmpordsd", "cmpordss", "cmppd", "cmpps", "cmpsb", "cmpsd", "cmpss",
    "cmpsw", "cmpunordpd", "cmpunordps", "cmpunordsd", "cmpunordss",
    "cmpxchg", "cmpxchg486", "cmpxchg8b", "comisd", "comiss", "cpuid",
    "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", "cvtpd2pi", "cvtpd2ps", "cvtpi2pd",
    "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi", "cvtsd2si", "cvtsd2ss",
    "cvtsi2sd", "cvtsi2ss", "cvtss2sd", "cvtss2si", "cvttpd2dq", "cvttpd2pi",
    "cvttps2dq", "cvttps2pi", "cvttsd2si", "cvttss2si", "cwd", "cwde",
    "daa", "das", "db", "dd", "dec", "div", "divpd", "divps", "divsd",
    "divss", "dq", "dt", "dw", "emms", "enter", "equ", "f2xm1", "fabs",
    "fadd", "faddp", "fbld", "fbstp", "fchs", "fclex", "fcmovb", "fcmovbe",
    "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu", "fcmovu", "fcom",
    "fcomi", "fcomip", "fcomp", "fcompp", "fcos", "fdecstp", "fdisi", "fdiv",
    "fdivp", "fdivr", "fdivrp", "femms", "feni", "ffree", "ffreep", "fiadd",
    "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul", "fincstp", "finit",
    "fist", "fistp", "fisttp", "fisub", "fisubr", "fld", "fld1", "fldcw",
    "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi", "fldz", "fmul",
    "fmulp", "fnclex", "fndisi", "fneni", "fninit", "fnop", "fnsave", "fnstcw",
    "fnstenv", "fnstsw", "fpatan", "fprem", "fprem1", "fptan", "frndint",
    "frstor", "fsave", "fscale", "fsetpm", "fsin", "fsincos", "fsqrt", "fst",
    "fstcw", "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr", "fsubrp",
    "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fucompp", "fwait", "fxam",
    "fxch", "fxrstor",
    "fxsave", "fxtract", "fyl2x", "fyl2xp1", "haddpd", "haddps", "hlt",
    "hsubpd", "hsubps", "ibts", "icebp", "idiv", "imul", "in", "inc", "incbin",
    "insb", "insd", "insw", "int", "int01", "int03", "int1", "int3", "into",
    "invd", "invlpg", "iret", "iretd", "iretw", "jcxz", "jecxz", "jmp", "jmpe",
    "lahf", "lar", "lddqu", "ldmxcsr", "lds", "lea", "leave", "les", "lfence",
    "lfs", "lgdt", "lgs", "lidt", "lldt", "lmsw", "loadall", "loadall286",
    "lodsb", "lodsd", "lodsw", "loop", "loope", "loopne", "loopnz", "loopz",
    "lsl", "lss", "ltr", "maskmovdqu", "maskmovq", "maxpd", "maxps", "maxsd",
    "maxss", "mfence", "minpd", "minps", "minsd", "minss", "monitor", "mov",
    "movapd", "movaps", "movd", "movddup", "movdq2q", "movdqa", "movdqu",
    "movhlps", "movhpd", "movhps", "movlhps", "movlpd", "movlps", "movmskpd",
    "movmskps", "movntdq", "movnti", "movntpd", "movntps", "movntq", "movq",
    "movq2dq", "movsb", "movsd", "movshdup", "movsldup", "movss", "movsw",
    "movsx", "movupd", "movups", "movzx", "mul", "mulpd", "mulps", "mulsd",
    "mulss", "mwait", "neg", "nop", "not", "or", "orpd", "orps", "out",
    "outsb", "outsd", "outsw", "packssdw", "packsswb", "packuswb", "paddb",
    "paddd", "paddq", "paddsb", "paddsiw", "paddsw", "paddusb", "paddusw",
    "paddw", "pand", "pandn", "pause", "paveb", "pavgb", "pavgusb", "pavgw",
    "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb", "pcmpgtd", "pcmpgtw",
    "pdistib", "pextrw", "pf2id", "pf2iw", "pfacc", "pfadd", "pfcmpeq",
    "pfcmpge", "pfcmpgt", "pfmax", "pfmin", "pfmul", "pfnacc", "pfpnacc",
    "pfrcp", "pfrcpit1", "pfrcpit2", "pfrsqit1", "pfrsqrt", "pfsub", "pfsubr",
    "pi2fd", "pi2fw", "pinsrw", "pmachriw", "pmaddwd", "pmagw", "pmaxsw",
    "pmaxub", "pminsw", "pminub", "pmovmskb", "pmulhriw", "pmulhrwa",
    "pmulhrwc", "pmulhuw", "pmulhw", "pmullw", "pmuludq", "pmvgezb", "pmvlzb",
    "pmvnzb", "pmvzb", "pop", "popa", "popad", "popaw", "popf", "popfd",
    "popfw", "por", "prefetch", "prefetchnta", "prefetcht0", "prefetcht1",
    "prefetcht2", "prefetchw", "psadbw", "pshufd", "pshufhw",
    "pshuflw", "pshufw", "pslld", "pslldq", "psllq", "psllw", "psrad",
    "psraw", "psrld", "psrldq", "psrlq", "psrlw", "psubb", "psubd", "psubq",
    "psubsb", "psubsiw", "psubsw", "psubusb", "psubusw", "psubw", "pswapd",
    "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd", "punpcklbw",
    "punpckldq", "punpcklqdq", "punpcklwd", "push", "pusha", "pushad",
    "pushaw", "pushf", "pushfd", "pushfw", "pxor", "rcl", "rcpps", "rcpss",
    "rcr", "rdmsr", "rdpmc", "rdshr", "rdtsc", "resb", "resd", "resq", "rest",
    "resw", "ret", "retf", "retn", "rol", "ror", "rsdc", "rsldt", "rsm",
    "rsqrtps", "rsqrtss", "rsts", "sahf", "sal", "salc", "sar", "sbb", "scasb",
    "scasd", "scasw", "sfence", "sgdt", "shl", "shld", "shr", "shrd", "shufpd",
    "shufps", "sidt", "sldt", "smi", "smint", "smintold", "smsw", "sqrtpd",
    "sqrtps", "sqrtsd", "sqrtss", "stc", "std", "sti", "stmxcsr", "stosb",
    "stosd", "stosw", "str", "sub", "subpd", "subps", "subsd", "subss", "svdc",
    "svldt", "svts", "syscall", "sysenter", "sysexit", "sysret", "test",
    "ucomisd", "ucomiss", "ud0", "ud1", "ud2", "umov", "unpckhpd", "unpckhps",
    "unpcklpd", "unpcklps", "verr", "verw", "wait", "wbinvd", "wrmsr", "wrshr",
    "xadd", "xbts", "xchg", "xlat", "xlatb", "xor", "xorpd", "xorps", "xstore"
};

/*
 * Conditional instructions: mnemonic stems to which a condition-code
 * suffix is appended when disassembling.  Entry i pairs with ico[i].
 */
static const char *icn[] = {
    "cmov", "j", "set"
};

/* and the corresponding opcodes (same order as icn[]) */
static int ico[] = {
    I_CMOVcc, I_Jcc, I_SETcc
};

#define INSN_MAX 32 /* one instruction can't be longer than this */

/* Public entry point; defined at the end of this file. */
long disasm (unsigned char *data, char *output, int segsize, long offset);

/* Instruction template lists; disasm() indexes this by the first opcode byte. */
extern struct itemplate **itable[];

/*
 * Flags that go into the `segment' field of `insn' structures
 * during disassembly.
*/ #define SEG_RELATIVE 1 #define SEG_32BIT 2 #define SEG_RMREG 4 #define SEG_DISP8 8 #define SEG_DISP16 16 #define SEG_DISP32 32 #define SEG_NODISP 64 #define SEG_SIGNED 128 static int whichreg(long regflags, int regval) { /* automatically generated from ./regs.dat - do not edit */ static const int creg [] = {R_CR0,R_CR1,R_CR2,R_CR3,R_CR4,R_CR5,R_CR6,R_CR7}; static const int dreg [] = {R_DR0,R_DR1,R_DR2,R_DR3,R_DR4,R_DR5,R_DR6,R_DR7}; static const int fpureg [] = {R_ST0,R_ST1,R_ST2,R_ST3,R_ST4,R_ST5,R_ST6,R_ST7}; static const int mmxreg [] = {R_MM0,R_MM1,R_MM2,R_MM3,R_MM4,R_MM5,R_MM6,R_MM7}; static const int reg16 [] = {R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI}; static const int reg32 [] = {R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI}; static const int reg8 [] = {R_AL,R_CL,R_DL,R_BL,R_AH,R_CH,R_DH,R_BH}; static const int sreg [] = {R_ES,R_CS,R_SS,R_DS,R_FS,R_GS,R_SEGR6,R_SEGR7}; static const int treg [] = {R_TR0,R_TR1,R_TR2,R_TR3,R_TR4,R_TR5,R_TR6,R_TR7}; static const int xmmreg [] = {R_XMM0,R_XMM1,R_XMM2,R_XMM3,R_XMM4,R_XMM5,R_XMM6,R_XMM7}; if (!(REG_AL & ~regflags)) return R_AL; if (!(REG_AX & ~regflags)) return R_AX; if (!(REG_EAX & ~regflags)) return R_EAX; if (!(REG_DL & ~regflags)) return R_DL; if (!(REG_DX & ~regflags)) return R_DX; if (!(REG_EDX & ~regflags)) return R_EDX; if (!(REG_CL & ~regflags)) return R_CL; if (!(REG_CX & ~regflags)) return R_CX; if (!(REG_ECX & ~regflags)) return R_ECX; if (!(FPU0 & ~regflags)) return R_ST0; if (!(REG_CS & ~regflags)) return (regval == 1) ? R_CS : 0; if (!(REG_DESS & ~regflags)) return (regval == 0 || regval == 2 || regval == 3 ? sreg[regval] : 0); if (!(REG_FSGS & ~regflags)) return (regval == 4 || regval == 5 ? sreg[regval] : 0); if (!(REG_SEG67 & ~regflags)) return (regval == 6 || regval == 7 ? 
sreg[regval] : 0); /* All the entries below look up regval in an 8-entry array */ if (regval < 0 || regval > 7) return 0; if (!((REGMEM|BITS8) & ~regflags)) return reg8[regval]; if (!((REGMEM|BITS16) & ~regflags)) return reg16[regval]; if (!((REGMEM|BITS32) & ~regflags)) return reg32[regval]; if (!(REG_SREG & ~regflags)) return sreg[regval]; if (!(REG_CREG & ~regflags)) return creg[regval]; if (!(REG_DREG & ~regflags)) return dreg[regval]; if (!(REG_TREG & ~regflags)) return treg[regval]; if (!(FPUREG & ~regflags)) return fpureg[regval]; if (!(MMXREG & ~regflags)) return mmxreg[regval]; if (!(XMMREG & ~regflags)) return xmmreg[regval]; return 0; } static const char *whichcond(int condval) { static int conds[] = { C_O, C_NO, C_C, C_NC, C_Z, C_NZ, C_NA, C_A, C_S, C_NS, C_PE, C_PO, C_L, C_NL, C_NG, C_G }; return conditions[conds[condval]]; } /* * Process an effective address (ModRM) specification. */ static unsigned char *do_ea (unsigned char *data, int modrm, int asize, int segsize, operand *op) { int mod, rm, scale, index, base; mod = (modrm >> 6) & 03; rm = modrm & 07; if (mod == 3) { /* pure register version */ op->basereg = rm; op->segment |= SEG_RMREG; return data; } op->addr_size = 0; if (asize == 16) { /* * specifies the displacement size (none, byte or * word), and specifies the register combination. * Exception: mod=0,rm=6 does not specify [BP] as one might * expect, but instead specifies [disp16]. 
*/ op->indexreg = op->basereg = -1; op->scale = 1; /* always, in 16 bits */ switch (rm) { case 0: op->basereg = R_BX; op->indexreg = R_SI; break; case 1: op->basereg = R_BX; op->indexreg = R_DI; break; case 2: op->basereg = R_BP; op->indexreg = R_SI; break; case 3: op->basereg = R_BP; op->indexreg = R_DI; break; case 4: op->basereg = R_SI; break; case 5: op->basereg = R_DI; break; case 6: op->basereg = R_BP; break; case 7: op->basereg = R_BX; break; } if (rm == 6 && mod == 0) { /* special case */ op->basereg = -1; if (segsize != 16) op->addr_size = 16; mod = 2; /* fake disp16 */ } switch (mod) { case 0: op->segment |= SEG_NODISP; break; case 1: op->segment |= SEG_DISP8; op->offset = (signed char) *data++; break; case 2: op->segment |= SEG_DISP16; op->offset = *data++; op->offset |= ((unsigned) *data++) << 8; break; } return data; } else { /* * Once again, specifies displacement size (this time * none, byte or *dword*), while specifies the base * register. Again, [EBP] is missing, replaced by a pure * disp32 (this time that's mod=0,rm=*5*). However, rm=4 * indicates not a single base register, but instead the * presence of a SIB byte... 
*/ op->indexreg = -1; switch (rm) { case 0: op->basereg = R_EAX; break; case 1: op->basereg = R_ECX; break; case 2: op->basereg = R_EDX; break; case 3: op->basereg = R_EBX; break; case 5: op->basereg = R_EBP; break; case 6: op->basereg = R_ESI; break; case 7: op->basereg = R_EDI; break; } if (rm == 5 && mod == 0) { op->basereg = -1; if (segsize != 32) op->addr_size = 32; mod = 2; /* fake disp32 */ } if (rm == 4) { /* process SIB */ scale = (*data >> 6) & 03; index = (*data >> 3) & 07; base = *data & 07; data++; op->scale = 1 << scale; switch (index) { case 0: op->indexreg = R_EAX; break; case 1: op->indexreg = R_ECX; break; case 2: op->indexreg = R_EDX; break; case 3: op->indexreg = R_EBX; break; case 4: op->indexreg = -1; break; case 5: op->indexreg = R_EBP; break; case 6: op->indexreg = R_ESI; break; case 7: op->indexreg = R_EDI; break; } switch (base) { case 0: op->basereg = R_EAX; break; case 1: op->basereg = R_ECX; break; case 2: op->basereg = R_EDX; break; case 3: op->basereg = R_EBX; break; case 4: op->basereg = R_ESP; break; case 6: op->basereg = R_ESI; break; case 7: op->basereg = R_EDI; break; case 5: if (mod == 0) { mod = 2; op->basereg = -1; } else op->basereg = R_EBP; break; } } switch (mod) { case 0: op->segment |= SEG_NODISP; break; case 1: op->segment |= SEG_DISP8; op->offset = (signed char) *data++; break; case 2: op->segment |= SEG_DISP32; op->offset = *data++; op->offset |= ((unsigned) *data++) << 8; op->offset |= ((long) *data++) << 16; op->offset |= ((long) *data++) << 24; break; } return data; } } /* * Determine whether the instruction template in t corresponds to the data * stream in data. Return the number of bytes matched if so. 
*/ static int matches (struct itemplate *t, unsigned char *data, int asize, int osize, int segsize, int rep, insn *ins) { unsigned char * r = (unsigned char *)(t->code); unsigned char * origdata = data; int a_used = FALSE, o_used = FALSE; int drep = 0; if ( rep == 0xF2 ) drep = P_REPNE; else if ( rep == 0xF3 ) drep = P_REP; while (*r) { int c = *r++; if (c >= 01 && c <= 03) { while (c--) if (*r++ != *data++) return FALSE; } if (c == 04) { switch (*data++) { case 0x07: ins->oprs[0].basereg = 0; break; case 0x17: ins->oprs[0].basereg = 2; break; case 0x1F: ins->oprs[0].basereg = 3; break; default: return FALSE; } } if (c == 05) { switch (*data++) { case 0xA1: ins->oprs[0].basereg = 4; break; case 0xA9: ins->oprs[0].basereg = 5; break; default: return FALSE; } } if (c == 06) { switch (*data++) { case 0x06: ins->oprs[0].basereg = 0; break; case 0x0E: ins->oprs[0].basereg = 1; break; case 0x16: ins->oprs[0].basereg = 2; break; case 0x1E: ins->oprs[0].basereg = 3; break; default: return FALSE; } } if (c == 07) { switch (*data++) { case 0xA0: ins->oprs[0].basereg = 4; break; case 0xA8: ins->oprs[0].basereg = 5; break; default: return FALSE; } } if (c >= 010 && c <= 012) { int t = *r++, d = *data++; if (d < t || d > t+7) return FALSE; else { ins->oprs[c-010].basereg = d-t; ins->oprs[c-010].segment |= SEG_RMREG; } } if (c == 017) if (*data++) return FALSE; if (c >= 014 && c <= 016) { ins->oprs[c-014].offset = (signed char) *data++; ins->oprs[c-014].segment |= SEG_SIGNED; } if (c >= 020 && c <= 022) ins->oprs[c-020].offset = *data++; if (c >= 024 && c <= 026) ins->oprs[c-024].offset = *data++; if (c >= 030 && c <= 032) { ins->oprs[c-030].offset = *data++; ins->oprs[c-030].offset |= (((unsigned) *data++) << 8); } if (c >= 034 && c <= 036) { ins->oprs[c-034].offset = *data++; ins->oprs[c-034].offset |= (((unsigned) *data++) << 8); if (osize == 32) { ins->oprs[c-034].offset |= (((long) *data++) << 16); ins->oprs[c-034].offset |= (((long) *data++) << 24); } if (segsize != asize) 
ins->oprs[c-034].addr_size = asize; } if (c >= 040 && c <= 042) { ins->oprs[c-040].offset = *data++; ins->oprs[c-040].offset |= (((unsigned) *data++) << 8); ins->oprs[c-040].offset |= (((long) *data++) << 16); ins->oprs[c-040].offset |= (((long) *data++) << 24); } if (c >= 044 && c <= 046) { ins->oprs[c-044].offset = *data++; ins->oprs[c-044].offset |= (((unsigned) *data++) << 8); if (asize == 32) { ins->oprs[c-044].offset |= (((long) *data++) << 16); ins->oprs[c-044].offset |= (((long) *data++) << 24); } if (segsize != asize) ins->oprs[c-044].addr_size = asize; } if (c >= 050 && c <= 052) { ins->oprs[c-050].offset = (signed char) *data++; ins->oprs[c-050].segment |= SEG_RELATIVE; } if (c >= 060 && c <= 062) { ins->oprs[c-060].offset = *data++; ins->oprs[c-060].offset |= (((unsigned) *data++) << 8); ins->oprs[c-060].segment |= SEG_RELATIVE; ins->oprs[c-060].segment &= ~SEG_32BIT; } if (c >= 064 && c <= 066) { ins->oprs[c-064].offset = *data++; ins->oprs[c-064].offset |= (((unsigned) *data++) << 8); if (osize == 32) { ins->oprs[c-064].offset |= (((long) *data++) << 16); ins->oprs[c-064].offset |= (((long) *data++) << 24); ins->oprs[c-064].segment |= SEG_32BIT; } else ins->oprs[c-064].segment &= ~SEG_32BIT; ins->oprs[c-064].segment |= SEG_RELATIVE; if (segsize != osize) { ins->oprs[c-064].type = (ins->oprs[c-064].type & NON_SIZE) | ((osize == 16) ? 
BITS16 : BITS32); } } if (c >= 070 && c <= 072) { ins->oprs[c-070].offset = *data++; ins->oprs[c-070].offset |= (((unsigned) *data++) << 8); ins->oprs[c-070].offset |= (((long) *data++) << 16); ins->oprs[c-070].offset |= (((long) *data++) << 24); ins->oprs[c-070].segment |= SEG_32BIT | SEG_RELATIVE; } if (c >= 0100 && c < 0130) { int modrm = *data++; ins->oprs[c & 07].basereg = (modrm >> 3) & 07; ins->oprs[c & 07].segment |= SEG_RMREG; data = do_ea (data, modrm, asize, segsize, &ins->oprs[(c >> 3) & 07]); } if (c >= 0130 && c <= 0132) { ins->oprs[c-0130].offset = *data++; ins->oprs[c-0130].offset |= (((unsigned) *data++) << 8); } if (c >= 0140 && c <= 0142) { ins->oprs[c-0140].offset = *data++; ins->oprs[c-0140].offset |= (((unsigned) *data++) << 8); ins->oprs[c-0140].offset |= (((long) *data++) << 16); ins->oprs[c-0140].offset |= (((long) *data++) << 24); } if (c >= 0200 && c <= 0277) { int modrm = *data++; if (((modrm >> 3) & 07) != (c & 07)) return FALSE; /* spare field doesn't match up */ data = do_ea (data, modrm, asize, segsize, &ins->oprs[(c >> 3) & 07]); } if (c >= 0300 && c <= 0302) { if (asize) ins->oprs[c-0300].segment |= SEG_32BIT; else ins->oprs[c-0300].segment &= ~SEG_32BIT; a_used = TRUE; } if (c == 0310) { if (asize == 32) return FALSE; else a_used = TRUE; } if (c == 0311) { if (asize == 16) return FALSE; else a_used = TRUE; } if (c == 0312) { if (asize != segsize) return FALSE; else a_used = TRUE; } if (c == 0320) { if (osize == 32) return FALSE; else o_used = TRUE; } if (c == 0321) { if (osize == 16) return FALSE; else o_used = TRUE; } if (c == 0322) { if (osize != segsize) return FALSE; else o_used = TRUE; } if (c == 0330) { int t = *r++, d = *data++; if (d < t || d > t+15) return FALSE; else ins->condition = d - t; } if (c == 0331) { if ( rep ) return FALSE; } if (c == 0332) { if (drep == P_REP) drep = P_REPE; } if (c == 0333) { if ( rep != 0xF3 ) return FALSE; drep = 0; } } /* * Check for unused rep or a/o prefixes. 
*/ ins->nprefix = 0; if (drep) ins->prefixes[ins->nprefix++] = drep; if (!a_used && asize != segsize) ins->prefixes[ins->nprefix++] = (asize == 16 ? P_A16 : P_A32); if (!o_used && osize != segsize) ins->prefixes[ins->nprefix++] = (osize == 16 ? P_O16 : P_O32); return data - origdata; } long disasm (unsigned char *data, char *output, int segsize, long offset) { struct itemplate **p, **best_p; int length, best_length = 0; const char *segover; int rep, lock, asize, osize, i, slen, colon; unsigned char *origdata; int works; insn tmp_ins = { NULL }, ins; unsigned long goodness, best; /* * Scan for prefixes. */ asize = osize = segsize; segover = NULL; ins.condition = ins.nprefix = rep = lock = 0; origdata = data; for (;;) { if (*data == 0xF3 || *data == 0xF2) rep = *data++; else if (*data == 0xF0) lock = *data++; else if (*data == 0x2E || *data == 0x36 || *data == 0x3E || *data == 0x26 || *data == 0x64 || *data == 0x65) { switch (*data++) { case 0x2E: segover = "cs"; break; case 0x36: segover = "ss"; break; case 0x3E: segover = "ds"; break; case 0x26: segover = "es"; break; case 0x64: segover = "fs"; break; case 0x65: segover = "gs"; break; } } else if (*data == 0x66) { osize = 48 - segsize; data++; } else if (*data == 0x67) { asize = 48 - segsize; data++; } else break; } tmp_ins.oprs[0].segment = tmp_ins.oprs[1].segment = tmp_ins.oprs[2].segment = tmp_ins.oprs[0].addr_size = tmp_ins.oprs[1].addr_size = tmp_ins.oprs[2].addr_size = (segsize == 16 ? 0 : SEG_32BIT); tmp_ins.condition = -1; best = ~0UL; /* Worst possible */ best_p = NULL; for (p = itable[*data]; *p; p++) { if ( (length = matches(*p, data, asize, osize, segsize, rep, &tmp_ins)) ) { works = TRUE; /* * Final check to make sure the types of r/m match up. */ for (i = 0; i < (*p)->operands; i++) { if ( /* If it's a mem-only EA but we have a register, die. */ ((tmp_ins.oprs[i].segment & SEG_RMREG) && !(MEMORY & ~(*p)->opd[i])) || /* If it's a reg-only EA but we have a memory ref, die. 
*/ (!(tmp_ins.oprs[i].segment & SEG_RMREG) && !(REGNORM & ~(*p)->opd[i]) && !((*p)->opd[i] & REG_SMASK)) || /* Register type mismatch (eg FS vs REG_DESS): die. */ ((((*p)->opd[i] & (REGISTER | FPUREG)) || (tmp_ins.oprs[i].segment & SEG_RMREG)) && !whichreg ((*p)->opd[i], tmp_ins.oprs[i].basereg))) { works = FALSE; break; } } if (works) { goodness = (*p)->flags & IF_PFMASK; if ( goodness < best ) { /* This is the best one found so far */ best = goodness; best_p = p; best_length = length; ins = tmp_ins; } } } } if (!best_p) { /* no instruction was matched */ sprintf(output, "db 0%02xh", data[0]); return 1; } /* Pick the best match */ p = best_p; length = best_length; slen = 0; if (lock) slen += sprintf(output+slen, "lock "); for (i = 0; i < ins.nprefix; i++) switch (ins.prefixes[i]) { case P_REP: slen += sprintf(output+slen, "rep "); break; case P_REPE: slen += sprintf(output+slen, "repe "); break; case P_REPNE: slen += sprintf(output+slen, "repne "); break; case P_A16: slen += sprintf(output+slen, "a16 "); break; case P_A32: slen += sprintf(output+slen, "a32 "); break; case P_O16: slen += sprintf(output+slen, "o16 "); break; case P_O32: slen += sprintf(output+slen, "o32 "); break; } for (i = 0; i < (int)elements(ico); i++) if ((*p)->opcode == ico[i]) { slen += sprintf(output+slen, "%s%s", icn[i], whichcond(ins.condition)); break; } if (i >= (int)elements(ico)) slen += sprintf(output+slen, "%s", insn_names[(*p)->opcode]); colon = FALSE; length += data - origdata; /* fix up for prefixes */ for (i=0; i<(*p)->operands; i++) { output[slen++] = (colon ? ':' : i==0 ? 
' ' : ','); if (ins.oprs[i].segment & SEG_RELATIVE) { ins.oprs[i].offset += offset + length; /* * sort out wraparound */ if (!(ins.oprs[i].segment & SEG_32BIT)) ins.oprs[i].offset &= 0xFFFF; } if ((*p)->opd[i] & COLON) colon = TRUE; else colon = FALSE; if (((*p)->opd[i] & (REGISTER | FPUREG)) || (ins.oprs[i].segment & SEG_RMREG)) { ins.oprs[i].basereg = whichreg ((*p)->opd[i], ins.oprs[i].basereg); if ( (*p)->opd[i] & TO ) slen += sprintf(output+slen, "to "); slen += sprintf(output+slen, "%s", reg_names[ins.oprs[i].basereg-EXPR_REG_START]); } else if (!(UNITY & ~(*p)->opd[i])) { output[slen++] = '1'; } else if ( (*p)->opd[i] & IMMEDIATE ) { if ( (*p)->opd[i] & BITS8 ) { slen += sprintf(output+slen, "byte "); if (ins.oprs[i].segment & SEG_SIGNED) { if (ins.oprs[i].offset < 0) { ins.oprs[i].offset *= -1; output[slen++] = '-'; } else output[slen++] = '+'; } } else if ( (*p)->opd[i] & BITS16 ) { slen += sprintf(output+slen, "word "); } else if ( (*p)->opd[i] & BITS32 ) { slen += sprintf(output+slen, "dword "); } else if ( (*p)->opd[i] & NEAR ) { slen += sprintf(output+slen, "near "); } else if ( (*p)->opd[i] & SHORT ) { slen += sprintf(output+slen, "short "); } slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); } else if ( !(MEM_OFFS & ~(*p)->opd[i]) ) { slen += sprintf(output+slen, "[%s%s%s0x%lx]", (segover ? segover : ""), (segover ? ":" : ""), (ins.oprs[i].addr_size == 32 ? "dword " : ins.oprs[i].addr_size == 16 ? 
"word " : ""), ins.oprs[i].offset); segover = NULL; } else if ( !(REGMEM & ~(*p)->opd[i]) ) { int started = FALSE; if ( (*p)->opd[i] & BITS8 ) slen += sprintf(output+slen, "byte "); if ( (*p)->opd[i] & BITS16 ) slen += sprintf(output+slen, "word "); if ( (*p)->opd[i] & BITS32 ) slen += sprintf(output+slen, "dword "); if ( (*p)->opd[i] & BITS64 ) slen += sprintf(output+slen, "qword "); if ( (*p)->opd[i] & BITS80 ) slen += sprintf(output+slen, "tword "); if ( (*p)->opd[i] & FAR ) slen += sprintf(output+slen, "far "); if ( (*p)->opd[i] & NEAR ) slen += sprintf(output+slen, "near "); output[slen++] = '['; if (ins.oprs[i].addr_size) slen += sprintf(output+slen, "%s", (ins.oprs[i].addr_size == 32 ? "dword " : ins.oprs[i].addr_size == 16 ? "word " : "")); if (segover) { slen += sprintf(output+slen, "%s:", segover); segover = NULL; } if (ins.oprs[i].basereg != -1) { slen += sprintf(output+slen, "%s", reg_names[(ins.oprs[i].basereg - EXPR_REG_START)]); started = TRUE; } if (ins.oprs[i].indexreg != -1) { if (started) output[slen++] = '+'; slen += sprintf(output+slen, "%s", reg_names[(ins.oprs[i].indexreg - EXPR_REG_START)]); if (ins.oprs[i].scale > 1) slen += sprintf(output+slen, "*%d", ins.oprs[i].scale); started = TRUE; } if (ins.oprs[i].segment & SEG_DISP8) { int sign = '+'; if (ins.oprs[i].offset & 0x80) { ins.oprs[i].offset = - (signed char) ins.oprs[i].offset; sign = '-'; } slen += sprintf(output+slen, "%c0x%lx", sign, ins.oprs[i].offset); } else if (ins.oprs[i].segment & SEG_DISP16) { if (started) output[slen++] = '+'; slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); } else if (ins.oprs[i].segment & SEG_DISP32) { if (started) output[slen++] = '+'; slen += sprintf(output+slen, "0x%lx", ins.oprs[i].offset); } output[slen++] = ']'; } else { slen += sprintf(output+slen, "", i); } } output[slen] = '\0'; if (segover) { /* unused segment override */ char *p = output; int count = slen+1; while (count--) p[count+3] = p[count]; strncpy (output, segover, 2); output[2] 
= ' '; } return length; }