| field | value | date |
|---|---|---|
| author | Uneven Prankster <unevenprankster@protonmail.com> | 2023-07-12 13:22:29 -0300 |
| committer | Uneven Prankster <unevenprankster@protonmail.com> | 2023-07-12 13:22:29 -0300 |
| commit | fa2bdd711212ba6b7a94a20971e8bfa281e73296 (patch) | |
| tree | 6713b3c0379507d49558287b71dd360ce188a2f0 /tinycc/i386-asm.c | |
lol
Diffstat (limited to 'tinycc/i386-asm.c')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | tinycc/i386-asm.c | 1744 |
1 file changed, 1744 insertions, 0 deletions
diff --git a/tinycc/i386-asm.c b/tinycc/i386-asm.c new file mode 100644 index 0000000..3cc8d18 --- /dev/null +++ b/tinycc/i386-asm.c @@ -0,0 +1,1744 @@ +/* + * i386 specific functions for TCC assembler + * + * Copyright (c) 2001, 2002 Fabrice Bellard + * Copyright (c) 2009 Frédéric Feret (x86_64 support) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define USING_GLOBALS +#include "tcc.h" + +#define MAX_OPERANDS 3 + +#define TOK_ASM_first TOK_ASM_clc +#define TOK_ASM_last TOK_ASM_emms +#define TOK_ASM_alllast TOK_ASM_subps + +#define OPC_B 0x01 /* only used with OPC_WL */ +#define OPC_WL 0x02 /* accepts w, l or no suffix */ +#define OPC_BWL (OPC_B | OPC_WL) /* accepts b, w, l or no suffix */ +#define OPC_REG 0x04 /* register is added to opcode */ +#define OPC_MODRM 0x08 /* modrm encoding */ + +#define OPCT_MASK 0x70 +#define OPC_FWAIT 0x10 /* add fwait opcode */ +#define OPC_SHIFT 0x20 /* shift opcodes */ +#define OPC_ARITH 0x30 /* arithmetic opcodes */ +#define OPC_FARITH 0x40 /* FPU arithmetic opcodes */ +#define OPC_TEST 0x50 /* test opcodes */ +#define OPC_0F01 0x60 /* 0x0f01XX (group 7, XX is 2nd opcode, + no operands and unstructured mod/rm) */ +#define OPCT_IS(v,i) (((v) & OPCT_MASK) == (i)) + +#define OPC_0F 0x100 /* Is secondary map (0x0f prefix) */ +#define OPC_48 0x200 /* Always has REX prefix */ +#ifdef TCC_TARGET_X86_64 +# define OPC_WLQ 0x1000 /* accepts w, l, q or no suffix */ +# define OPC_BWLQ (OPC_B | OPC_WLQ) /* accepts b, w, l, q or no suffix */ +# define OPC_WLX OPC_WLQ +# define OPC_BWLX OPC_BWLQ +#else +# define OPC_WLX OPC_WL +# define OPC_BWLX OPC_BWL +#endif + +#define OPC_GROUP_SHIFT 13 + +/* in order to compress the operand type, we use specific operands and + we or only with EA */ +enum { + OPT_REG8=0, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_REG16, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_REG32, /* warning: value is hardcoded from TOK_ASM_xxx */ +#ifdef TCC_TARGET_X86_64 + OPT_REG64, /* warning: value is hardcoded from TOK_ASM_xxx */ +#endif + OPT_MMX, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_SSE, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_CR, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_TR, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_DB, /* warning: value is hardcoded from TOK_ASM_xxx */ + OPT_SEG, + OPT_ST, +#ifdef TCC_TARGET_X86_64 + OPT_REG8_LOW, /* %spl,%bpl,%sil,%dil, encoded like ah,ch,dh,bh, but + with REX prefix, not used in insn templates */ +#endif + OPT_IM8, + OPT_IM8S, + OPT_IM16, + OPT_IM32, +#ifdef TCC_TARGET_X86_64 + OPT_IM64, +#endif + OPT_EAX, /* %al, %ax, %eax or %rax register */ + OPT_ST0, /* %st(0) register */ + OPT_CL, /* %cl register */ + OPT_DX, /* %dx register */ + OPT_ADDR, /* OP_EA with only offset */ + OPT_INDIR, /* *(expr) */ + /* composite types */ 
+ OPT_COMPOSITE_FIRST, + OPT_IM, /* IM8 | IM16 | IM32 */ + OPT_REG, /* REG8 | REG16 | REG32 | REG64 */ + OPT_REGW, /* REG16 | REG32 | REG64 */ + OPT_IMW, /* IM16 | IM32 */ + OPT_MMXSSE, /* MMX | SSE */ + OPT_DISP, /* Like OPT_ADDR, but emitted as displacement (for jumps) */ + OPT_DISP8, /* Like OPT_ADDR, but only 8bit (short jumps) */ + /* can be ored with any OPT_xxx */ + OPT_EA = 0x80 +}; + +#define OP_REG8 (1 << OPT_REG8) +#define OP_REG16 (1 << OPT_REG16) +#define OP_REG32 (1 << OPT_REG32) +#define OP_MMX (1 << OPT_MMX) +#define OP_SSE (1 << OPT_SSE) +#define OP_CR (1 << OPT_CR) +#define OP_TR (1 << OPT_TR) +#define OP_DB (1 << OPT_DB) +#define OP_SEG (1 << OPT_SEG) +#define OP_ST (1 << OPT_ST) +#define OP_IM8 (1 << OPT_IM8) +#define OP_IM8S (1 << OPT_IM8S) +#define OP_IM16 (1 << OPT_IM16) +#define OP_IM32 (1 << OPT_IM32) +#define OP_EAX (1 << OPT_EAX) +#define OP_ST0 (1 << OPT_ST0) +#define OP_CL (1 << OPT_CL) +#define OP_DX (1 << OPT_DX) +#define OP_ADDR (1 << OPT_ADDR) +#define OP_INDIR (1 << OPT_INDIR) +#ifdef TCC_TARGET_X86_64 +# define OP_REG64 (1 << OPT_REG64) +# define OP_REG8_LOW (1 << OPT_REG8_LOW) +# define OP_IM64 (1 << OPT_IM64) +# define OP_EA32 (OP_EA << 1) +#else +# define OP_REG64 0 +# define OP_REG8_LOW 0 +# define OP_IM64 0 +# define OP_EA32 0 +#endif + +#define OP_EA 0x40000000 +#define OP_REG (OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64) + +#ifdef TCC_TARGET_X86_64 +# define TREG_XAX TREG_RAX +# define TREG_XCX TREG_RCX +# define TREG_XDX TREG_RDX +#else +# define TREG_XAX TREG_EAX +# define TREG_XCX TREG_ECX +# define TREG_XDX TREG_EDX +#endif + +typedef struct ASMInstr { + uint16_t sym; + uint16_t opcode; + uint16_t instr_type; + uint8_t nb_ops; + uint8_t op_type[MAX_OPERANDS]; /* see OP_xxx */ +} ASMInstr; + +typedef struct Operand { + uint32_t type; + int8_t reg; /* register, -1 if none */ + int8_t reg2; /* second register, -1 if none */ + uint8_t shift; + ExprValue e; +} Operand; + +static const uint8_t reg_to_size[9] = { +/* + [OP_REG8] = 0, + [OP_REG16] = 1, + [OP_REG32] = 2, +#ifdef TCC_TARGET_X86_64 + [OP_REG64] = 3, +#endif +*/ + 0, 0, 1, 0, 2, 0, 0, 0, 3 +}; + +#define NB_TEST_OPCODES 30 + +static const uint8_t test_bits[NB_TEST_OPCODES] = { + 0x00, /* o */ + 0x01, /* no */ + 0x02, /* b */ + 0x02, /* c */ + 0x02, /* nae */ + 0x03, /* nb */ + 0x03, /* nc */ + 0x03, /* ae */ + 0x04, /* e */ + 0x04, /* z */ + 0x05, /* ne */ + 0x05, /* nz */ + 0x06, /* be */ + 0x06, /* na */ + 0x07, /* nbe */ + 0x07, /* a */ + 0x08, /* s */ + 0x09, /* ns */ + 0x0a, /* p */ + 0x0a, /* pe */ + 0x0b, /* np */ + 0x0b, /* po */ + 0x0c, /* l */ + 0x0c, /* nge */ + 0x0d, /* nl */ + 0x0d, /* ge */ + 0x0e, /* le */ + 0x0e, /* ng */ + 0x0f, /* nle */ + 0x0f, /* g */ +}; + +static const uint8_t segment_prefixes[] = { + 0x26, /* es */ + 0x2e, /* cs */ + 0x36, /* ss */ + 0x3e, /* ds */ + 0x64, /* fs */ + 0x65 /* gs */ +}; + +static const ASMInstr asm_instrs[] = { +#define ALT(x) x +/* This removes a 0x0f in the second byte */ +#define O(o) ((uint64_t) ((((o) & 0xff00) == 0x0f00) ? ((((o) >> 8) & ~0xff) | ((o) & 0xff)) : (o))) +/* This constructs instr_type from opcode, type and group. */ +#define T(o,i,g) ((i) | ((g) << OPC_GROUP_SHIFT) | ((((o) & 0xff00) == 0x0f00) ? 
OPC_0F : 0)) +#define DEF_ASM_OP0(name, opcode) +#define DEF_ASM_OP0L(name, opcode, group, instr_type) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 0, { 0 } }, +#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 1, { op0 }}, +#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 2, { op0, op1 }}, +#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) { TOK_ASM_ ## name, O(opcode), T(opcode, instr_type, group), 3, { op0, op1, op2 }}, +#ifdef TCC_TARGET_X86_64 +# include "x86_64-asm.h" +#else +# include "i386-asm.h" +#endif + /* last operation */ + { 0, }, +}; + +static const uint16_t op0_codes[] = { +#define ALT(x) +#define DEF_ASM_OP0(x, opcode) opcode, +#define DEF_ASM_OP0L(name, opcode, group, instr_type) +#define DEF_ASM_OP1(name, opcode, group, instr_type, op0) +#define DEF_ASM_OP2(name, opcode, group, instr_type, op0, op1) +#define DEF_ASM_OP3(name, opcode, group, instr_type, op0, op1, op2) +#ifdef TCC_TARGET_X86_64 +# include "x86_64-asm.h" +#else +# include "i386-asm.h" +#endif +}; + +static inline int get_reg_shift(TCCState *s1) +{ + int shift, v; + v = asm_int_expr(s1); + switch(v) { + case 1: + shift = 0; + break; + case 2: + shift = 1; + break; + case 4: + shift = 2; + break; + case 8: + shift = 3; + break; + default: + expect("1, 2, 4 or 8 constant"); + shift = 0; + break; + } + return shift; +} + +#ifdef TCC_TARGET_X86_64 +static int asm_parse_numeric_reg(int t, unsigned int *type) +{ + int reg = -1; + if (t >= TOK_IDENT && t < tok_ident) { + const char *s = table_ident[t - TOK_IDENT]->str; + char c; + *type = OP_REG64; + if (*s == 'c') { + s++; + *type = OP_CR; + } + if (*s++ != 'r') + return -1; + /* Don't allow leading '0'. */ + if ((c = *s++) >= '1' && c <= '9') + reg = c - '0'; + else + return -1; + if ((c = *s) >= '0' && c <= '5') + s++, reg = reg * 10 + c - '0'; + if (reg > 15) + return -1; + if ((c = *s) == 0) + ; + else if (*type != OP_REG64) + return -1; + else if (c == 'b' && !s[1]) + *type = OP_REG8; + else if (c == 'w' && !s[1]) + *type = OP_REG16; + else if (c == 'd' && !s[1]) + *type = OP_REG32; + else + return -1; + } + return reg; +} +#endif + +static int asm_parse_reg(unsigned int *type) +{ + int reg = 0; + *type = 0; + if (tok != '%') + goto error_32; + next(); + if (tok >= TOK_ASM_eax && tok <= TOK_ASM_edi) { + reg = tok - TOK_ASM_eax; + *type = OP_REG32; +#ifdef TCC_TARGET_X86_64 + } else if (tok >= TOK_ASM_rax && tok <= TOK_ASM_rdi) { + reg = tok - TOK_ASM_rax; + *type = OP_REG64; + } else if (tok == TOK_ASM_rip) { + reg = -2; /* Probably should use different escape code. 
*/ + *type = OP_REG64; + } else if ((reg = asm_parse_numeric_reg(tok, type)) >= 0 + && (*type == OP_REG32 || *type == OP_REG64)) { + ; +#endif + } else { + error_32: + expect("register"); + } + next(); + return reg; +} + +static void parse_operand(TCCState *s1, Operand *op) +{ + ExprValue e; + int reg, indir; + const char *p; + + indir = 0; + if (tok == '*') { + next(); + indir = OP_INDIR; + } + + if (tok == '%') { + next(); + if (tok >= TOK_ASM_al && tok <= TOK_ASM_db7) { + reg = tok - TOK_ASM_al; + op->type = 1 << (reg >> 3); /* WARNING: do not change constant order */ + op->reg = reg & 7; + if ((op->type & OP_REG) && op->reg == TREG_XAX) + op->type |= OP_EAX; + else if (op->type == OP_REG8 && op->reg == TREG_XCX) + op->type |= OP_CL; + else if (op->type == OP_REG16 && op->reg == TREG_XDX) + op->type |= OP_DX; + } else if (tok >= TOK_ASM_dr0 && tok <= TOK_ASM_dr7) { + op->type = OP_DB; + op->reg = tok - TOK_ASM_dr0; + } else if (tok >= TOK_ASM_es && tok <= TOK_ASM_gs) { + op->type = OP_SEG; + op->reg = tok - TOK_ASM_es; + } else if (tok == TOK_ASM_st) { + op->type = OP_ST; + op->reg = 0; + next(); + if (tok == '(') { + next(); + if (tok != TOK_PPNUM) + goto reg_error; + p = tokc.str.data; + reg = p[0] - '0'; + if ((unsigned)reg >= 8 || p[1] != '\0') + goto reg_error; + op->reg = reg; + next(); + skip(')'); + } + if (op->reg == 0) + op->type |= OP_ST0; + goto no_skip; +#ifdef TCC_TARGET_X86_64 + } else if (tok >= TOK_ASM_spl && tok <= TOK_ASM_dil) { + op->type = OP_REG8 | OP_REG8_LOW; + op->reg = 4 + tok - TOK_ASM_spl; + } else if ((op->reg = asm_parse_numeric_reg(tok, &op->type)) >= 0) { + ; +#endif + } else { + reg_error: + tcc_error("unknown register %%%s", get_tok_str(tok, &tokc)); + } + next(); + no_skip: ; + } else if (tok == '$') { + /* constant value */ + next(); + asm_expr(s1, &e); + op->type = OP_IM32; + op->e = e; + if (!op->e.sym) { + if (op->e.v == (uint8_t)op->e.v) + op->type |= OP_IM8; + if (op->e.v == (int8_t)op->e.v) + op->type |= OP_IM8S; + if (op->e.v == (uint16_t)op->e.v) + op->type |= OP_IM16; +#ifdef TCC_TARGET_X86_64 + if (op->e.v != (int32_t)op->e.v && op->e.v != (uint32_t)op->e.v) + op->type = OP_IM64; +#endif + } + } else { + /* address(reg,reg2,shift) with all variants */ + op->type = OP_EA; + op->reg = -1; + op->reg2 = -1; + op->shift = 0; + if (tok != '(') { + asm_expr(s1, &e); + op->e = e; + } else { + next(); + if (tok == '%') { + unget_tok('('); + op->e.v = 0; + op->e.sym = NULL; + } else { + /* bracketed offset expression */ + asm_expr(s1, &e); + if (tok != ')') + expect(")"); + next(); + op->e.v = e.v; + op->e.sym = e.sym; + } + op->e.pcrel = 0; + } + if (tok == '(') { + unsigned int type = 0; + next(); + if (tok != ',') { + op->reg = asm_parse_reg(&type); + } + if (tok == ',') { + next(); + if (tok != ',') { + op->reg2 = asm_parse_reg(&type); + } + if (tok == ',') { + next(); + op->shift = get_reg_shift(s1); + } + } + if (type & OP_REG32) + op->type |= OP_EA32; + skip(')'); + } + if (op->reg == -1 && op->reg2 == -1) + op->type |= OP_ADDR; + } + op->type |= indir; +} + +/* XXX: unify with C code output ? */ +ST_FUNC void gen_expr32(ExprValue *pe) +{ + if (pe->pcrel) + /* If PC-relative, always set VT_SYM, even without symbol, + so as to force a relocation to be emitted. */ + gen_addrpc32(VT_SYM, pe->sym, pe->v); + else + gen_addr32(pe->sym ? VT_SYM : 0, pe->sym, pe->v); +} + +#ifdef TCC_TARGET_X86_64 +ST_FUNC void gen_expr64(ExprValue *pe) +{ + gen_addr64(pe->sym ? VT_SYM : 0, pe->sym, pe->v); +} +#endif + +/* XXX: unify with C code output ? 
*/ +static void gen_disp32(ExprValue *pe) +{ + Sym *sym = pe->sym; + ElfSym *esym = elfsym(sym); + if (esym && esym->st_shndx == cur_text_section->sh_num) { + /* same section: we can output an absolute value. Note + that the TCC compiler behaves differently here because + it always outputs a relocation to ease (future) code + elimination in the linker */ + gen_le32(pe->v + esym->st_value - ind - 4); + } else { + if (sym && sym->type.t == VT_VOID) { + sym->type.t = VT_FUNC; + sym->type.ref = NULL; + } + gen_addrpc32(VT_SYM, sym, pe->v); + } +} + +/* generate the modrm operand */ +static inline int asm_modrm(int reg, Operand *op) +{ + int mod, reg1, reg2, sib_reg1; + + if (op->type & (OP_REG | OP_MMX | OP_SSE)) { + g(0xc0 + (reg << 3) + op->reg); + } else if (op->reg == -1 && op->reg2 == -1) { + /* displacement only */ +#ifdef TCC_TARGET_X86_64 + g(0x04 + (reg << 3)); + g(0x25); +#else + g(0x05 + (reg << 3)); +#endif + gen_expr32(&op->e); +#ifdef TCC_TARGET_X86_64 + } else if (op->reg == -2) { + ExprValue *pe = &op->e; + g(0x05 + (reg << 3)); + gen_addrpc32(pe->sym ? VT_SYM : 0, pe->sym, pe->v); + return ind; +#endif + } else { + sib_reg1 = op->reg; + /* fist compute displacement encoding */ + if (sib_reg1 == -1) { + sib_reg1 = 5; + mod = 0x00; + } else if (op->e.v == 0 && !op->e.sym && op->reg != 5) { + mod = 0x00; + } else if (op->e.v == (int8_t)op->e.v && !op->e.sym) { + mod = 0x40; + } else { + mod = 0x80; + } + /* compute if sib byte needed */ + reg1 = op->reg; + if (op->reg2 != -1) + reg1 = 4; + g(mod + (reg << 3) + reg1); + if (reg1 == 4) { + /* add sib byte */ + reg2 = op->reg2; + if (reg2 == -1) + reg2 = 4; /* indicate no index */ + g((op->shift << 6) + (reg2 << 3) + sib_reg1); + } + /* add offset */ + if (mod == 0x40) { + g(op->e.v); + } else if (mod == 0x80 || op->reg == -1) { + gen_expr32(&op->e); + } + } + return 0; +} + +#ifdef TCC_TARGET_X86_64 +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 + +static void asm_rex(int width64, Operand *ops, int nb_ops, int *op_type, + int regi, int rmi) +{ + unsigned char rex = width64 ? 0x48 : 0; + int saw_high_8bit = 0; + int i; + if (rmi == -1) { + /* No mod/rm byte, but we might have a register op nevertheless + (we will add it to the opcode later). 
*/ + for(i = 0; i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_ST)) { + if (ops[i].reg >= 8) { + rex |= REX_B; + ops[i].reg -= 8; + } else if (ops[i].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[i].type & OP_REG8 && ops[i].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[i].reg; + break; + } + } + } else { + if (regi != -1) { + if (ops[regi].reg >= 8) { + rex |= REX_R; + ops[regi].reg -= 8; + } else if (ops[regi].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[regi].type & OP_REG8 && ops[regi].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[regi].reg; + } + if (ops[rmi].type & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_EA)) { + if (ops[rmi].reg >= 8) { + rex |= REX_B; + ops[rmi].reg -= 8; + } else if (ops[rmi].type & OP_REG8_LOW) + rex |= 0x40; + else if (ops[rmi].type & OP_REG8 && ops[rmi].reg >= 4) + /* An 8 bit reg >= 4 without REG8 is ah/ch/dh/bh */ + saw_high_8bit = ops[rmi].reg; + } + if (ops[rmi].type & OP_EA && ops[rmi].reg2 >= 8) { + rex |= REX_X; + ops[rmi].reg2 -= 8; + } + } + if (rex) { + if (saw_high_8bit) + tcc_error("can't encode register %%%ch when REX prefix is required", + "acdb"[saw_high_8bit-4]); + g(rex); + } +} +#endif + + +static void maybe_print_stats (void) +{ + static int already; + + if (0 && !already) + /* print stats about opcodes */ + { + const struct ASMInstr *pa; + int freq[4]; + int op_vals[500]; + int nb_op_vals, i, j; + + already = 1; + nb_op_vals = 0; + memset(freq, 0, sizeof(freq)); + for(pa = asm_instrs; pa->sym != 0; pa++) { + freq[pa->nb_ops]++; + //for(i=0;i<pa->nb_ops;i++) { + for(j=0;j<nb_op_vals;j++) { + //if (pa->op_type[i] == op_vals[j]) + if (pa->instr_type == op_vals[j]) + goto found; + } + //op_vals[nb_op_vals++] = pa->op_type[i]; + op_vals[nb_op_vals++] = pa->instr_type; + found: ; + //} + } + for(i=0;i<nb_op_vals;i++) { + int v = op_vals[i]; + //if ((v & (v - 1)) != 0) + printf("%3d: %08x\n", i, v); + } + printf("size=%d nb=%d f0=%d f1=%d f2=%d f3=%d\n", + (int)sizeof(asm_instrs), + (int)sizeof(asm_instrs) / (int)sizeof(ASMInstr), + freq[0], freq[1], freq[2], freq[3]); + } +} + +ST_FUNC void asm_opcode(TCCState *s1, int opcode) +{ + const ASMInstr *pa; + int i, modrm_index, modreg_index, reg, v, op1, seg_prefix, pc, p; + int nb_ops, s; + Operand ops[MAX_OPERANDS], *pop; + int op_type[3]; /* decoded op type */ + int alltypes; /* OR of all operand types */ + int autosize; + int p66; +#ifdef TCC_TARGET_X86_64 + int rex64; +#endif + + maybe_print_stats(); + /* force synthetic ';' after prefix instruction, so we can handle */ + /* one-line things like "rep stosb" instead of only "rep\nstosb" */ + if (opcode >= TOK_ASM_wait && opcode <= TOK_ASM_repnz) + unget_tok(';'); + + /* get operands */ + pop = ops; + nb_ops = 0; + seg_prefix = 0; + alltypes = 0; + for(;;) { + if (tok == ';' || tok == TOK_LINEFEED) + break; + if (nb_ops >= MAX_OPERANDS) { + tcc_error("incorrect number of operands"); + } + parse_operand(s1, pop); + if (tok == ':') { + if (pop->type != OP_SEG || seg_prefix) + tcc_error("incorrect prefix"); + seg_prefix = segment_prefixes[pop->reg]; + next(); + parse_operand(s1, pop); + if (!(pop->type & OP_EA)) { + tcc_error("segment prefix must be followed by memory reference"); + } + } + pop++; + nb_ops++; + if (tok != ',') + break; + next(); + } + + s = 0; /* avoid warning */ + +again: + /* optimize matching by using a lookup table (no hashing is needed + !) 
*/ + for(pa = asm_instrs; pa->sym != 0; pa++) { + int it = pa->instr_type & OPCT_MASK; + s = 0; + if (it == OPC_FARITH) { + v = opcode - pa->sym; + if (!((unsigned)v < 8 * 6 && (v % 6) == 0)) + continue; + } else if (it == OPC_ARITH) { + if (!(opcode >= pa->sym && opcode < pa->sym + 8*NBWLX)) + continue; + s = (opcode - pa->sym) % NBWLX; + if ((pa->instr_type & OPC_BWLX) == OPC_WLX) + { + /* We need to reject the xxxb opcodes that we accepted above. + Note that pa->sym for WLX opcodes is the 'w' token, + to get the 'b' token subtract one. */ + if (((opcode - pa->sym + 1) % NBWLX) == 0) + continue; + s++; + } + } else if (it == OPC_SHIFT) { + if (!(opcode >= pa->sym && opcode < pa->sym + 7*NBWLX)) + continue; + s = (opcode - pa->sym) % NBWLX; + } else if (it == OPC_TEST) { + if (!(opcode >= pa->sym && opcode < pa->sym + NB_TEST_OPCODES)) + continue; + /* cmovxx is a test opcode but accepts multiple sizes. + The suffixes aren't encoded in the table, instead we + simply force size autodetection always and deal with suffixed + variants below when we don't find e.g. "cmovzl". */ + if (pa->instr_type & OPC_WLX) + s = NBWLX - 1; + } else if (pa->instr_type & OPC_B) { +#ifdef TCC_TARGET_X86_64 + /* Some instructions don't have the full size but only + bwl form. insb e.g. */ + if ((pa->instr_type & OPC_WLQ) != OPC_WLQ + && !(opcode >= pa->sym && opcode < pa->sym + NBWLX-1)) + continue; +#endif + if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX)) + continue; + s = opcode - pa->sym; + } else if (pa->instr_type & OPC_WLX) { + if (!(opcode >= pa->sym && opcode < pa->sym + NBWLX-1)) + continue; + s = opcode - pa->sym + 1; + } else { + if (pa->sym != opcode) + continue; + } + if (pa->nb_ops != nb_ops) + continue; +#ifdef TCC_TARGET_X86_64 + /* Special case for moves. Selecting the IM64->REG64 form + should only be done if we really have an >32bit imm64, and that + is hardcoded. Ignore it here. */ + if (pa->opcode == 0xb0 && ops[0].type != OP_IM64 + && (ops[1].type & OP_REG) == OP_REG64 + && !(pa->instr_type & OPC_0F)) + continue; +#endif + /* now decode and check each operand */ + alltypes = 0; + for(i = 0; i < nb_ops; i++) { + int op1, op2; + op1 = pa->op_type[i]; + op2 = op1 & 0x1f; + switch(op2) { + case OPT_IM: + v = OP_IM8 | OP_IM16 | OP_IM32; + break; + case OPT_REG: + v = OP_REG8 | OP_REG16 | OP_REG32 | OP_REG64; + break; + case OPT_REGW: + v = OP_REG16 | OP_REG32 | OP_REG64; + break; + case OPT_IMW: + v = OP_IM16 | OP_IM32; + break; + case OPT_MMXSSE: + v = OP_MMX | OP_SSE; + break; + case OPT_DISP: + case OPT_DISP8: + v = OP_ADDR; + break; + default: + v = 1 << op2; + break; + } + if (op1 & OPT_EA) + v |= OP_EA; + op_type[i] = v; + if ((ops[i].type & v) == 0) + goto next; + alltypes |= ops[i].type; + } + (void)alltypes; /* maybe unused */ + /* all is matching ! */ + break; + next: ; + } + if (pa->sym == 0) { + if (opcode >= TOK_ASM_first && opcode <= TOK_ASM_last) { + int b; + b = op0_codes[opcode - TOK_ASM_first]; + if (b & 0xff00) + g(b >> 8); + g(b); + return; + } else if (opcode <= TOK_ASM_alllast) { + tcc_error("bad operand with opcode '%s'", + get_tok_str(opcode, NULL)); + } else { + /* Special case for cmovcc, we accept size suffixes but ignore + them, but we don't want them to blow up our tables. 
*/ + TokenSym *ts = table_ident[opcode - TOK_IDENT]; + if (ts->len >= 6 + && strchr("wlq", ts->str[ts->len-1]) + && !memcmp(ts->str, "cmov", 4)) { + opcode = tok_alloc(ts->str, ts->len-1)->tok; + goto again; + } + tcc_error("unknown opcode '%s'", ts->str); + } + } + /* if the size is unknown, then evaluate it (OPC_B or OPC_WL case) */ + autosize = NBWLX-1; +#ifdef TCC_TARGET_X86_64 + /* XXX the autosize should rather be zero, to not have to adjust this + all the time. */ + if ((pa->instr_type & OPC_BWLQ) == OPC_B) + autosize = NBWLX-2; +#endif + if (s == autosize) { + /* Check for register operands providing hints about the size. + Start from the end, i.e. destination operands. This matters + only for opcodes accepting different sized registers, lar and lsl + are such opcodes. */ + for(i = nb_ops - 1; s == autosize && i >= 0; i--) { + if ((ops[i].type & OP_REG) && !(op_type[i] & (OP_CL | OP_DX))) + s = reg_to_size[ops[i].type & OP_REG]; + } + if (s == autosize) { + if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && + (ops[0].type & (OP_SEG | OP_IM8S | OP_IM32))) + s = 2; + else if ((opcode == TOK_ASM_push || opcode == TOK_ASM_pop) && + (ops[0].type & OP_EA)) + s = NBWLX - 2; + else + tcc_error("cannot infer opcode suffix"); + } + } + +#ifdef TCC_TARGET_X86_64 + rex64 = 0; + if (pa->instr_type & OPC_48) + rex64 = 1; + else if (s == 3 || (alltypes & OP_REG64)) { + /* generate REX prefix */ + int default64 = 0; + for(i = 0; i < nb_ops; i++) { + if (op_type[i] == OP_REG64 && pa->opcode != 0xb8) { + /* If only 64bit regs are accepted in one operand + this is a default64 instruction without need for + REX prefixes, except for movabs(0xb8). */ + default64 = 1; + break; + } + } + /* XXX find better encoding for the default64 instructions. */ + if (((opcode != TOK_ASM_push && opcode != TOK_ASM_pop + && opcode != TOK_ASM_pushw && opcode != TOK_ASM_pushl + && opcode != TOK_ASM_pushq && opcode != TOK_ASM_popw + && opcode != TOK_ASM_popl && opcode != TOK_ASM_popq + && opcode != TOK_ASM_call && opcode != TOK_ASM_jmp)) + && !default64) + rex64 = 1; + } +#endif + + /* now generates the operation */ + if (OPCT_IS(pa->instr_type, OPC_FWAIT)) + g(0x9b); + if (seg_prefix) + g(seg_prefix); +#ifdef TCC_TARGET_X86_64 + /* Generate addr32 prefix if needed */ + for(i = 0; i < nb_ops; i++) { + if (ops[i].type & OP_EA32) { + g(0x67); + break; + } + } +#endif + /* generate data16 prefix if needed */ + p66 = 0; + if (s == 1) + p66 = 1; + else { + /* accepting mmx+sse in all operands --> needs 0x66 to + switch to sse mode. Accepting only sse in an operand --> is + already SSE insn and needs 0x66/f2/f3 handling. 
*/ + for (i = 0; i < nb_ops; i++) + if ((op_type[i] & (OP_MMX | OP_SSE)) == (OP_MMX | OP_SSE) + && ops[i].type & OP_SSE) + p66 = 1; + } + if (p66) + g(0x66); + + v = pa->opcode; + p = v >> 8; /* possibly prefix byte(s) */ + switch (p) { + case 0: break; /* no prefix */ + case 0x48: break; /* REX, handled elsewhere */ + case 0x66: + case 0x67: + case 0xf2: + case 0xf3: v = v & 0xff; g(p); break; + case 0xd4: case 0xd5: break; /* aam and aad, not prefix, but hardcoded immediate argument "10" */ + case 0xd8: case 0xd9: case 0xda: case 0xdb: /* x87, no normal prefix */ + case 0xdc: case 0xdd: case 0xde: case 0xdf: break; + default: tcc_error("bad prefix 0x%2x in opcode table", p); break; + } + if (pa->instr_type & OPC_0F) + v = ((v & ~0xff) << 8) | 0x0f00 | (v & 0xff); + if ((v == 0x69 || v == 0x6b) && nb_ops == 2) { + /* kludge for imul $im, %reg */ + nb_ops = 3; + ops[2] = ops[1]; + op_type[2] = op_type[1]; + } else if (v == 0xcd && ops[0].e.v == 3 && !ops[0].e.sym) { + v--; /* int $3 case */ + nb_ops = 0; + } else if ((v == 0x06 || v == 0x07)) { + if (ops[0].reg >= 4) { + /* push/pop %fs or %gs */ + v = 0x0fa0 + (v - 0x06) + ((ops[0].reg - 4) << 3); + } else { + v += ops[0].reg << 3; + } + nb_ops = 0; + } else if (v <= 0x05) { + /* arith case */ + v += ((opcode - TOK_ASM_addb) / NBWLX) << 3; + } else if ((pa->instr_type & (OPCT_MASK | OPC_MODRM)) == OPC_FARITH) { + /* fpu arith case */ + v += ((opcode - pa->sym) / 6) << 3; + } + + /* search which operand will be used for modrm */ + modrm_index = -1; + modreg_index = -1; + if (pa->instr_type & OPC_MODRM) { + if (!nb_ops) { + /* A modrm opcode without operands is a special case (e.g. mfence). + It has a group and acts as if there's an register operand 0 + (ax). */ + i = 0; + ops[i].type = OP_REG; + ops[i].reg = 0; + goto modrm_found; + } + /* first look for an ea operand */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & OP_EA) + goto modrm_found; + } + /* then if not found, a register or indirection (shift instructions) */ + for(i = 0;i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_MMX | OP_SSE | OP_INDIR)) + goto modrm_found; + } +#ifdef ASM_DEBUG + tcc_error("bad op table"); +#endif + modrm_found: + modrm_index = i; + /* if a register is used in another operand then it is + used instead of group */ + for(i = 0;i < nb_ops; i++) { + int t = op_type[i]; + if (i != modrm_index && + (t & (OP_REG | OP_MMX | OP_SSE | OP_CR | OP_TR | OP_DB | OP_SEG))) { + modreg_index = i; + break; + } + } + } +#ifdef TCC_TARGET_X86_64 + asm_rex (rex64, ops, nb_ops, op_type, modreg_index, modrm_index); +#endif + + if (pa->instr_type & OPC_REG) { + /* mov $im, %reg case */ + if (v == 0xb0 && s >= 1) + v += 7; + for(i = 0; i < nb_ops; i++) { + if (op_type[i] & (OP_REG | OP_ST)) { + v += ops[i].reg; + break; + } + } + } + if (pa->instr_type & OPC_B) + v += s >= 1; + if (nb_ops == 1 && pa->op_type[0] == OPT_DISP8) { + ElfSym *esym; + int jmp_disp; + + /* see if we can really generate the jump with a byte offset */ + esym = elfsym(ops[0].e.sym); + if (!esym || esym->st_shndx != cur_text_section->sh_num) + goto no_short_jump; + jmp_disp = ops[0].e.v + esym->st_value - ind - 2 - (v >= 0xff); + if (jmp_disp == (int8_t)jmp_disp) { + /* OK to generate jump */ + ops[0].e.sym = 0; + ops[0].e.v = jmp_disp; + op_type[0] = OP_IM8S; + } else { + no_short_jump: + /* long jump will be allowed. 
need to modify the + opcode slightly */ + if (v == 0xeb) /* jmp */ + v = 0xe9; + else if (v == 0x70) /* jcc */ + v += 0x0f10; + else + tcc_error("invalid displacement"); + } + } + if (OPCT_IS(pa->instr_type, OPC_TEST)) + v += test_bits[opcode - pa->sym]; + else if (OPCT_IS(pa->instr_type, OPC_0F01)) + v |= 0x0f0100; + op1 = v >> 16; + if (op1) + g(op1); + op1 = (v >> 8) & 0xff; + if (op1) + g(op1); + g(v); + + if (OPCT_IS(pa->instr_type, OPC_SHIFT)) { + reg = (opcode - pa->sym) / NBWLX; + if (reg == 6) + reg = 7; + } else if (OPCT_IS(pa->instr_type, OPC_ARITH)) { + reg = (opcode - pa->sym) / NBWLX; + } else if (OPCT_IS(pa->instr_type, OPC_FARITH)) { + reg = (opcode - pa->sym) / 6; + } else { + reg = (pa->instr_type >> OPC_GROUP_SHIFT) & 7; + } + + pc = 0; + if (pa->instr_type & OPC_MODRM) { + /* if a register is used in another operand then it is + used instead of group */ + if (modreg_index >= 0) + reg = ops[modreg_index].reg; + pc = asm_modrm(reg, &ops[modrm_index]); + } + + /* emit constants */ +#ifndef TCC_TARGET_X86_64 + if (!(pa->instr_type & OPC_0F) + && (pa->opcode == 0x9a || pa->opcode == 0xea)) { + /* ljmp or lcall kludge */ + gen_expr32(&ops[1].e); + if (ops[0].e.sym) + tcc_error("cannot relocate"); + gen_le16(ops[0].e.v); + return; + } +#endif + for(i = 0;i < nb_ops; i++) { + v = op_type[i]; + if (v & (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64 | OP_IM8S | OP_ADDR)) { + /* if multiple sizes are given it means we must look + at the op size */ + if ((v | OP_IM8 | OP_IM64) == (OP_IM8 | OP_IM16 | OP_IM32 | OP_IM64)) { + if (s == 0) + v = OP_IM8; + else if (s == 1) + v = OP_IM16; + else if (s == 2 || (v & OP_IM64) == 0) + v = OP_IM32; + else + v = OP_IM64; + } + + if ((v & (OP_IM8 | OP_IM8S | OP_IM16)) && ops[i].e.sym) + tcc_error("cannot relocate"); + + if (v & (OP_IM8 | OP_IM8S)) { + g(ops[i].e.v); + } else if (v & OP_IM16) { + gen_le16(ops[i].e.v); +#ifdef TCC_TARGET_X86_64 + } else if (v & OP_IM64) { + gen_expr64(&ops[i].e); +#endif + } else if (pa->op_type[i] == OPT_DISP || pa->op_type[i] == OPT_DISP8) { + gen_disp32(&ops[i].e); + } else { + gen_expr32(&ops[i].e); + } + } + } + + /* after immediate operands, adjust pc-relative address */ + if (pc) + add32le(cur_text_section->data + pc - 4, pc - ind); +} + +/* return the constraint priority (we allocate first the lowest + numbered constraints) */ +static inline int constraint_priority(const char *str) +{ + int priority, c, pr; + + /* we take the lowest priority */ + priority = 0; + for(;;) { + c = *str; + if (c == '\0') + break; + str++; + switch(c) { + case 'A': + pr = 0; + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'S': + case 'D': + pr = 1; + break; + case 'q': + pr = 2; + break; + case 'r': + case 'R': + case 'p': + pr = 3; + break; + case 'N': + case 'M': + case 'I': + case 'e': + case 'i': + case 'm': + case 'g': + pr = 4; + break; + default: + tcc_error("unknown constraint '%c'", c); + pr = 0; + } + if (pr > priority) + priority = pr; + } + return priority; +} + +static const char *skip_constraint_modifiers(const char *p) +{ + while (*p == '=' || *p == '&' || *p == '+' || *p == '%') + p++; + return p; +} + +/* If T (a token) is of the form "%reg" returns the register + number and type, otherwise return -1. 
*/ +ST_FUNC int asm_parse_regvar (int t) +{ + const char *s; + Operand op; + if (t < TOK_IDENT || (t & SYM_FIELD)) + return -1; + s = table_ident[t - TOK_IDENT]->str; + if (s[0] != '%') + return -1; + t = tok_alloc_const(s + 1); + unget_tok(t); + unget_tok('%'); + parse_operand(tcc_state, &op); + /* Accept only integer regs for now. */ + if (op.type & OP_REG) + return op.reg; + else + return -1; +} + +#define REG_OUT_MASK 0x01 +#define REG_IN_MASK 0x02 + +#define is_reg_allocated(reg) (regs_allocated[reg] & reg_mask) + +ST_FUNC void asm_compute_constraints(ASMOperand *operands, + int nb_operands, int nb_outputs, + const uint8_t *clobber_regs, + int *pout_reg) +{ + ASMOperand *op; + int sorted_op[MAX_ASM_OPERANDS]; + int i, j, k, p1, p2, tmp, reg, c, reg_mask; + const char *str; + uint8_t regs_allocated[NB_ASM_REGS]; + + /* init fields */ + for(i=0;i<nb_operands;i++) { + op = &operands[i]; + op->input_index = -1; + op->ref_index = -1; + op->reg = -1; + op->is_memory = 0; + op->is_rw = 0; + } + /* compute constraint priority and evaluate references to output + constraints if input constraints */ + for(i=0;i<nb_operands;i++) { + op = &operands[i]; + str = op->constraint; + str = skip_constraint_modifiers(str); + if (isnum(*str) || *str == '[') { + /* this is a reference to another constraint */ + k = find_constraint(operands, nb_operands, str, NULL); + if ((unsigned)k >= i || i < nb_outputs) + tcc_error("invalid reference in constraint %d ('%s')", + i, str); + op->ref_index = k; + if (operands[k].input_index >= 0) + tcc_error("cannot reference twice the same operand"); + operands[k].input_index = i; + op->priority = 5; + } else if ((op->vt->r & VT_VALMASK) == VT_LOCAL + && op->vt->sym + && (reg = op->vt->sym->r & VT_VALMASK) < VT_CONST) { + op->priority = 1; + op->reg = reg; + } else { + op->priority = constraint_priority(str); + } + } + + /* sort operands according to their priority */ + for(i=0;i<nb_operands;i++) + sorted_op[i] = i; + for(i=0;i<nb_operands - 1;i++) { + for(j=i+1;j<nb_operands;j++) { + p1 = operands[sorted_op[i]].priority; + p2 = operands[sorted_op[j]].priority; + if (p2 < p1) { + tmp = sorted_op[i]; + sorted_op[i] = sorted_op[j]; + sorted_op[j] = tmp; + } + } + } + + for(i = 0;i < NB_ASM_REGS; i++) { + if (clobber_regs[i]) + regs_allocated[i] = REG_IN_MASK | REG_OUT_MASK; + else + regs_allocated[i] = 0; + } + /* esp cannot be used */ + regs_allocated[4] = REG_IN_MASK | REG_OUT_MASK; + /* ebp cannot be used yet */ + regs_allocated[5] = REG_IN_MASK | REG_OUT_MASK; + + /* allocate registers and generate corresponding asm moves */ + for(i=0;i<nb_operands;i++) { + j = sorted_op[i]; + op = &operands[j]; + str = op->constraint; + /* no need to allocate references */ + if (op->ref_index >= 0) + continue; + /* select if register is used for output, input or both */ + if (op->input_index >= 0) { + reg_mask = REG_IN_MASK | REG_OUT_MASK; + } else if (j < nb_outputs) { + reg_mask = REG_OUT_MASK; + } else { + reg_mask = REG_IN_MASK; + } + if (op->reg >= 0) { + if (is_reg_allocated(op->reg)) + tcc_error("asm regvar requests register that's taken already"); + reg = op->reg; + goto reg_found; + } + try_next: + c = *str++; + switch(c) { + case '=': + goto try_next; + case '+': + op->is_rw = 1; + /* FALL THRU */ + case '&': + if (j >= nb_outputs) + tcc_error("'%c' modifier can only be applied to outputs", c); + reg_mask = REG_IN_MASK | REG_OUT_MASK; + goto try_next; + case 'A': + /* allocate both eax and edx */ + if (is_reg_allocated(TREG_XAX) || + is_reg_allocated(TREG_XDX)) + goto try_next; 
+ op->is_llong = 1; + op->reg = TREG_XAX; + regs_allocated[TREG_XAX] |= reg_mask; + regs_allocated[TREG_XDX] |= reg_mask; + break; + case 'a': + reg = TREG_XAX; + goto alloc_reg; + case 'b': + reg = 3; + goto alloc_reg; + case 'c': + reg = TREG_XCX; + goto alloc_reg; + case 'd': + reg = TREG_XDX; + goto alloc_reg; + case 'S': + reg = 6; + goto alloc_reg; + case 'D': + reg = 7; + alloc_reg: + if (is_reg_allocated(reg)) + goto try_next; + goto reg_found; + case 'q': + /* eax, ebx, ecx or edx */ + for(reg = 0; reg < 4; reg++) { + if (!is_reg_allocated(reg)) + goto reg_found; + } + goto try_next; + case 'r': + case 'R': + case 'p': /* A general address, for x86(64) any register is acceptable*/ + /* any general register */ + for(reg = 0; reg < 8; reg++) { + if (!is_reg_allocated(reg)) + goto reg_found; + } + goto try_next; + reg_found: + /* now we can reload in the register */ + op->is_llong = 0; + op->reg = reg; + regs_allocated[reg] |= reg_mask; + break; + case 'e': + case 'i': + if (!((op->vt->r & (VT_VALMASK | VT_LVAL)) == VT_CONST)) + goto try_next; + break; + case 'I': + case 'N': + case 'M': + if (!((op->vt->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST)) + goto try_next; + break; + case 'm': + case 'g': + /* nothing special to do because the operand is already in + memory, except if the pointer itself is stored in a + memory variable (VT_LLOCAL case) */ + /* XXX: fix constant case */ + /* if it is a reference to a memory zone, it must lie + in a register, so we reserve the register in the + input registers and a load will be generated + later */ + if (j < nb_outputs || c == 'm') { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) { + /* any general register */ + for(reg = 0; reg < 8; reg++) { + if (!(regs_allocated[reg] & REG_IN_MASK)) + goto reg_found1; + } + goto try_next; + reg_found1: + /* now we can reload in the register */ + regs_allocated[reg] |= REG_IN_MASK; + op->reg = reg; + op->is_memory = 1; + } + } + break; + default: + tcc_error("asm constraint %d ('%s') could not be satisfied", + j, op->constraint); + break; + } + /* if a reference is present for that operand, we assign it too */ + if (op->input_index >= 0) { + operands[op->input_index].reg = op->reg; + operands[op->input_index].is_llong = op->is_llong; + } + } + + /* compute out_reg. It is used to store outputs registers to memory + locations references by pointers (VT_LLOCAL case) */ + *pout_reg = -1; + for(i=0;i<nb_operands;i++) { + op = &operands[i]; + if (op->reg >= 0 && + (op->vt->r & VT_VALMASK) == VT_LLOCAL && + !op->is_memory) { + for(reg = 0; reg < 8; reg++) { + if (!(regs_allocated[reg] & REG_OUT_MASK)) + goto reg_found2; + } + tcc_error("could not find free output register for reloading"); + reg_found2: + *pout_reg = reg; + break; + } + } + + /* print sorted constraints */ +#ifdef ASM_DEBUG + for(i=0;i<nb_operands;i++) { + j = sorted_op[i]; + op = &operands[j]; + printf("%%%d [%s]: \"%s\" r=0x%04x reg=%d\n", + j, + op->id ? 
get_tok_str(op->id, NULL) : "", + op->constraint, + op->vt->r, + op->reg); + } + if (*pout_reg >= 0) + printf("out_reg=%d\n", *pout_reg); +#endif +} + +ST_FUNC void subst_asm_operand(CString *add_str, + SValue *sv, int modifier) +{ + int r, reg, size, val; + char buf[64]; + + r = sv->r; + if ((r & VT_VALMASK) == VT_CONST) { + if (!(r & VT_LVAL) && modifier != 'c' && modifier != 'n' && + modifier != 'P') + cstr_ccat(add_str, '$'); + if (r & VT_SYM) { + const char *name = get_tok_str(sv->sym->v, NULL); + if (sv->sym->v >= SYM_FIRST_ANOM) { + /* In case of anonymous symbols ("L.42", used + for static data labels) we can't find them + in the C symbol table when later looking up + this name. So enter them now into the asm label + list when we still know the symbol. */ + get_asm_sym(tok_alloc_const(name), sv->sym); + } + if (tcc_state->leading_underscore) + cstr_ccat(add_str, '_'); + cstr_cat(add_str, name, -1); + if ((uint32_t)sv->c.i == 0) + goto no_offset; + cstr_ccat(add_str, '+'); + } + val = sv->c.i; + if (modifier == 'n') + val = -val; + snprintf(buf, sizeof(buf), "%d", (int)sv->c.i); + cstr_cat(add_str, buf, -1); + no_offset:; +#ifdef TCC_TARGET_X86_64 + if (r & VT_LVAL) + cstr_cat(add_str, "(%rip)", -1); +#endif + } else if ((r & VT_VALMASK) == VT_LOCAL) { +#ifdef TCC_TARGET_X86_64 + snprintf(buf, sizeof(buf), "%d(%%rbp)", (int)sv->c.i); +#else + snprintf(buf, sizeof(buf), "%d(%%ebp)", (int)sv->c.i); +#endif + cstr_cat(add_str, buf, -1); + } else if (r & VT_LVAL) { + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + snprintf(buf, sizeof(buf), "(%%%s)", +#ifdef TCC_TARGET_X86_64 + get_tok_str(TOK_ASM_rax + reg, NULL) +#else + get_tok_str(TOK_ASM_eax + reg, NULL) +#endif + ); + cstr_cat(add_str, buf, -1); + } else { + /* register case */ + reg = r & VT_VALMASK; + if (reg >= VT_CONST) + tcc_internal_error(""); + + /* choose register operand size */ + if ((sv->type.t & VT_BTYPE) == VT_BYTE || + (sv->type.t & VT_BTYPE) == VT_BOOL) + size = 1; + else if ((sv->type.t & VT_BTYPE) == VT_SHORT) + size = 2; +#ifdef TCC_TARGET_X86_64 + else if ((sv->type.t & VT_BTYPE) == VT_LLONG || + (sv->type.t & VT_BTYPE) == VT_PTR) + size = 8; +#endif + else + size = 4; + if (size == 1 && reg >= 4) + size = 4; + + if (modifier == 'b') { + if (reg >= 4) + tcc_error("cannot use byte register"); + size = 1; + } else if (modifier == 'h') { + if (reg >= 4) + tcc_error("cannot use byte register"); + size = -1; + } else if (modifier == 'w') { + size = 2; + } else if (modifier == 'k') { + size = 4; +#ifdef TCC_TARGET_X86_64 + } else if (modifier == 'q') { + size = 8; +#endif + } + + switch(size) { + case -1: + reg = TOK_ASM_ah + reg; + break; + case 1: + reg = TOK_ASM_al + reg; + break; + case 2: + reg = TOK_ASM_ax + reg; + break; + default: + reg = TOK_ASM_eax + reg; + break; +#ifdef TCC_TARGET_X86_64 + case 8: + reg = TOK_ASM_rax + reg; + break; +#endif + } + snprintf(buf, sizeof(buf), "%%%s", get_tok_str(reg, NULL)); + cstr_cat(add_str, buf, -1); + } +} + +/* generate prolog and epilog code for asm statement */ +ST_FUNC void asm_gen_code(ASMOperand *operands, int nb_operands, + int nb_outputs, int is_output, + uint8_t *clobber_regs, + int out_reg) +{ + uint8_t regs_allocated[NB_ASM_REGS]; + ASMOperand *op; + int i, reg; + + /* Strictly speaking %Xbp and %Xsp should be included in the + call-preserved registers, but currently it doesn't matter. 
*/ +#ifdef TCC_TARGET_X86_64 +#ifdef TCC_TARGET_PE + static const uint8_t reg_saved[] = { 3, 6, 7, 12, 13, 14, 15 }; +#else + static const uint8_t reg_saved[] = { 3, 12, 13, 14, 15 }; +#endif +#else + static const uint8_t reg_saved[] = { 3, 6, 7 }; +#endif + + /* mark all used registers */ + memcpy(regs_allocated, clobber_regs, sizeof(regs_allocated)); + for(i = 0; i < nb_operands;i++) { + op = &operands[i]; + if (op->reg >= 0) + regs_allocated[op->reg] = 1; + } + if (!is_output) { + /* generate reg save code */ + for(i = 0; i < sizeof(reg_saved)/sizeof(reg_saved[0]); i++) { + reg = reg_saved[i]; + if (regs_allocated[reg]) { + if (reg >= 8) + g(0x41), reg-=8; + g(0x50 + reg); + } + } + + /* generate load code */ + for(i = 0; i < nb_operands; i++) { + op = &operands[i]; + if (op->reg >= 0) { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL && + op->is_memory) { + /* memory reference case (for both input and + output cases) */ + SValue sv; + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL | VT_LVAL; + sv.type.t = VT_PTR; + load(op->reg, &sv); + } else if (i >= nb_outputs || op->is_rw) { + /* load value in register */ + load(op->reg, op->vt); + if (op->is_llong) { + SValue sv; + sv = *op->vt; + sv.c.i += 4; + load(TREG_XDX, &sv); + } + } + } + } + } else { + /* generate save code */ + for(i = 0 ; i < nb_outputs; i++) { + op = &operands[i]; + if (op->reg >= 0) { + if ((op->vt->r & VT_VALMASK) == VT_LLOCAL) { + if (!op->is_memory) { + SValue sv; + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | VT_LOCAL; + sv.type.t = VT_PTR; + load(out_reg, &sv); + + sv = *op->vt; + sv.r = (sv.r & ~VT_VALMASK) | out_reg; + store(op->reg, &sv); + } + } else { + store(op->reg, op->vt); + if (op->is_llong) { + SValue sv; + sv = *op->vt; + sv.c.i += 4; + store(TREG_XDX, &sv); + } + } + } + } + /* generate reg restore code */ + for(i = sizeof(reg_saved)/sizeof(reg_saved[0]) - 1; i >= 0; i--) { + reg = reg_saved[i]; + if (regs_allocated[reg]) { + if (reg >= 8) + g(0x41), reg-=8; + g(0x58 + reg); + } + } + } +} + +ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str) +{ + int reg; +#ifdef TCC_TARGET_X86_64 + unsigned int type; +#endif + + if (!strcmp(str, "memory") || + !strcmp(str, "cc") || + !strcmp(str, "flags")) + return; + reg = tok_alloc_const(str); + if (reg >= TOK_ASM_eax && reg <= TOK_ASM_edi) { + reg -= TOK_ASM_eax; + } else if (reg >= TOK_ASM_ax && reg <= TOK_ASM_di) { + reg -= TOK_ASM_ax; +#ifdef TCC_TARGET_X86_64 + } else if (reg >= TOK_ASM_rax && reg <= TOK_ASM_rdi) { + reg -= TOK_ASM_rax; + } else if ((reg = asm_parse_numeric_reg(reg, &type)) >= 0) { + ; +#endif + } else { + tcc_error("invalid clobber register '%s'", str); + } + clobber_regs[reg] = 1; +} |
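
A minimal, self-contained sketch of the ModRM/SIB byte packing that `asm_modrm()` in the diff above performs. The helper names `modrm()` and `sib()` and the example instruction are illustrative only, not part of TCC's API; the field layout (mod/reg/rm and scale/index/base) is the x86-defined encoding.

```c
#include <stdio.h>
#include <stdint.h>

/* Pack the ModRM fields: mod (2 bits), reg (3 bits), rm (3 bits). */
static uint8_t modrm(int mod, int reg, int rm)
{
    return (uint8_t)(((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
}

/* Pack the SIB fields: scale (2 bits), index (3 bits), base (3 bits). */
static uint8_t sib(int scale, int index, int base)
{
    return (uint8_t)(((scale & 3) << 6) | ((index & 7) << 3) | (base & 7));
}

int main(void)
{
    /* Encode "addl %eax, (%ebx,%ecx,4)": opcode 0x01 (add r/m32, r32);
       mod=00 (no displacement), rm=100 signals a following SIB byte,
       reg=%eax (0); SIB: scale=2 (i.e. *4), index=%ecx (1), base=%ebx (3). */
    uint8_t bytes[3] = { 0x01, modrm(0, 0, 4), sib(2, 1, 3) };
    for (int i = 0; i < 3; i++)
        printf("%02x ", bytes[i]);   /* prints: 01 04 8b */
    printf("\n");
    return 0;
}
```

This mirrors the `g(mod + (reg << 3) + reg1)` and `g((op->shift << 6) + (reg2 << 3) + sib_reg1)` lines in `asm_modrm()`: rm=4 forces a SIB byte, and `op->shift` is exactly the 2-bit scale that `get_reg_shift()` derives from a 1/2/4/8 constant.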
