Re: Still problems.

View: New views
7 Messages — Rating Filter:   Alert me  

Parent Message unknown Re: Still problems.

by Paolo Bonzini-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


> 3) modified jit_shift_args to use nbargs instead.
>
> #define jit_shift_args() \
>    ((_jitl.nbargs >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
>     (_jitl.nbargs >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
>
> With this change, it moves the values as expected.

It turns out that with JIT_Rx mapped to RAX/R10/R11 there is no need for
jit_shift_args anymore.  That's good!

I attach the patch I committed so that you can see what's going on.

Paolo

diff --git a/ChangeLog b/ChangeLog
index 1e05fe1..0d0f86a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-06-16  Paolo Bonzini  <bonzini@...>
+
+ * lightning/i386/core.h: Use jit_save in jit_replace.  Move JIT_R
+ definition...
+ * lightning/i386/core-32.h: ... here; define jit_save so that
+ the core.h has no effect on the 32-bit backend.
+ * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+ place outgoing arguments in the right spot from the beginning,
+ define jit_save, fix jit_reg8/jit_reg16.
+
 2008-06-15  Paolo Bonzini  <bonzini@...>
 
         * lightning/i386/core-64.h: Rewrite argument passing to
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index 9775fc8..46b3516 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -36,6 +36,9 @@
 
 #define JIT_CAN_16 1
 #define JIT_AP _EBP
+
+#define JIT_R_NUM 3
+#define JIT_R(i) (_EAX + (i))
 #define JIT_V_NUM 3
 #define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)
 
@@ -46,6 +49,9 @@ struct jit_local_state {
   int alloca_slack;
 };
 
+/* Whether a register is used for the user-accessible registers.  */
+#define jit_save(reg) 1
+
 #define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = 0, \
   PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP))
 #define jit_base_ret(ofs)  \
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 173a229..5da7535 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -38,6 +38,8 @@
 #define JIT_CAN_16 0
 #define JIT_REXTMP _R9D
 
+#define JIT_R_NUM 3
+#define JIT_R(i)                ((i) == 0 ? _EAX : _R9D + (i))
 #define JIT_V_NUM               3
 #define JIT_V(i)                ((i) == 0 ? _EBX : _R11D + (i))
 
@@ -51,6 +53,9 @@ struct jit_local_state {
   int   alloca_slack;
 };
 
+/* Whether a register in the "low" bank is used for the user-accessible
+   registers.  */
+#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX)
 
 /* Keep the stack 16-byte aligned, the SSE hardware prefers it this way.  */
 #define jit_allocai_internal(amount, slack)                           \
@@ -128,19 +133,13 @@ struct jit_local_state {
 /* Stack isn't used for arguments: */
 #define jit_prepare_i(ni) (_jitl.argssize = (ni))
 
-#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_temp[_jitl.argssize]))
-#define jit_finish(sub)         (jit_shift_args(), \
- MOVQir((long) (sub), JIT_REXTMP), \
+#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize]))
+#define jit_finish(sub)         (MOVQir((long) (sub), JIT_REXTMP), \
  CALLsr(JIT_REXTMP))
 #define jit_reg_is_arg(reg)     ((reg) == _ECX || (reg) == _EDX)
 #define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \
-                                 jit_shift_args(), \
                                  CALLsr(jit_reg_is_arg((reg)) ? JIT_REXTMP : (reg)))
 
-#define jit_shift_args() \
-   ((_jitl.argssize >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
-    (_jitl.argssize >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
-
 #define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
 #define jit_arg_c()        (jit_arg_reg_order[_jitl.nextarg_geti++])
 #define jit_arg_uc()        (jit_arg_reg_order[_jitl.nextarg_geti++])
@@ -152,7 +151,6 @@ struct jit_local_state {
 #define jit_arg_ul()        (jit_arg_reg_order[_jitl.nextarg_geti++])
 #define jit_arg_p()        (jit_arg_reg_order[_jitl.nextarg_geti++])
 
-static int jit_arg_reg_temp[] = { _EDI, _ESI, _R10D, _R11D, _R8D, _R9D };
 static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };
 
 #define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) )
@@ -185,8 +183,8 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };
 
 /* Used to implement ldc, stc, ... We have SIL and friends which simplify it all.  */
 #define jit_check8(rs)          1
-#define jit_reg8(rs)            (_rN(rs) | _AL )
-#define jit_reg16(rs)           (_rN(rs) | _AX )
+#define jit_reg8(rs)            (_rR(rs) | _AL )
+#define jit_reg16(rs)           (_rR(rs) | _AX )
 #define jit_movbrm(rs, dd, db, di, ds)         MOVBrm(jit_reg8(rs), dd, db, di, ds)
 
 #define jit_ldi_c(d, is)                (_u32P((long)(is)) ? MOVSBLmr((is), 0,    0,    0, (d)) :  (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP)))
diff --git a/lightning/i386/core.h b/lightning/i386/core.h
index ad99d4d..64f8e80 100644
--- a/lightning/i386/core.h
+++ b/lightning/i386/core.h
@@ -38,9 +38,6 @@
 #define JIT_SP _ESP
 #define JIT_RET _EAX
 
-#define JIT_R_NUM 3
-#define JIT_R(i) (_EAX + (i))
-
 
 /* 3-parameter operation */
 #define jit_opr_(d, s1, s2, op1d, op2d) \
@@ -64,7 +61,11 @@
 /* An operand is forced into a register */
 #define jit_replace(rd, rs, forced, op) \
  ((rd == forced) ? JITSORRY("Register conflict for " # op) : \
- (rs == forced) ? op : (jit_pushr_i(forced), MOVLrr(rs, forced), op, jit_popr_i(forced)))
+ (rs == forced) ? op : \
+ jit_save (forced) \
+  ? (jit_pushr_i(forced), jit_movr_l(rs, forced), op, \
+     jit_popr_i(forced)) \
+  : (jit_movr_l(rs, forced), op))
 
 /* For LT, LE, ... */
 #define jit_replace8(d, cmp, op) \

_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

Re: Still problems.

by Laurent Michel :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


On Jun 17, 2008, at 3:34 AM, Paolo Bonzini wrote:


3) modified jit_shift_args to use nbargs instead.
#define jit_shift_args() \
  ((_jitl.nbargs >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
   (_jitl.nbargs >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
With this change, it moves the values as expected.

It turns out that with JIT_Rx mapped to RAX/R10/R11 there is no need for jit_shift_args anymore.  That's good!

Wonderful. I'll update. Looks like the other small patch is no longer necessary since there is no shifting ;-) Good all around!




I attach the patch I committed so that you can see what's going on.

Paolo
diff --git a/ChangeLog b/ChangeLog
index 1e05fe1..0d0f86a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-06-16  Paolo Bonzini  <bonzini@...>
+
+ * lightning/i386/core.h: Use jit_save in jit_replace.  Move JIT_R
+ definition...
+ * lightning/i386/core-32.h: ... here; define jit_save so that
+ the core.h has no effect on the 32-bit backend.
+ * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+ place outgoing arguments in the right spot from the beginning,
+ define jit_save, fix jit_reg8/jit_reg16.
+
2008-06-15  Paolo Bonzini  <bonzini@...>

        * lightning/i386/core-64.h: Rewrite argument passing to
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index 9775fc8..46b3516 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -36,6 +36,9 @@

#define JIT_CAN_16 1
#define JIT_AP _EBP
+
+#define JIT_R_NUM 3
+#define JIT_R(i) (_EAX + (i))
#define JIT_V_NUM 3
#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)

@@ -46,6 +49,9 @@ struct jit_local_state {
  int alloca_slack;
};

+/* Whether a register is used for the user-accessible registers.  */
+#define jit_save(reg) 1
+
#define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = 0, \
  PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP))
#define jit_base_ret(ofs)  \
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 173a229..5da7535 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -38,6 +38,8 @@
#define JIT_CAN_16 0
#define JIT_REXTMP _R9D

+#define JIT_R_NUM 3
+#define JIT_R(i)                ((i) == 0 ? _EAX : _R9D + (i))
#define JIT_V_NUM               3
#define JIT_V(i)                ((i) == 0 ? _EBX : _R11D + (i))

@@ -51,6 +53,9 @@ struct jit_local_state {
  int   alloca_slack;
};

+/* Whether a register in the "low" bank is used for the user-accessible
+   registers.  */
+#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX)

/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way.  */
#define jit_allocai_internal(amount, slack)                           \
@@ -128,19 +133,13 @@ struct jit_local_state {
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = (ni))

-#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_temp[_jitl.argssize]))
-#define jit_finish(sub)         (jit_shift_args(), \
- MOVQir((long) (sub), JIT_REXTMP), \
+#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize]))
+#define jit_finish(sub)         (MOVQir((long) (sub), JIT_REXTMP), \
CALLsr(JIT_REXTMP))
#define jit_reg_is_arg(reg)     ((reg) == _ECX || (reg) == _EDX)
#define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \
-                                 jit_shift_args(), \
                                 CALLsr(jit_reg_is_arg((reg)) ? JIT_REXTMP : (reg)))

-#define jit_shift_args() \
-   ((_jitl.argssize >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
-    (_jitl.argssize >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
-
#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
#define jit_arg_c()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_uc()        (jit_arg_reg_order[_jitl.nextarg_geti++])
@@ -152,7 +151,6 @@ struct jit_local_state {
#define jit_arg_ul()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_p()        (jit_arg_reg_order[_jitl.nextarg_geti++])

-static int jit_arg_reg_temp[] = { _EDI, _ESI, _R10D, _R11D, _R8D, _R9D };
static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) )
@@ -185,8 +183,8 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

/* Used to implement ldc, stc, ... We have SIL and friends which simplify it all.  */
#define jit_check8(rs)          1
-#define jit_reg8(rs)            (_rN(rs) | _AL )
-#define jit_reg16(rs)           (_rN(rs) | _AX )
+#define jit_reg8(rs)            (_rR(rs) | _AL )
+#define jit_reg16(rs)           (_rR(rs) | _AX )
#define jit_movbrm(rs, dd, db, di, ds)         MOVBrm(jit_reg8(rs), dd, db, di, ds)

#define jit_ldi_c(d, is)                (_u32P((long)(is)) ? MOVSBLmr((is), 0,    0,    0, (d)) :  (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP)))
diff --git a/lightning/i386/core.h b/lightning/i386/core.h
index ad99d4d..64f8e80 100644
--- a/lightning/i386/core.h
+++ b/lightning/i386/core.h
@@ -38,9 +38,6 @@
#define JIT_SP _ESP
#define JIT_RET _EAX

-#define JIT_R_NUM 3
-#define JIT_R(i) (_EAX + (i))
-

/* 3-parameter operation */
#define jit_opr_(d, s1, s2, op1d, op2d) \
@@ -64,7 +61,11 @@
/* An operand is forced into a register */
#define jit_replace(rd, rs, forced, op) \
((rd == forced) ? JITSORRY("Register conflict for " # op) : \
- (rs == forced) ? op : (jit_pushr_i(forced), MOVLrr(rs, forced), op, jit_popr_i(forced)))
+ (rs == forced) ? op : \
+ jit_save (forced) \
+  ? (jit_pushr_i(forced), jit_movr_l(rs, forced), op, \
+     jit_popr_i(forced)) \
+  : (jit_movr_l(rs, forced), op))

/* For LT, LE, ... */
#define jit_replace8(d, cmp, op) \

--
  Laurent






_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

smime.p7s (5K) Download Attachment

Re: Still problems.

by Laurent Michel :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

I tried the patch. The call goes through, but I'm afraid something else is interfering. The code fragment:

   _code->getarg_p(CJIT_V0, ofs4);                          
   _code->ldxi_l(CJIT_R0,CJIT_V2,LocRuntime::offsetOfST()); 
   _code->ldxi_l(CJIT_R1,CJIT_R0,LocRTStackI::offsetOfMX());
   _code->ldxi_l(CJIT_R2,CJIT_R0,LocRTStackI::offsetOfST());
   _code->muli_l(CJIT_R1,CJIT_R1,sizeof(ColSlotI));      

Now generates:

0x1000300002: mov    %rcx,%rbx
0x1000300005: mov    0x48(%r13),%rax
0x1000300009: mov    0x8(%rax),%r10
0x100030000d: mov    0x10(%rax),%r11
0x1000300011: imul   $0x10,%r10,%rdx

The last instruction is the interesting one. It should be R1 <- R1 * $0x10, but it somehow gets translated into a write to RDX rather than to R10. I checked the macro in core-64.h:

#define jit_muli_l(d, rs, is) jit_op_ ((d), (rs),       IMULQir((is), (d))       )


and it looks fine. 

The jit_op_ looks fine too:

/* 3-parameter operation, with immediate */
#define jit_op_(d, s1, op2d) \
((s1 == d) ? op2d : (MOVLrr(s1, d), op2d))


I double-checked, and the first two arguments to the macro are indeed equal, both 0x4A (R10).

Here is the relevant macro. Both references to RD use the same masking:

#define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))

The _r8(RD) macro tests RD to decide how to mask it, and it appears to choose _r4(RD) for the encoding, which produces:

(gdb) p /t CJIT_R1 & 0x07
$19 = 10

I checked how RDX would be masked and of course I get:

(gdb) p /t 0x52 & 0x07
$20 = 10

So it sounds like R10 is not really a GPR: perhaps it cannot appear just anywhere? Either that is the case, or the instruction should use _r4 to encode the destination register. I'll try to get the Intel spec to check, but I thought I'd report it now anyhow.







On Jun 17, 2008, at 3:34 AM, Paolo Bonzini wrote:


3) modified jit_shift_args to use nbargs instead.
#define jit_shift_args() \
  ((_jitl.nbargs >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
   (_jitl.nbargs >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
With this change, it moves the values as expected.

It turns out that with JIT_Rx mapped to RAX/R10/R11 there is no need for jit_shift_args anymore.  That's good!

I attach the patch I committed so that you can see what's going on.

Paolo
diff --git a/ChangeLog b/ChangeLog
index 1e05fe1..0d0f86a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2008-06-16  Paolo Bonzini  <bonzini@...>
+
+ * lightning/i386/core.h: Use jit_save in jit_replace.  Move JIT_R
+ definition...
+ * lightning/i386/core-32.h: ... here; define jit_save so that
+ the core.h has no effect on the 32-bit backend.
+ * lightning/i386/core-64.h: Place JIT_R1/JIT_R2 in R10/R11,
+ place outgoing arguments in the right spot from the beginning,
+ define jit_save, fix jit_reg8/jit_reg16.
+
2008-06-15  Paolo Bonzini  <bonzini@...>

        * lightning/i386/core-64.h: Rewrite argument passing to
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index 9775fc8..46b3516 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -36,6 +36,9 @@

#define JIT_CAN_16 1
#define JIT_AP _EBP
+
+#define JIT_R_NUM 3
+#define JIT_R(i) (_EAX + (i))
#define JIT_V_NUM 3
#define JIT_V(i) ((i) == 0 ? _EBX : _ESI + (i) - 1)

@@ -46,6 +49,9 @@ struct jit_local_state {
  int alloca_slack;
};

+/* Whether a register is used for the user-accessible registers.  */
+#define jit_save(reg) 1
+
#define jit_base_prolog() (_jitl.framesize = 20, _jitl.alloca_offset = 0, \
  PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI), PUSHLr(_EBP), MOVLrr(_ESP, _EBP))
#define jit_base_ret(ofs)  \
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 173a229..5da7535 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -38,6 +38,8 @@
#define JIT_CAN_16 0
#define JIT_REXTMP _R9D

+#define JIT_R_NUM 3
+#define JIT_R(i)                ((i) == 0 ? _EAX : _R9D + (i))
#define JIT_V_NUM               3
#define JIT_V(i)                ((i) == 0 ? _EBX : _R11D + (i))

@@ -51,6 +53,9 @@ struct jit_local_state {
  int   alloca_slack;
};

+/* Whether a register in the "low" bank is used for the user-accessible
+   registers.  */
+#define jit_save(reg) ((reg) == _EAX || (reg) == _EBX)

/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way.  */
#define jit_allocai_internal(amount, slack)                           \
@@ -128,19 +133,13 @@ struct jit_local_state {
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = (ni))

-#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_temp[_jitl.argssize]))
-#define jit_finish(sub)         (jit_shift_args(), \
- MOVQir((long) (sub), JIT_REXTMP), \
+#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize]))
+#define jit_finish(sub)         (MOVQir((long) (sub), JIT_REXTMP), \
CALLsr(JIT_REXTMP))
#define jit_reg_is_arg(reg)     ((reg) == _ECX || (reg) == _EDX)
#define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \
-                                 jit_shift_args(), \
                                 CALLsr(jit_reg_is_arg((reg)) ? JIT_REXTMP : (reg)))

-#define jit_shift_args() \
-   ((_jitl.argssize >= 3 ? (void) (MOVQrr(_R10D, _RDX)) : (void) 0), \
-    (_jitl.argssize >= 4 ? (void) (MOVQrr(_R11D, _RCX)) : (void) 0))
-
#define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX))
#define jit_arg_c()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_uc()        (jit_arg_reg_order[_jitl.nextarg_geti++])
@@ -152,7 +151,6 @@ struct jit_local_state {
#define jit_arg_ul()        (jit_arg_reg_order[_jitl.nextarg_geti++])
#define jit_arg_p()        (jit_arg_reg_order[_jitl.nextarg_geti++])

-static int jit_arg_reg_temp[] = { _EDI, _ESI, _R10D, _R11D, _R8D, _R9D };
static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

#define jit_negr_l(d, rs) jit_opi_((d), (rs), NEGQr(d), (XORQrr((d), (d)), SUBQrr((rs), (d))) )
@@ -185,8 +183,8 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D };

/* Used to implement ldc, stc, ... We have SIL and friends which simplify it all.  */
#define jit_check8(rs)          1
-#define jit_reg8(rs)            (_rN(rs) | _AL )
-#define jit_reg16(rs)           (_rN(rs) | _AX )
+#define jit_reg8(rs)            (_rR(rs) | _AL )
+#define jit_reg16(rs)           (_rR(rs) | _AX )
#define jit_movbrm(rs, dd, db, di, ds)         MOVBrm(jit_reg8(rs), dd, db, di, ds)

#define jit_ldi_c(d, is)                (_u32P((long)(is)) ? MOVSBLmr((is), 0,    0,    0, (d)) :  (jit_movi_l(JIT_REXTMP, is), jit_ldr_c(d, JIT_REXTMP)))
diff --git a/lightning/i386/core.h b/lightning/i386/core.h
index ad99d4d..64f8e80 100644
--- a/lightning/i386/core.h
+++ b/lightning/i386/core.h
@@ -38,9 +38,6 @@
#define JIT_SP _ESP
#define JIT_RET _EAX

-#define JIT_R_NUM 3
-#define JIT_R(i) (_EAX + (i))
-

/* 3-parameter operation */
#define jit_opr_(d, s1, s2, op1d, op2d) \
@@ -64,7 +61,11 @@
/* An operand is forced into a register */
#define jit_replace(rd, rs, forced, op) \
((rd == forced) ? JITSORRY("Register conflict for " # op) : \
- (rs == forced) ? op : (jit_pushr_i(forced), MOVLrr(rs, forced), op, jit_popr_i(forced)))
+ (rs == forced) ? op : \
+ jit_save (forced) \
+  ? (jit_pushr_i(forced), jit_movr_l(rs, forced), op, \
+     jit_popr_i(forced)) \
+  : (jit_movr_l(rs, forced), op))

/* For LT, LE, ... */
#define jit_replace8(d, cmp, op) \

--
  Laurent






_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

smime.p7s (5K) Download Attachment

Re: Still problems.

by Paolo Bonzini-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


> #define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69
> ,_b11,_r8(RD),_r8(RD) ,IM ))

I tried assembling two instructions and got:

    0:   4d 6b d2 0a             imul   $0xa,%r10,%r10
    4:   49 6b d2 0a             imul   $0xa,%r10,%rdx

I think the _REXQrr arguments should be "RD, RD".

Paolo


_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

Re: Still problems.

by Paolo Bonzini-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


> Here is the relevant macro.  Both references to RD use the same masking
>
> #define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69
> ,_b11,_r8(RD),_r8(RD) ,IM ))

Even better, as the disassembly suggests:

#define IMULQir(IM, RD)                 IMULQirr(IM, (RD), (RD))

There seem to be other problems in the encoding of the REX bits though.

Paolo


_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

Re: Still problems.

by Paolo Bonzini-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


> Here is the relevant macro.  Both references to RD use the same masking
>
> #define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL (0x69
> ,_b11,_r8(RD),_r8(RD) ,IM ))

As Murphy's law goes, the only wrong ones were IMULQir and
IMULQirr--checked against as+objdump with the following assembly source
code (stuff after # is a comment):

movq %r10, %rdx         #define MOVQrr(RS, RD) (_REXQrr(RS, RD)
testq %r10, %rdx        #define TESTQrr(RS, RD) (_REXQrr(RS, RD)
cmpxchgq %r10, %rdx     #define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD)
xaddq %r10, %rdx        #define XADDQrr(RS, RD) (_REXQrr(RS, RD)
xchgq %r10, %rdx        #define XCHGQrr(RS, RD) (_REXQrr(RS, RD)

imulq %r10, %rdx        #define IMULQrr(RS, RD) (_REXQrr(RD, RS)
imulq $10, %r10, %rdx   #define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD)  <<< WRONG!
cmovbq %r10, %rdx       #define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS)
bsfq %r10, %rdx         #define BSFQrr(RS, RD) (_REXQrr(RD, RS)
bsrq %r10, %rdx         #define BSRQrr(RS, RD) (_REXQrr(RD, RS)
movsbq %r10b, %rdx      #define MOVSBQrr(RS, RD) (_REXQrr(RD, RS)
movzbq %r10b, %rdx      #define MOVZBQrr(RS, RD) (_REXQrr(RD, RS)
movswq %r10w, %rdx      #define MOVSWQrr(RS, RD) (_REXQrr(RD, RS)
movzwq %r10w, %rdx      #define MOVZWQrr(RS, RD) (_REXQrr(RD, RS)
movslq %r10d, %rdx      #define MOVSLQrr(RS, RD) _m64only((_REXQrr(RD, RS)

Fix committed & pushed.

Paolo


_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

Re: Re: Still problems.

by Laurent Michel :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

You're the King  ;-)

Thanks!

---
   Laurent

On Jun 17, 2008, at 9:25 AM, Paolo Bonzini wrote:

>
>> Here is the relevant macro.  Both references to RD use the same  
>> masking
>> #define IMULQir(IM, RD) (_REXQrr(0, RD), _Os_Mrm_sL  
>> (0x69 ,_b11,_r8(RD),_r8(RD) ,IM ))
>
> As Murphy's law goes, the only wrong ones were IMULQir and IMULQirr--
> checked against as+objdump with the following assembly source code  
> (stuff after # is a comment):
>
> movq %r10, %rdx         #define MOVQrr(RS, RD) (_REXQrr(RS, RD)
> testq %r10, %rdx        #define TESTQrr(RS, RD) (_REXQrr(RS, RD)
> cmpxchgq %r10, %rdx     #define CMPXCHGQrr(RS, RD) (_REXQrr(RS, RD)
> xaddq %r10, %rdx        #define XADDQrr(RS, RD) (_REXQrr(RS, RD)
> xchgq %r10, %rdx        #define XCHGQrr(RS, RD) (_REXQrr(RS, RD)
>
> imulq %r10, %rdx        #define IMULQrr(RS, RD) (_REXQrr(RD, RS)
> imulq $10, %r10, %rdx   #define IMULQirr(IM,RS,RD) (_REXQrr(RS, RD)  
> <<< WRONG!
> cmovbq %r10, %rdx       #define CMOVQrr(CC,RS,RD) (_REXQrr(RD, RS)
> bsfq %r10, %rdx         #define BSFQrr(RS, RD) (_REXQrr(RD, RS)
> bsrq %r10, %rdx         #define BSRQrr(RS, RD) (_REXQrr(RD, RS)
> movsbq %r10b, %rdx      #define MOVSBQrr(RS, RD) (_REXQrr(RD, RS)
> movzbq %r10b, %rdx      #define MOVZBQrr(RS, RD) (_REXQrr(RD, RS)
> movswq %r10w, %rdx      #define MOVSWQrr(RS, RD) (_REXQrr(RD, RS)
> movzwq %r10w, %rdx      #define MOVZWQrr(RS, RD) (_REXQrr(RD, RS)
> movslq %r10d, %rdx      #define MOVSLQrr(RS, RD)  
> _m64only((_REXQrr(RD, RS)
>
> Fix committed & pushed.
>
> Paolo
>
>
> _______________________________________________
> Lightning mailing list
> Lightning@...
> http://lists.gnu.org/mailman/listinfo/lightning
--
   Laurent






_______________________________________________
Lightning mailing list
Lightning@...
http://lists.gnu.org/mailman/listinfo/lightning

smime.p7s (6K) Download Attachment