|
View:
New views
3 Messages
—
Rating Filter:
Alert me
|
|
|
|
|
|
Re: About asmjit experience over the weekend
Hi Mike,
thanks for your interest in the project. I have created the asmjit-dev group on Google Groups for AsmJit development, so you can join and we can discuss everything there (http://groups.google.com/group/asmjit-dev/). I also have some notes about your code: 1) Instead of push/pop esi use ebx (must be preserved, esi is not used) 2) This code: a.movd(mm1,eax); a.movd(mm2,eax); a.psllq(mm1,imm(32)); a.por(mm1,mm2); can be rewritten as: a.movd(mm1, eax); a.punpckldq(mm1, mm1); // this is a good trick to unpack the low 4 bytes into the high ones So, the new mailing list for AsmJit is asmjit-dev@.... Cheers - Petr 2009/2/3 Mike Tajmajer <mike@...>: > I tried out asmjit this weekend, and was impressed with it. Easy to use and > get decent performance gains. > > I tested it by creating a clear buffer call dynamically that was specific > for my viewport size. I put the viewport parameters and pointers into the > JIT'd assembler, and saw a 10% boost in performance over my existing > assembly code. > > Of course, when the viewport size changes I need to rebuild the routine - > but that is human controlled. > > I imagine that creating transform functions which have the matrix embedded > in the function would also gain a fair amount. > > The other ability I like is that I can detect the availability of > MMX/SSE/SSE2 at runtime and generate functions that are tailored to the CPU. 
> > Some example code: > > #if defined(_USE_JIT) > // Test of JIT function use > #pragma warning(disable : 4311 4312) > using namespace AsmJit; > > // JIT function - use fastcall, first 2 args in ecx, edx > typedef void (__fastcall *JIT_clear_bb)(DWORD dwColor); > if(m_JIT_clear_bb_32.empty()) > { > // X86 assembler > X86 a; > > // Prolog > a.push(ebp); > a.mov(ebp, esp); > a.push(esi); > a.push(edi); > > if(haveSSECPU()) > { > // MMX version is 20% faster then x86 version > Label L_Loop_Next_Line; > Label L_Loop_Line; > Label L_Loop_Pixel; > > // Code > a.mov(eax,ecx); // color in eax > a.xor_(ecx,ecx); // line counter > a.movd(mm1,eax); > a.movd(mm2,eax); > a.psllq(mm1,imm(32)); > a.por(mm1,mm2); > > a.bind(&L_Loop_Line); > a.mov(ebx,(SysUInt)(m_dwScanLen>>3)); > a.mov(edi,imm((SysUInt)m_bbuffer)); > a.mov(edi,dword_ptr(edi,ecx,2)); > > a.bind(&L_Loop_Pixel); > a.movq(dword_ptr(edi,0),mm1); > a.dec(ebx); > a.j(C_ZERO, &L_Loop_Next_Line); > a.add(edi,8); > a.jmp(&L_Loop_Pixel); > > a.bind(&L_Loop_Next_Line); > if(m_dwScanLen%8) > { > a.mov(dword_ptr(edi,0),eax); > } > a.inc(ecx); > a.cmp(ecx,imm(m_cy)); > a.j(C_NOT_EQUAL,&L_Loop_Line); > > a.emms(); > } > else > { > // x86 version > Label L_Loop_Next_Line; > Label L_Loop_Line; > Label L_Loop_Pixel; > > // Code > a.mov(eax,ecx); // color in eax > a.xor_(ecx,ecx); // line counter > > a.bind(&L_Loop_Line); > a.mov(ebx,imm((SysUInt)(m_dwScanLen>>2))); > a.mov(edi,imm((SysUInt)m_bbuffer)); > a.mov(edi,dword_ptr(edi,ecx,2)); > > a.bind(&L_Loop_Pixel); > a.mov(dword_ptr(edi,0),eax); > a.dec(ebx); > a.j(C_ZERO, &L_Loop_Next_Line); > a.add(edi,4); > a.jmp(&L_Loop_Pixel); > > a.bind(&L_Loop_Next_Line); > a.inc(ecx); > a.cmp(ecx,imm(m_cy)); > a.j(C_NOT_EQUAL,&L_Loop_Line); > } > > // Epilog > a.pop(edi); > a.pop(esi); > a.mov(esp, ebp); > a.pop(ebp); > > a.ret(0); // fastcall, no args > > // Allocate execution enabled memory > m_JIT_clear_bb_32.build(a); > } > > 
reinterpret_cast<JIT_clear_bb>(m_JIT_clear_bb_32.ptr())(m_dwBGColor); > > #pragma warning(default : 4311 4312) > #endif > > I made a simple class to manage the JIT Data, still a work in progress: > > // > // Wrapper for JIT methods > // > struct JIT_METHOD > { > void *m_pJIT; > AsmJit::SysUInt m_cbJIT; > > JIT_METHOD() : > m_pJIT(NULL), > m_cbJIT(0x0) > { > } > > ~JIT_METHOD() > { > clear(); > } > > void clear() > { > if(m_pJIT) > AsmJit::VM::free(m_pJIT, m_cbJIT); > > m_pJIT = NULL; > m_cbJIT = 0x0; > } > > void build(AsmJit::X86 & a) > { > m_pJIT = AsmJit::VM::alloc(a.codeSize(), &m_cbJIT, /*canExecute*/ > true); > assert(m_pJIT); > memcpy(m_pJIT, a.pData, a.codeSize()); > } > > void *append(AsmJit::X86 & a) > { > AsmJit::SysUInt cbOldJIT = m_cbJIT; > AsmJit::SysUInt cbAlign = (m_cbJIT % 16); // align on > 16 byte boundry > AsmJit::SysUInt cbOldJITAlign = m_cbJIT + cbAlign; > void *pOrg = m_pJIT; > byte *pNew = > (byte*)AsmJit::VM::alloc(a.codeSize()+cbOldJITAlign, &m_cbJIT, > /*canExecute*/ true); > > memcpy(pNew, pOrg, cbOldJIT); > memset(pNew+cbOldJIT, 0, cbAlign); // clear buffer bytes > memcpy(pNew+cbOldJITAlign, a.pData, a.codeSize()); > > if(m_pJIT) > AsmJit::VM::free(m_pJIT, cbOldJIT); > > m_pJIT = (void*)pNew; > > return (void*)(pNew+cbOldJITAlign); // return appended method ptr - > note when this JIT is cleared, all the functions are cleared! > } > > __inline bool empty() > { > return 0x0 == m_cbJIT; > } > > // Get JIT function pointer > __inline void *ptr() > { > return m_pJIT; > } > > // Get JIT function pointer with offset > __inline void *ptr(AsmJit::SysUInt offset) > { > assert(offset<m_cbJIT); > return (void*)(((byte*)m_pJIT)+offset); > } > }; > > > > ------------------------------------------------------------------------------ > Create and Deploy Rich Internet Apps outside the browser with Adobe(R)AIR(TM) > software. 
With Adobe AIR, Ajax developers can use existing skills and code to > build responsive, highly engaging applications that combine the power of local > resources and data with the reach of the web. Download the Adobe AIR SDK and > Ajax docs to start building applications today-http://p.sf.net/sfu/adobe-com > _______________________________________________ > Vector-agg-general mailing list > Vector-agg-general@... > https://lists.sourceforge.net/lists/listinfo/vector-agg-general > ------------------------------------------------------------------------------ Create and Deploy Rich Internet Apps outside the browser with Adobe(R)AIR(TM) software. With Adobe AIR, Ajax developers can use existing skills and code to build responsive, highly engaging applications that combine the power of local resources and data with the reach of the web. Download the Adobe AIR SDK and Ajax docs to start building applications today-http://p.sf.net/sfu/adobe-com _______________________________________________ Vector-agg-general mailing list Vector-agg-general@... https://lists.sourceforge.net/lists/listinfo/vector-agg-general |
|
|
Re: About asmjit experience over the weekend
Hi Petr,
I did some timing with rdtsc, and using punpckldq is actually slower than shifting and or'ing. Thanks for pointing out esi! I really don't need any Prolog for this function :-) > > Hi mike, > > thanks for your interest about project. I have created > asmjit-dev group in google for AsmJit development, so you can > join and we can discuss everything there > (http://groups.google.com/group/asmjit-dev/). > > I have also some notes about your code: > > 1) Instead of push/pop esi use ebx (must be preserved, esi > is not used) > 2) This code: > a.movd(mm1,eax); > a.movd(mm2,eax); > a.psllq(mm1,imm(32)); > a.por(mm1,mm2); > can be rewritten as: > a.movd(mm1, eax); > a.punpckldq(mm1, mm1); // this is good thick to unpack low > 4 bytes to higher ones > > So, new mailing list about AsmJit is asmjit-dev@.... > > Cheers > - Petr ------------------------------------------------------------------------------ Create and Deploy Rich Internet Apps outside the browser with Adobe(R)AIR(TM) software. With Adobe AIR, Ajax developers can use existing skills and code to build responsive, highly engaging applications that combine the power of local resources and data with the reach of the web. Download the Adobe AIR SDK and Ajax docs to start building applications today-http://p.sf.net/sfu/adobe-com _______________________________________________ Vector-agg-general mailing list Vector-agg-general@... https://lists.sourceforge.net/lists/listinfo/vector-agg-general |
| Free embeddable forum powered by Nabble | Forum Help |