可能是最快的算法alpha blend，有人能幫翻成delphi嗎?

發表時間：2005-06-03 17:40:07

IP:203.204.xxx.xxx 未訂閱

在網路上找到的一段AlphaBlend程式碼，不過不是delphi的，看看有沒有大大對這個有興趣的幫忙翻成delphi，想測測看是不是真的粉快，想來用用，謝謝 ==================================================================== 可能是最快的算法alpha blend??源代?，Intel官方提供 Intel官方网站有一?ablend_565的快速??算法，理?上是是把一?32bit RGBA渲染到16bit的buffer上，我的机器是PIII800,函?在system menory中?行，640*480的256?alpha blending，?到100fps，我想可以?足?大部分的要求了，在?里，我提供了??算法的?用，希望可以?大家有所?助。 ablend_565函?，源代?可以直接??使用，?需其他?函?，感?intel提供?么好的?西。首先，我提供一些本人??的把32bit tga文件?入pRGBABuffer的函? 文件尺寸保存在 width,height //----------------------------------------------------------------------- // Name: LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height ) // Desc: ?取32bit tga文件到DWORD??里，返回其尺寸 // Time: 2002.06.22 00:36 // Author: RealRender // Para: // Return: // Note: ?段代??自directx 7.0 sample中的d3dtextr.cpp，我把他提取了出? // 方便使用 //----------------------------------------------------------------------- BOOL LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height ) { FILE* file = fopen( strPathname, "rb" ); if( NULL == file ) return false; struct TargaHeader { BYTE IDLength; BYTE ColormapType; BYTE ImageType; BYTE ColormapSpecification[5]; WORD XOrigin; WORD YOrigin; WORD ImageWidth; WORD ImageHeight; BYTE PixelDepth; BYTE ImageDescriptor; } tga; fread( &tga, sizeof(TargaHeader), 1, file ); // Only true color, non-mapped images are supported if( ( 0 != tga.ColormapType ) || ( tga.ImageType != 10 && tga.ImageType != 2 ) ) { fclose( file ); return false; } // Skip the ID field. 
The first byte of the header is the length of this field if( tga.IDLength ) fseek( file, tga.IDLength, SEEK_CUR ); DWORD m_dwWidth = tga.ImageWidth; DWORD m_dwHeight = tga.ImageHeight; DWORD m_dwBPP = tga.PixelDepth; DWORD *m_pRGBAData = new DWORD[m_dwWidth*m_dwHeight]; if( m_pRGBAData == NULL ) { fclose(file); return false; } for( DWORD y=0; y>24)&0x000000ff); g = ((dwPixel>>16)&0x000000ff); b = ((dwPixel>> 8)&0x000000ff); a = ((dwPixel>> 0)&0x000000ff); alpha[i] = a; // 888i?化?565 color[i] = RGBTo16( r, g, b ); } *pAlpha = alpha; *pBitmap = color; } // ???intel官方提供的函?，函?的描述，用我的???就是把一??有256?alpha通道的565?色?据?制到16位目??面。函??明： unsigned char *lpAlpha, // 256 ?alpha通道 unsigned int iAlpPitch, // alpha通道的pitch unsigned char *lpSrc, // 原色彩?? unsigned int iSrcX, // unsigned int iSrcY, // 原色彩位置 unsigned int iSrcPitch, // 原色彩pitch unsigned char *lpDst, // 目??? unsigned int iDstX, unsigned int iDstY, // 目?位置 unsigned int iDstW, unsigned int iDstH, // 目???的尺寸 unsigned int iDstPitch // 目???的pitch void ablend_565(unsigned char *lpAlpha,unsigned int iAlpPitch, unsigned char *lpSrc,unsigned int iSrcX, unsigned int iSrcY, unsigned int iSrcPitch, unsigned char *lpDst, unsigned int iDstX, unsigned int iDstY, unsigned int iDstW, unsigned int iDstH, unsigned int iDstPitch) { //Mask for isolating the red,green, and blue components static __int64 MASKB=0x001F001F001F001F; static __int64 MASKG=0x07E007E007E007E0; static __int64 MASKSHIFTG=0x03F003F003F003F0; static __int64 MASKR=0xF800F800F800F800; //constants used by the integer alpha blending equation static __int64 SIXTEEN=0x0010001000100010; static __int64 FIVETWELVE=0x0200020002000200; static __int64 SIXONES=0x003F003F003F003F; unsigned char *lpLinearDstBp=(iDstX<<1) (iDstY*iDstPitch) lpDst; //base pointer for linear destination unsigned char *lpLinearSrcBp=(iSrcX<<1) (iSrcY*iSrcPitch) lpSrc; //base pointer for linear source unsigned char *lpLinearAlpBp=iSrcX (iSrcY*iAlpPitch) lpAlpha; //base pointer for linear alpha _asm{ mov 
esi,lpLinearSrcBp; //src mov edi,lpLinearDstBp; //dst mov eax,lpLinearAlpBp; //alpha mov ecx,iDstH; //ecx=number of lines to copy mov ebx,iDstW; //ebx=span width to copy test esi,6; //check if source address is qword aligned //since addr coming in is always word aligned(16bit) jnz done; //if not qword aligned we don't do anything primeloop: movd mm1,[eax]; //mm1=00 00 00 00 a3 a2 a1 a0 pxor mm2,mm2; //mm2=0; movq mm4,[esi]; //g1: mm4=src3 src2 src1 src0 punpcklbw mm1,mm2; //mm1=00a3 00a2 00a1 00a0 loopqword: mov edx,[eax]; test ebx,0xFFFFFFFC; //check if only 3 pixels left jz checkback; //3 or less pixels left //early out tests cmp edx,0xffffffff; //test for alpha value of 1 je copyback; //if 1's copy the source pixels to the destination test edx,0xffffffff; //test for alpha value of 0 jz leavefront; //if so go to the next 4 pixels //the alpha blend starts //green //i=a*sg (63-a)*dg; //i=(i 32) ((i 32)>>6)>>6; //red //i=a*sr (31-a)*dr; //i=(i 16) ((i 16)>>5)>>5; movq mm5,[edi]; //g2: mm5=dst3 dst2 dst1 dst0 psrlw mm1,2; //mm1=a?>>2 nuke out lower 2 bits movq mm7,MASKSHIFTG; //g3: mm7=1 bit shifted green mask psrlw mm4,1; //g3a: move src green down by 1 so that we won't overflow movq mm0,mm1; //mm0=00a3 00a2 00a1 00a0 psrlw mm5,1; //g3b: move dst green down by 1 so that we won't overflow psrlw mm1,1; //mm1=a?>>1 nuke out lower 1 bits pand mm4,mm7; //g5: mm4=sg3 sg2 sg1 sg0 movq mm2,SIXONES;//g4: mm2=63 pand mm5,mm7; //g7: mm5=dg3 dg2 dg1 dg0 movq mm3,[esi]; //b1: mm3=src3 src2 src1 src0 psubsb mm2,mm0; //g6: mm2=63-a3 63-a2 63-a1 63-a0 movq mm7,MASKB; //b2: mm7=BLUE MASK pmullw mm4,mm0; //g8: mm4=sg?*a? movq mm0,[edi]; //b3: mm0=dst3 dst2 dst1 dst0 pmullw mm5,mm2; //g9: mm5=dg?*(1-a?) movq mm2,mm7; //b4: mm2=fiveones pand mm3,mm7; //b4: mm3=sb3 sb2 sb1 sb0 pmullw mm3,mm1; //b6: mm3=sb?*a? pand mm0,mm7; //b5: mm0=db3 db2 db1 db0 movq mm7,[esi]; //r1: mm7=src3 src2 src1 src0 paddw mm4,mm5; //g10: mm4=sg?*a? dg?*(1-a?) 
pand mm7,MASKR; //r2: mm7=sr3 sr2 sr1 sr0 psubsb mm2,mm1; //b5a: mm2=31-a3 31-a2 31-a1 31-a0 paddw mm4,FIVETWELVE; //g11: mm4=(mm4 512) green pmullw mm0,mm2; //b7: mm0=db?*(1-a?) movq mm5,mm4; //g12: mm5=mm4 green psrlw mm7,11; //r4: shift src red down to position 0 psrlw mm4,6; //g13: mm4=mm4>>6 paddw mm4,mm5; //g14: mm4=mm4 mm5 green paddw mm0,mm3; //b8: mm0=sb?*a? db?*(1-a?) movq mm5,[edi]; //r3: mm5=dst3 dst2 dst1 dst0 paddw mm0,SIXTEEN; //b9: mm0=(mm0 16) blue pand mm5,MASKR; //r5: mm5=dr3 dr2 dr1 dr0 psrlw mm4,5; //g15: mm4=0?g0 0?g0 0?g0 0?g0 green movq mm3,mm0; //b10: mm3=mm0 blue psrlw mm0,5; //b11: mm0=mm0>>5 blue psrlw mm5,11; //r6: shift dst red down to position 0 paddw mm0,mm3; //b12: mm0=mm3 mm0 blue psrlw mm0,5; //b13: mm0=000b 000b 000b 000b blue pmullw mm7,mm1; //mm7=sr?*a? pand mm4,MASKG; //g16: mm4=00g0 00g0 00g0 00g0 green pmullw mm5,mm2; //r7: mm5=dr?*(31-a?) por mm0,mm4; //mm0=00gb 00gb 00gb 00gb add eax,4; //move to next 4 alphas add esi,8; //move to next 4 pixels in src add edi,8; //move to next 4 pixels in dst movd mm1,[eax]; //mm1=00 00 00 00 a3 a2 a1 a0 paddw mm5,mm7; //r8: mm5=sr?*a? dr?*(31-a?) 
paddw mm5,SIXTEEN; //r9: mm5=(mm5 16) red pxor mm2,mm2; //mm2=0; movq mm7,mm5; //r10: mm7=mm5 red psrlw mm5,5; //r11: mm5=mm5>>5 red movq mm4,[esi]; //g1: mm4=src3 src2 src1 src0 paddw mm5,mm7; //r12: mm5=mm7 mm5 red punpcklbw mm1,mm2; //mm1=00a3 00a2 00a1 00a0 psrlw mm5,5; //r13: mm5=mm5>>5 red psllw mm5,11; //r14: mm5=mm5<<10 red por mm0,mm5; //mm0=0rgb 0rgb 0rgb 0rgb sub ebx,4; //polished off 4 pixels movq [edi-8],mm0; //dst=0rgb 0rgb 0rgb 0rgb jmp loopqword; //go back to start copyback: movq [edi],mm4; //copy source to destination leavefront: add edi,8; //advance destination by 4 pixels add eax,4; //advance alpha by 4 add esi,8; //advance source by 4 pixels sub ebx,4; //decrease pixel count by 4 jmp primeloop; checkback: test ebx,0xFF; //check if 0 pixels left jz nextline; //done with this span //backalign: //work out back end pixels movq mm5,[edi]; //g2: mm5=dst3 dst2 dst1 dst0 psrlw mm1,2; //mm1=a?>>2 nuke out lower 2 bits movq mm7,MASKSHIFTG; //g3: mm7=shift 1 bit green mask psrlw mm4,1; //g3a: move src green down by 1 so that we won't overflow movq mm0,mm1; //mm0=00a3 00a2 00a1 00a0 psrlw mm5,1; //g3b: move dst green down by 1 so that we won't overflow psrlw mm1,1; //mm1=a?>>1 nuke out lower 1 bits pand mm4,mm7; //g5: mm4=sg3 sg2 sg1 sg0 movq mm2,SIXONES;//g4: mm2=63 pand mm5,mm7; //g7: mm5=dg3 dg2 dg1 dg0 movq mm3,[esi]; //b1: mm3=src3 src2 src1 src0 psubsb mm2,mm0; //g6: mm2=63-a3 63-a2 63-a1 63-a0 movq mm7,MASKB; //b2: mm7=BLUE MASK pmullw mm4,mm0; //g8: mm4=sg?*a? movq mm0,[edi]; //b3: mm0=dst3 dst2 dst1 dst0 pmullw mm5,mm2; //g9: mm5=dg?*(1-a?) movq mm2,mm7; //b4: mm2=fiveones pand mm3,mm7; //b4: mm3=sr3 sr2 sr1 sr0 pmullw mm3,mm1; //b6: mm3=sb?*a? pand mm0,mm7; //b5: mm0=db3 db2 db1 db0 movq mm7,[esi]; //r1: mm7=src3 src2 src1 src0 paddw mm4,mm5; //g10: mm4=sg?*a? dg?*(1-a?) pand mm7,MASKR; //r2: mm7=sr3 sr2 sr1 sr0 psubsb mm2,mm1; //b5a: mm2=31-a3 31-a2 31-a1 31-a0 paddw mm4,FIVETWELVE; //g11: mm4=(i 512) green pmullw mm0,mm2; //b7: mm0=db?*(1-a?) 
movq mm5,mm4; //g12: mm5=(i 512) green psrlw mm7,11; //r4: shift src red down to position 0 psrlw mm4,6; //g13: mm4=(i 512)>>6 paddw mm4,mm5; //g14: mm4=(i 512) ((i 512)>>6) green paddw mm0,mm3; //b8: mm0=sb?*a? db?*(1-a?) movq mm5,[edi]; //r3: mm5=dst3 dst2 dst1 dst0 paddw mm0,SIXTEEN; //b9: mm0=(i 16) blue pand mm5,MASKR; //r5: mm5=dr3 dr2 dr1 dr0 psrlw mm4,5; //g15: mm4=0?g0 0?g0 0?g0 0?g0 green movq mm3,mm0; //b10: mm3=(i 16) blue psrlw mm0,5; //b11: mm0=(i 16)>>5 blue psrlw mm5,11; //r6: shift dst red down to position 0 paddw mm0,mm3; //b12: mm0=(i 16) (i 16)>>5 blue psrlw mm0,5; //b13: mm0=000r 000r 000r 000r blue pmullw mm7,mm1; //mm7=sr?*a? pand mm4,MASKG; //g16: mm4=00g0 00g0 00g0 00g0 green pmullw mm5,mm2; //r7: mm5=dr?*(31-a?) por mm0,mm4; //mm0=00gb 00gb 00gb 00gb add eax,4; //move to next 4 alphas //stall paddw mm5,mm7; //r8: mm5=sr?*a? dr?*(31-a?) paddw mm5,SIXTEEN; //r9: mm5=(i 16) red movq mm7,mm5; //r10: mm7=(i 16) red psrlw mm5,5; //r11: mm5=(i 16)>>5 red paddw mm5,mm7; //r12: mm5=(i 16) ((i 16)>>5) red psrlw mm5,5; //r13: mm5=(i 16) ((i 16)>>5)>>5 red psllw mm5,11; //r14: mm5=mm5<<10 red por mm0,mm5; //mm0=0rgb 0rgb 0rgb 0rgb test ebx,2; //check if there are 2 pixels jz oneendpixel; //goto one pixel if that's it movd [edi],mm0; //dst=0000 0000 0rgb 0rgb psrlq mm0,32; //mm0>>32 add edi,4; //edi=edi 4 sub ebx,2; //saved 2 pixels jz nextline; //all done goto next line oneendpixel: //work on last pixel movd edx,mm0; //edx=0rgb mov [edi],dx; //dst=0rgb nextline: //goto next line dec ecx; //nuke one line jz done; //all done mov eax,lpLinearAlpBp; //alpha mov esi,lpLinearSrcBp; //src mov edi,lpLinearDstBp; //dst add eax,iAlpPitch; //inc alpha ptr by 1 line add esi,iSrcPitch; //inc src ptr by 1 line add edi,iDstPitch; //inc dst ptr by 1 line mov lpLinearAlpBp,eax; //save new alpha base ptr mov ebx,iDstW; //ebx=span width to copy mov lpLinearSrcBp,esi; //save new src base ptr mov lpLinearDstBp,edi; //save new dst base ptr jmp primeloop; //start the next 
span done: emms } }

iamjsn 初階會員發表：78 回覆：95 積分：44 註冊：2002-08-16 發送簡訊給我	#1 引用回覆回覆發表時間：2005-06-03 17:40:07 IP:203.204.xxx.xxx 未訂閱在網路上找到的一段AlphaBlend程式碼，不過不是delphi的，看看有沒有大大對這個有興趣的幫忙翻成delphi，想測測看是不是真的粉快，想來用用，謝謝 ==================================================================== 可能是最快的算法alpha blend??源代?，Intel官方提供 Intel官方网站有一?ablend_565的快速??算法，理?上是是把一?32bit RGBA渲染到16bit的buffer上，我的机器是PIII800,函?在system menory中?行，640480的256?alpha blending，?到100fps，我想可以?足?大部分的要求了，在?里，我提供了??算法的?用，希望可以?大家有所?助。 ablend_565函?，源代?可以直接??使用，?需其他?函?，感?intel提供?么好的?西。首先，我提供一些本人??的把32bit tga文件?入pRGBABuffer的函? 文件尺寸保存在 width,height //----------------------------------------------------------------------- // Name: LoadTgaFile( TCHAR strPathname, DWORD** pRGBABuffer, long* width, long* height ) // Desc: ?取32bit tga文件到DWORD??里，返回其尺寸 // Time: 2002.06.22 00:36 // Author: RealRender // Para: // Return: // Note: ?段代??自directx 7.0 sample中的d3dtextr.cpp，我把他提取了出? // 方便使用 //----------------------------------------------------------------------- BOOL LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height ) { FILE* file = fopen( strPathname, "rb" ); if( NULL == file ) return false; struct TargaHeader { BYTE IDLength; BYTE ColormapType; BYTE ImageType; BYTE ColormapSpecification[5]; WORD XOrigin; WORD YOrigin; WORD ImageWidth; WORD ImageHeight; BYTE PixelDepth; BYTE ImageDescriptor; } tga; fread( &tga, sizeof(TargaHeader), 1, file ); // Only true color, non-mapped images are supported if( ( 0 != tga.ColormapType ) \|\| ( tga.ImageType != 10 && tga.ImageType != 2 ) ) { fclose( file ); return false; } // Skip the ID field. 
The first byte of the header is the length of this field if( tga.IDLength ) fseek( file, tga.IDLength, SEEK_CUR ); DWORD m_dwWidth = tga.ImageWidth; DWORD m_dwHeight = tga.ImageHeight; DWORD m_dwBPP = tga.PixelDepth; DWORD m_pRGBAData = new DWORD[m_dwWidthm_dwHeight]; if( m_pRGBAData == NULL ) { fclose(file); return false; } for( DWORD y=0; y>24)&0x000000ff); g = ((dwPixel>>16)&0x000000ff); b = ((dwPixel>> 8)&0x000000ff); a = ((dwPixel>> 0)&0x000000ff); alpha[i] = a; // 888i?化?565 color[i] = RGBTo16( r, g, b ); } pAlpha = alpha; pBitmap = color; } // ???intel官方提供的函?，函?的描述，用我的???就是把一??有256?alpha通道的565?色?据?制到16位目??面。函??明： unsigned char lpAlpha, // 256 ?alpha通道 unsigned int iAlpPitch, // alpha通道的pitch unsigned char lpSrc, // 原色彩?? unsigned int iSrcX, // unsigned int iSrcY, // 原色彩位置 unsigned int iSrcPitch, // 原色彩pitch unsigned char lpDst, // 目??? unsigned int iDstX, unsigned int iDstY, // 目?位置 unsigned int iDstW, unsigned int iDstH, // 目???的尺寸 unsigned int iDstPitch // 目???的pitch void ablend_565(unsigned char lpAlpha,unsigned int iAlpPitch, unsigned char lpSrc,unsigned int iSrcX, unsigned int iSrcY, unsigned int iSrcPitch, unsigned char lpDst, unsigned int iDstX, unsigned int iDstY, unsigned int iDstW, unsigned int iDstH, unsigned int iDstPitch) { //Mask for isolating the red,green, and blue components static __int64 MASKB=0x001F001F001F001F; static __int64 MASKG=0x07E007E007E007E0; static __int64 MASKSHIFTG=0x03F003F003F003F0; static __int64 MASKR=0xF800F800F800F800; //constants used by the integer alpha blending equation static __int64 SIXTEEN=0x0010001000100010; static __int64 FIVETWELVE=0x0200020002000200; static __int64 SIXONES=0x003F003F003F003F; unsigned char lpLinearDstBp=(iDstX<<1) (iDstYiDstPitch) lpDst; //base pointer for linear destination unsigned char lpLinearSrcBp=(iSrcX<<1) (iSrcYiSrcPitch) lpSrc; //base pointer for linear source unsigned char lpLinearAlpBp=iSrcX (iSrcYiAlpPitch) lpAlpha; //base pointer for linear alpha _asm{ mov esi,lpLinearSrcBp; 
//src mov edi,lpLinearDstBp; //dst mov eax,lpLinearAlpBp; //alpha mov ecx,iDstH; //ecx=number of lines to copy mov ebx,iDstW; //ebx=span width to copy test esi,6; //check if source address is qword aligned //since addr coming in is always word aligned(16bit) jnz done; //if not qword aligned we don't do anything primeloop: movd mm1,[eax]; //mm1=00 00 00 00 a3 a2 a1 a0 pxor mm2,mm2; //mm2=0; movq mm4,[esi]; //g1: mm4=src3 src2 src1 src0 punpcklbw mm1,mm2; //mm1=00a3 00a2 00a1 00a0 loopqword: mov edx,[eax]; test ebx,0xFFFFFFFC; //check if only 3 pixels left jz checkback; //3 or less pixels left //early out tests cmp edx,0xffffffff; //test for alpha value of 1 je copyback; //if 1's copy the source pixels to the destination test edx,0xffffffff; //test for alpha value of 0 jz leavefront; //if so go to the next 4 pixels //the alpha blend starts //green //i=asg (63-a)dg; //i=(i 32) ((i 32)>>6)>>6; //red //i=asr (31-a)dr; //i=(i 16) ((i 16)>>5)>>5; movq mm5,[edi]; //g2: mm5=dst3 dst2 dst1 dst0 psrlw mm1,2; //mm1=a?>>2 nuke out lower 2 bits movq mm7,MASKSHIFTG; //g3: mm7=1 bit shifted green mask psrlw mm4,1; //g3a: move src green down by 1 so that we won't overflow movq mm0,mm1; //mm0=00a3 00a2 00a1 00a0 psrlw mm5,1; //g3b: move dst green down by 1 so that we won't overflow psrlw mm1,1; //mm1=a?>>1 nuke out lower 1 bits pand mm4,mm7; //g5: mm4=sg3 sg2 sg1 sg0 movq mm2,SIXONES;//g4: mm2=63 pand mm5,mm7; //g7: mm5=dg3 dg2 dg1 dg0 movq mm3,[esi]; //b1: mm3=src3 src2 src1 src0 psubsb mm2,mm0; //g6: mm2=63-a3 63-a2 63-a1 63-a0 movq mm7,MASKB; //b2: mm7=BLUE MASK pmullw mm4,mm0; //g8: mm4=sg?a? movq mm0,[edi]; //b3: mm0=dst3 dst2 dst1 dst0 pmullw mm5,mm2; //g9: mm5=dg?(1-a?) movq mm2,mm7; //b4: mm2=fiveones pand mm3,mm7; //b4: mm3=sb3 sb2 sb1 sb0 pmullw mm3,mm1; //b6: mm3=sb?a? pand mm0,mm7; //b5: mm0=db3 db2 db1 db0 movq mm7,[esi]; //r1: mm7=src3 src2 src1 src0 paddw mm4,mm5; //g10: mm4=sg?a? dg?(1-a?) 
pand mm7,MASKR; //r2: mm7=sr3 sr2 sr1 sr0 psubsb mm2,mm1; //b5a: mm2=31-a3 31-a2 31-a1 31-a0 paddw mm4,FIVETWELVE; //g11: mm4=(mm4 512) green pmullw mm0,mm2; //b7: mm0=db?(1-a?) movq mm5,mm4; //g12: mm5=mm4 green psrlw mm7,11; //r4: shift src red down to position 0 psrlw mm4,6; //g13: mm4=mm4>>6 paddw mm4,mm5; //g14: mm4=mm4 mm5 green paddw mm0,mm3; //b8: mm0=sb?a? db?(1-a?) movq mm5,[edi]; //r3: mm5=dst3 dst2 dst1 dst0 paddw mm0,SIXTEEN; //b9: mm0=(mm0 16) blue pand mm5,MASKR; //r5: mm5=dr3 dr2 dr1 dr0 psrlw mm4,5; //g15: mm4=0?g0 0?g0 0?g0 0?g0 green movq mm3,mm0; //b10: mm3=mm0 blue psrlw mm0,5; //b11: mm0=mm0>>5 blue psrlw mm5,11; //r6: shift dst red down to position 0 paddw mm0,mm3; //b12: mm0=mm3 mm0 blue psrlw mm0,5; //b13: mm0=000b 000b 000b 000b blue pmullw mm7,mm1; //mm7=sr?a? pand mm4,MASKG; //g16: mm4=00g0 00g0 00g0 00g0 green pmullw mm5,mm2; //r7: mm5=dr?(31-a?) por mm0,mm4; //mm0=00gb 00gb 00gb 00gb add eax,4; //move to next 4 alphas add esi,8; //move to next 4 pixels in src add edi,8; //move to next 4 pixels in dst movd mm1,[eax]; //mm1=00 00 00 00 a3 a2 a1 a0 paddw mm5,mm7; //r8: mm5=sr?a? dr?(31-a?) 
paddw mm5,SIXTEEN; //r9: mm5=(mm5 16) red pxor mm2,mm2; //mm2=0; movq mm7,mm5; //r10: mm7=mm5 red psrlw mm5,5; //r11: mm5=mm5>>5 red movq mm4,[esi]; //g1: mm4=src3 src2 src1 src0 paddw mm5,mm7; //r12: mm5=mm7 mm5 red punpcklbw mm1,mm2; //mm1=00a3 00a2 00a1 00a0 psrlw mm5,5; //r13: mm5=mm5>>5 red psllw mm5,11; //r14: mm5=mm5<<10 red por mm0,mm5; //mm0=0rgb 0rgb 0rgb 0rgb sub ebx,4; //polished off 4 pixels movq [edi-8],mm0; //dst=0rgb 0rgb 0rgb 0rgb jmp loopqword; //go back to start copyback: movq [edi],mm4; //copy source to destination leavefront: add edi,8; //advance destination by 4 pixels add eax,4; //advance alpha by 4 add esi,8; //advance source by 4 pixels sub ebx,4; //decrease pixel count by 4 jmp primeloop; checkback: test ebx,0xFF; //check if 0 pixels left jz nextline; //done with this span //backalign: //work out back end pixels movq mm5,[edi]; //g2: mm5=dst3 dst2 dst1 dst0 psrlw mm1,2; //mm1=a?>>2 nuke out lower 2 bits movq mm7,MASKSHIFTG; //g3: mm7=shift 1 bit green mask psrlw mm4,1; //g3a: move src green down by 1 so that we won't overflow movq mm0,mm1; //mm0=00a3 00a2 00a1 00a0 psrlw mm5,1; //g3b: move dst green down by 1 so that we won't overflow psrlw mm1,1; //mm1=a?>>1 nuke out lower 1 bits pand mm4,mm7; //g5: mm4=sg3 sg2 sg1 sg0 movq mm2,SIXONES;//g4: mm2=63 pand mm5,mm7; //g7: mm5=dg3 dg2 dg1 dg0 movq mm3,[esi]; //b1: mm3=src3 src2 src1 src0 psubsb mm2,mm0; //g6: mm2=63-a3 63-a2 63-a1 63-a0 movq mm7,MASKB; //b2: mm7=BLUE MASK pmullw mm4,mm0; //g8: mm4=sg?a? movq mm0,[edi]; //b3: mm0=dst3 dst2 dst1 dst0 pmullw mm5,mm2; //g9: mm5=dg?(1-a?) movq mm2,mm7; //b4: mm2=fiveones pand mm3,mm7; //b4: mm3=sr3 sr2 sr1 sr0 pmullw mm3,mm1; //b6: mm3=sb?a? pand mm0,mm7; //b5: mm0=db3 db2 db1 db0 movq mm7,[esi]; //r1: mm7=src3 src2 src1 src0 paddw mm4,mm5; //g10: mm4=sg?a? dg?(1-a?) pand mm7,MASKR; //r2: mm7=sr3 sr2 sr1 sr0 psubsb mm2,mm1; //b5a: mm2=31-a3 31-a2 31-a1 31-a0 paddw mm4,FIVETWELVE; //g11: mm4=(i 512) green pmullw mm0,mm2; //b7: mm0=db?(1-a?) 
movq mm5,mm4; //g12: mm5=(i 512) green psrlw mm7,11; //r4: shift src red down to position 0 psrlw mm4,6; //g13: mm4=(i 512)>>6 paddw mm4,mm5; //g14: mm4=(i 512) ((i 512)>>6) green paddw mm0,mm3; //b8: mm0=sb?a? db?(1-a?) movq mm5,[edi]; //r3: mm5=dst3 dst2 dst1 dst0 paddw mm0,SIXTEEN; //b9: mm0=(i 16) blue pand mm5,MASKR; //r5: mm5=dr3 dr2 dr1 dr0 psrlw mm4,5; //g15: mm4=0?g0 0?g0 0?g0 0?g0 green movq mm3,mm0; //b10: mm3=(i 16) blue psrlw mm0,5; //b11: mm0=(i 16)>>5 blue psrlw mm5,11; //r6: shift dst red down to position 0 paddw mm0,mm3; //b12: mm0=(i 16) (i 16)>>5 blue psrlw mm0,5; //b13: mm0=000r 000r 000r 000r blue pmullw mm7,mm1; //mm7=sr?a? pand mm4,MASKG; //g16: mm4=00g0 00g0 00g0 00g0 green pmullw mm5,mm2; //r7: mm5=dr?(31-a?) por mm0,mm4; //mm0=00gb 00gb 00gb 00gb add eax,4; //move to next 4 alphas //stall paddw mm5,mm7; //r8: mm5=sr?a? dr?(31-a?) paddw mm5,SIXTEEN; //r9: mm5=(i 16) red movq mm7,mm5; //r10: mm7=(i 16) red psrlw mm5,5; //r11: mm5=(i 16)>>5 red paddw mm5,mm7; //r12: mm5=(i 16) ((i 16)>>5) red psrlw mm5,5; //r13: mm5=(i 16) ((i 16)>>5)>>5 red psllw mm5,11; //r14: mm5=mm5<<10 red por mm0,mm5; //mm0=0rgb 0rgb 0rgb 0rgb test ebx,2; //check if there are 2 pixels jz oneendpixel; //goto one pixel if that's it movd [edi],mm0; //dst=0000 0000 0rgb 0rgb psrlq mm0,32; //mm0>>32 add edi,4; //edi=edi 4 sub ebx,2; //saved 2 pixels jz nextline; //all done goto next line oneendpixel: //work on last pixel movd edx,mm0; //edx=0rgb mov [edi],dx; //dst=0rgb nextline: //goto next line dec ecx; //nuke one line jz done; //all done mov eax,lpLinearAlpBp; //alpha mov esi,lpLinearSrcBp; //src mov edi,lpLinearDstBp; //dst add eax,iAlpPitch; //inc alpha ptr by 1 line add esi,iSrcPitch; //inc src ptr by 1 line add edi,iDstPitch; //inc dst ptr by 1 line mov lpLinearAlpBp,eax; //save new alpha base ptr mov ebx,iDstW; //ebx=span width to copy mov lpLinearSrcBp,esi; //save new src base ptr mov lpLinearDstBp,edi; //save new dst base ptr jmp primeloop; //start the next span 
done: emms } }
taishyang 站務副站長發表：377 回覆：5490 積分：4563 註冊：2002-10-08 發送簡訊給我	#2 引用回覆回覆發表時間：2005-06-03 17:44:23 IP:210.68.xxx.xxx 未訂閱您好：PO程式碼的方式請參考版規說明，煩請修改，謝謝您的配合。
iamjsn 初階會員發表：78 回覆：95 積分：44 註冊：2002-08-16 發送簡訊給我	#3 引用回覆回覆發表時間：2005-06-07 19:11:24 IP:203.204.xxx.xxx 未訂閱程式碼重貼過了