123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693 |
- /****************************************************************************
- *
- * Module Title : preproc.c
- *
- * Description : Simple pre-processor.
- *
- ****************************************************************************/
- /****************************************************************************
- * Header Files
- ****************************************************************************/
#include <stddef.h>
#include "memory.h"
#include "preproc.h"
- /****************************************************************************
- * Macros
- ****************************************************************************/
/* Number of history samples the temporal filters keep for every pixel. */
#define FRAMECOUNT 7
/*
 * Round an address or size up to the next multiple of 32.
 * The argument is parenthesised so that an expression such as
 * ROUNDUP32(p + n) is cast as a whole, and the arithmetic is carried out in
 * size_t so the upper half of a 64-bit pointer is preserved (the previous
 * 0xFFFFFFE0 mask cleared it).
 */
#define ROUNDUP32(X) ( ( (size_t)(X) + 31 ) & ~(size_t)31 )
- /****************************************************************************
- * Imports
- ****************************************************************************/
- extern void GetProcessorFlags (int *MmxEnabled, int *XmmEnabled, int *WmtEnabled );
- /****************************************************************************
- * Exported Global Variables
- ****************************************************************************/
- void (*tempFilter)( PreProcInstance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength ); /* Temporal filter entry point. InitPreProc() points it at tempFilter_wmt, tempFilter_mmx or tempFilter_c; nothing else in the code shown here assigns it, so it must not be called before InitPreProc(). */
- #ifndef MAPCA
- /****************************************************************************
- *
- * ROUTINE : spatialFilter_wmt
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int width : Width of images.
- * int height : Height of images.
- * int pitch : Stride of images.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closeness-adjusted spatial blur.
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
- void spatialFilter_wmt
- (
- PreProcInstance *ppi,
- unsigned char *s,
- unsigned char *d,
- int width,
- int height,
- int pitch,
- int strength
- )
- {
- int i;
- int row = 1;
- int PixelOffsets[] =
- {
- -pitch-1, -pitch, -pitch+1,
- -1, 0, +1,
- pitch-1, pitch, pitch+1
- };
- unsigned char *frameptr = ppi->frameBuffer;
-
- __declspec(align(16)) unsigned short threes[] = { 3, 3, 3, 3, 3, 3, 3, 3};
- __declspec(align(16)) unsigned short sixteens[]= {16,16,16,16,16,16,16,16};
- memcpy ( d, s, width );
-
- d += pitch;
- s += pitch;
-
- do
- {
- // NOTE: By doing it this way I am ensuring that pixels will always be unaligned!!!
- int col = 1;
- d[0] = s[0];
- d[width - 1] = s[width - 1];
- do
- {
- __declspec(align(16)) unsigned short counts[8];
- __declspec(align(16)) unsigned short sums[8];
- _asm
- {
- mov esi, s // get the source line
- add esi, col // add the column offset
- pxor xmm1,xmm1 // accumulator
- pxor xmm2,xmm2 // count
- pxor xmm7,xmm7 // 0s for use with unpack
- movq xmm3, QWORD PTR [esi] // get 8 pixels
- punpcklbw xmm3, xmm7 // unpack to shorts
- xor eax, eax // neighbor iterator
- NextNeighbor:
- mov ecx, [PixelOffsets+eax*4] // get eax index pixel neighbor offset
- movq xmm4, QWORD PTR [esi + ecx] // get ecx index neighbor values
- punpcklbw xmm4, xmm7 // xmm4 unpacked neighbor values
- movdqa xmm6, xmm4 // save the pixel values
- psubsw xmm4, xmm3 // subtracted pixel values
- pmullw xmm4, xmm4 // square xmm4
- movd xmm5, strength
- psrlw xmm4, xmm5 // should be strength
- pmullw xmm4, threes // 3 * modifier
- movdqa xmm5, sixteens // 16s
- psubusw xmm5, xmm4 // 16 - modifiers
- movdqa xmm4, xmm5 // save the modifiers
- pmullw xmm4, xmm6 // multiplier values
- paddusw xmm1, xmm4 // accumulator
- paddusw xmm2, xmm5 // count
- inc eax // next neighbor
- cmp eax,9 // there are nine neigbors
- jne NextNeighbor
- movdqa counts, xmm2
- psrlw xmm2,1 // divide count by 2 for rounding
- paddusw xmm1,xmm2 // rounding added in
- mov frameptr,esi
- movdqa sums, xmm1
- }
-
- for ( i=0; i<8; i++ )
- {
- int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
- blurvalue >>= 16;
- d[col+i] = blurvalue;
- }
- col += 8;
- } while ( col<width-1 );
- d += pitch;
- s += pitch;
- ++row;
- } while ( row<height-1 );
- memcpy ( d, s, width );
- __asm emms
- }
- #endif
- /****************************************************************************
- *
- * ROUTINE : tempFilter_c
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int bytes : Number of bytes to filter.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closeness-adjusted temporal blur.
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
- void tempFilter_c
- (
- PreProcInstance *ppi,
- unsigned char *s,
- unsigned char *d,
- int bytes,
- int strength
- )
- {
- int byte = 0;
- unsigned char *frameptr = ppi->frameBuffer;
- if ( ppi->frame == 0 )
- {
- do
- {
- int frame = 0;
- do
- {
- *frameptr = s[byte];
- ++frameptr;
- ++frame;
- } while ( frame < FRAMECOUNT );
-
- d[byte] = s[byte];
-
- ++byte;
- } while ( byte < bytes );
- }
- else
- {
- int modifier;
- int offset = (ppi->frame % FRAMECOUNT);
- do
- {
- int accumulator = 0;
- int count = 0;
- int frame = 0;
-
- frameptr[offset] = s[byte];
- do
- {
- int pixelValue = *frameptr;
-
- modifier = s[byte];
- modifier -= pixelValue;
- modifier *= modifier;
- modifier >>= strength;
- modifier *= 3;
- if(modifier > 16)
- modifier = 16;
-
- modifier = 16 - modifier;
-
- accumulator += modifier * pixelValue;
-
- count += modifier;
-
- frameptr++;
-
- ++frame;
- } while ( frame < FRAMECOUNT );
-
- accumulator += (count >> 1);
- accumulator *= ppi->fixedDivide[count];
- accumulator >>= 16;
- d[byte] = accumulator;
-
- ++byte;
- } while ( byte < bytes );
- }
- ++ppi->frame;
- }
- #ifndef MAPCA
- /****************************************************************************
- *
- * ROUTINE : tempFilter_wmt
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int bytes : Number of bytes to filter.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closeness-adjusted temporal blur.
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
- void tempFilter_wmt
- (
- PreProcInstance *ppi,
- unsigned char *s,
- unsigned char *d,
- int bytes,
- int strength
- )
- {
- int byte = 0;
- unsigned char * frameptr = ppi->frameBuffer;
- __declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3, 3, 3, 3, 3};
- __declspec(align(16)) unsigned short sixteens[]={16,16,16,16,16,16,16,16};
- if ( ppi->frame == 0 )
- {
- do
- {
- int i;
- int frame = 0;
-
- do
- {
- for ( i=0; i<8; i++ )
- {
- *frameptr = s[byte+i];
- ++frameptr;
- }
- ++frame;
- } while ( frame < FRAMECOUNT );
-
- for ( i=0; i<8; i++ )
- d[byte+i] = s[byte+i];
- byte += 8;
-
- } while ( byte < bytes );
- }
- else
- {
- int i;
- int offset2 = (ppi->frame % FRAMECOUNT);
-
- do
- {
- __declspec(align(16)) unsigned short counts[8];
- __declspec(align(16)) unsigned short sums[8];
- int accumulator = 0;
- int count = 0;
- int frame = 0;
- _asm
- {
- mov eax,offset2
- mov edi,s // source pixels
- pxor xmm1,xmm1 // accumulator
- pxor xmm7,xmm7
- mov esi,frameptr // accumulator
- pxor xmm2,xmm2 // count
- movq xmm3, QWORD PTR [edi]
- movq QWORD PTR [esi+8*eax],xmm3
- punpcklbw xmm3, xmm2 // xmm3 source pixels
- mov ecx, FRAMECOUNT
- NextFrame:
- movq xmm4, QWORD PTR [esi] // get frame buffer values
- punpcklbw xmm4, xmm7 // xmm4 frame buffer pixels
- movdqa xmm6, xmm4 // save the pixel values
- psubsw xmm4, xmm3 // subtracted pixel values
- pmullw xmm4, xmm4 // square xmm4
- movd xmm5, strength
- psrlw xmm4, xmm5 // should be strength
- pmullw xmm4, threes // 3 * modifier
- movdqa xmm5, sixteens // 16s
- psubusw xmm5, xmm4 // 16 - modifiers
- movdqa xmm4, xmm5 // save the modifiers
- pmullw xmm4, xmm6 // multiplier values
- paddusw xmm1, xmm4 // accumulator
- paddusw xmm2, xmm5 // count
- add esi, 8 // next frame
- dec ecx // next set of eight pixels
- jnz NextFrame
- movdqa counts, xmm2
- psrlw xmm2,1 // divide count by 2 for rounding
- paddusw xmm1,xmm2 // rounding added in
- mov frameptr,esi
- movdqa sums, xmm1
- }
-
- for ( i=0; i<8; i++ )
- {
- int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
- blurvalue >>= 16;
- d[i] = blurvalue;
- }
- s += 8;
- d += 8;
- byte += 8;
- } while ( byte < bytes );
- }
- ++ppi->frame;
- __asm emms
- }
- /****************************************************************************
- *
- * ROUTINE : tempFilter_mmx
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int bytes : Number of bytes to filter.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closeness-adjusted temporal blur.
- *
- * SPECIAL NOTES : Destination frame can be same as source frame.
- *
- ****************************************************************************/
- void tempFilter_mmx
- (
- PreProcInstance *ppi,
- unsigned char *s,
- unsigned char *d,
- int bytes,
- int strength
- )
- {
- int byte = 0;
- unsigned char *frameptr = ppi->frameBuffer;
-
- __declspec(align(16)) unsigned short threes[] ={ 3, 3, 3, 3};
- __declspec(align(16)) unsigned short sixteens[]={16,16,16,16};
- if ( ppi->frame == 0 )
- {
- do
- {
- int i;
- int frame = 0;
-
- do
- {
- for ( i=0; i<4; i++ )
- {
- *frameptr = s[byte+i];
- ++frameptr;
- }
- ++frame;
- } while ( frame < FRAMECOUNT );
-
- for ( i=0; i<4; i++ )
- d[byte+i] = s[byte+i];
- byte += 4;
-
- } while ( byte < bytes );
- }
- else
- {
- int i;
- int offset2 = (ppi->frame % FRAMECOUNT);
- do
- {
- __declspec(align(16)) unsigned short counts[8];
- __declspec(align(16)) unsigned short sums[8];
- int accumulator = 0;
- int count = 0;
- int frame = 0;
- _asm
- {
- mov eax,offset2
- mov edi,s // source pixels
- pxor mm1,mm1 // accumulator
- pxor mm7,mm7
- mov esi,frameptr // accumulator
- pxor mm2,mm2 // count
- movd mm3, DWORD PTR [edi]
- movd DWORD PTR [esi+4*eax],mm3
- punpcklbw mm3, mm2 // mm3 source pixels
- mov ecx, FRAMECOUNT
- NextFrame:
- movd mm4, DWORD PTR [esi] // get frame buffer values
- punpcklbw mm4, mm7 // mm4 frame buffer pixels
- movq mm6, mm4 // save the pixel values
- psubsw mm4, mm3 // subtracted pixel values
- pmullw mm4, mm4 // square mm4
- movd mm5, strength
- psrlw mm4, mm5 // should be strength
- pmullw mm4, threes // 3 * modifier
- movq mm5, sixteens // 16s
- psubusw mm5, mm4 // 16 - modifiers
- movq mm4, mm5 // save the modifiers
- pmullw mm4, mm6 // multiplier values
- paddusw mm1, mm4 // accumulator
- paddusw mm2, mm5 // count
- add esi, 4 // next frame
- dec ecx // next set of eight pixels
- jnz NextFrame
- movq counts, mm2
- psrlw mm2,1 // divide count by 2 for rounding
- paddusw mm1,mm2 // rounding added in
- mov frameptr,esi
- movq sums, mm1
- }
-
- for ( i=0; i<4; i++ )
- {
- int blurvalue = sums[i] * ppi->fixedDivide[counts[i]];
- blurvalue >>= 16;
- d[i] = blurvalue;
- }
- s += 4;
- d += 4;
- byte += 4;
- } while ( byte < bytes );
- }
- ++ppi->frame;
- __asm emms
- }
- #endif
- /****************************************************************************
- *
- * ROUTINE : DeletePreProc
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Deletes a pre-processing instance.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
- void DeletePreProc ( PreProcInstance *ppi )
- {
- if ( ppi->frameBufferAlloc )
- duck_free ( ppi->frameBufferAlloc );
- ppi->frameBufferAlloc = 0;
- ppi->frameBuffer = 0;
- if( ppi->fixedDivideAlloc )
- duck_free ( ppi->fixedDivideAlloc );
- ppi->fixedDivideAlloc = 0;
- ppi->fixedDivide = 0;
- }
- /****************************************************************************
- *
- * ROUTINE : InitPreProc
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * int FrameSize : Number of bytes in one frame.
- *
- * OUTPUTS : None.
- *
- * RETURNS : int: 1 if successful, 0 if failed.
- *
- * FUNCTION : Initializes preprocessor instance.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
- int InitPreProc ( PreProcInstance *ppi, int FrameSize )
- {
- int i;
- int MmxEnabled;
- int XmmEnabled;
- int WmtEnabled;
- #ifndef MAPCA
- GetProcessorFlags ( &MmxEnabled, &XmmEnabled, &WmtEnabled );
- if ( WmtEnabled )
- tempFilter = tempFilter_wmt;
- else if ( MmxEnabled )
- tempFilter = tempFilter_mmx;
- else
- #endif
- tempFilter = tempFilter_c;
- DeletePreProc ( ppi );
- ppi->frameBufferAlloc = duck_malloc ( 32+FrameSize*7*sizeof(unsigned char), DMEM_GENERAL );
- if ( !ppi->frameBufferAlloc ) { DeletePreProc( ppi ); return 0; }
- ppi->frameBuffer = (unsigned char *) ROUNDUP32( ppi->frameBufferAlloc );
- ppi->fixedDivideAlloc = duck_malloc ( 32+255*sizeof(unsigned int), DMEM_GENERAL );
- if ( !ppi->fixedDivideAlloc ) { DeletePreProc( ppi ); return 0; }
- ppi->fixedDivide = (unsigned int *) ROUNDUP32( ppi->fixedDivideAlloc );
- for ( i=1; i<255; i++ )
- ppi->fixedDivide[i] = 0x10000 / i;
- return 1;
- }
- /****************************************************************************
- *
- * ROUTINE : spatialFilter_c
- *
- * INPUTS : PreProcInstance *ppi : Pointer to pre-processor instance.
- * unsigned char *s : Pointer to source frame.
- * unsigned char *d : Pointer to destination frame.
- * int width : Width of images.
- * int height : Height of images.
- * int pitch : Stride of images.
- * int strength : Strength of filter to apply.
- *
- * OUTPUTS : None.
- *
- * RETURNS : void
- *
- * FUNCTION : Performs a closeness-adjusted spatial blur.
- *
- * SPECIAL NOTES : None.
- *
- ****************************************************************************/
- void spatialFilter_c
- (
- PreProcInstance *ppi,
- unsigned char *s,
- unsigned char *d,
- int width,
- int height,
- int pitch,
- int strength
- )
- {
- int modifier;
- int byte = 0;
- int row = 1;
- int PixelOffsets[9];
-
-
- PixelOffsets[0] = -pitch - 1;
- PixelOffsets[1] = -pitch;
- PixelOffsets[2] = -pitch + 1;
- PixelOffsets[3] = - 1;
- PixelOffsets[4] = 0;
- PixelOffsets[5] = + 1;
- PixelOffsets[6] = pitch - 1;
- PixelOffsets[7] = pitch ;
- PixelOffsets[8] = pitch + 1;
-
- memcpy ( d, s, width );
- d += pitch;
- s += pitch;
-
- do
- {
- int col = 1;
-
- d[0] = s[0];
- d[width - 1] = s[width - 1];
-
- do
- {
- int accumulator = 0;
- int count = 0;
- int neighbor = 0;
-
- do
- {
- int pixelValue = s[ col + PixelOffsets[neighbor] ];
-
- modifier = s[col];
- modifier -= pixelValue;
- modifier *= modifier;
- modifier >>= strength;
- modifier *= 3;
-
- if(modifier > 16)
- modifier = 16;
-
- modifier = 16 - modifier;
-
- accumulator += modifier * pixelValue;
-
- count += modifier;
-
- neighbor++;
- } while ( neighbor < sizeof(PixelOffsets)/sizeof(int) );
-
- accumulator += (count >> 1);
- accumulator *= ppi->fixedDivide[count];
- accumulator >>= 16;
-
- d[col] = accumulator;
-
- ++col;
- } while ( col < width-1 );
- d += pitch;
- s += pitch;
- ++row;
-
- } while ( row < height-1 );
-
- memcpy ( d, s, width );
- }
|