12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230 |
- /*!
- ************************************************************************
- * \file
- * global.h
- * \brief
- * global definitions for H.264 decoder.
- * \author
- * Copyright (C) 1999 Telenor Satellite Services,Norway
- * Ericsson Radio Systems, Sweden
- *
- * Inge Lille-Langoy <[email protected]>
- *
- * Telenor Satellite Services
- * Keysers gt.13 tel.: +47 23 13 86 98
- * N-0130 Oslo,Norway fax.: +47 22 77 79 80
- *
- * Rickard Sjoberg <[email protected]>
- *
- * Ericsson Radio Systems
- * KI/ERA/T/VV
- * 164 80 Stockholm, Sweden
- *
- ************************************************************************
- */
- #ifndef _GLOBAL_H_
- #define _GLOBAL_H_
- #include <stdlib.h>
- #include <stdarg.h>
- #include <string.h>
- #include <assert.h>
- #include <time.h>
- #include <sys/timeb.h>
- #include <bfc/platform/types.h>
- #include "win32.h"
- #include "defines.h"
- #include "ifunctions.h"
- #include "parsetcommon.h"
- #include "types.h"
- #include "frame.h"
- #include "nalucommon.h"
- #include "memcache.h"
- #include <mmintrin.h>
- #ifdef H264_IPP
- //#include "../tools/staticlib/ipp_px.h"
- #include "ippdefs.h"
- #include "ippcore.h"
- #include "ipps.h"
- #include "ippi.h"
- #include "ippvc.h"
- #endif
- /* benski> not the best place for this but it works for now */
- #ifdef _M_IX86
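- // byte counts passed to memzero_cache32/memzero_fast32 must be a positive multiple of 32 (each loop pass stores 32 bytes before the count is tested)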
- #pragma warning(disable: 4799)
- static inline void memzero_cache32(void *dst, unsigned long i) // zero i bytes at dst with ordinary (cached) MMX stores, 32 bytes per loop pass
- {
-
- __asm {
- pxor mm0, mm0 // mm0 = 0: the 8-byte pattern written by every store below
- mov edi, dst // edi = running destination pointer
- loopwrite:
- movq 0[edi], mm0 // four 8-byte stores clear bytes 0..31 of the current chunk
- movq 8[edi], mm0
- movq 16[edi], mm0
- movq 24[edi], mm0
- lea edi, [edi+32] // step to the next 32-byte chunk
- sub i, 32 // 32 fewer bytes remain
- jg loopwrite // signed "remaining > 0" test runs after the stores, so one chunk is always written, even when i is 0 or below 32
- }
- } // mm0 is left modified and no EMMS is executed here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero_fast32(void *dst, unsigned long i) // zero i bytes at dst with non-temporal MMX stores (movntq), 32 bytes per loop pass
- {
-
- __asm {
- pxor mm0, mm0 // mm0 = 0: the 8-byte pattern written by every store below
- mov edi, dst // edi = running destination pointer
- loopwrite:
- movntq 0[edi], mm0 // four 8-byte streaming stores clear bytes 0..31 of the current chunk
- movntq 8[edi], mm0
- movntq 16[edi], mm0
- movntq 24[edi], mm0
- lea edi, [edi+32] // step to the next 32-byte chunk
- sub i, 32 // 32 fewer bytes remain
- jg loopwrite // signed "remaining > 0" test runs after the stores, so one chunk is always written, even when i is 0 or below 32
- }
- } // neither SFENCE nor EMMS is executed in this function; memset_fast_end() only covers the EMMS part
- static inline void memzero64(void *dst) // zero exactly 64 bytes at dst with eight unrolled 8-byte MMX stores
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // offsets 0..56 in steps of 8: 8 stores x 8 bytes = 64 bytes
- movq 8[edi], mm0
- movq 16[edi], mm0
- movq 24[edi], mm0
- movq 32[edi], mm0
- movq 40[edi], mm0
- movq 48[edi], mm0
- movq 56[edi], mm0
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero128(void *dst) // zero exactly 128 bytes at dst with sixteen unrolled 8-byte MMX stores
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // offsets 0..120 in steps of 8: 16 stores x 8 bytes = 128 bytes
- movq 8[edi], mm0
- movq 16[edi], mm0
- movq 24[edi], mm0
- movq 32[edi], mm0
- movq 40[edi], mm0
- movq 48[edi], mm0
- movq 56[edi], mm0
- movq 64[edi], mm0
- movq 72[edi], mm0
- movq 80[edi], mm0
- movq 88[edi], mm0
- movq 96[edi], mm0
- movq 104[edi], mm0
- movq 112[edi], mm0
- movq 120[edi], mm0
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero24(void *dst) // zero exactly 24 bytes at dst with three 8-byte MMX stores
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // bytes 0..7
- movq 8[edi], mm0 // bytes 8..15
- movq 16[edi], mm0 // bytes 16..23
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero48(void *dst) // zero exactly 48 bytes at dst with six unrolled 8-byte MMX stores
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // offsets 0..40 in steps of 8: 6 stores x 8 bytes = 48 bytes
- movq 8[edi], mm0
- movq 16[edi], mm0
- movq 24[edi], mm0
- movq 32[edi], mm0
- movq 40[edi], mm0
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero16(void *dst) // zero exactly 16 bytes at dst with two 8-byte MMX stores
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // bytes 0..7
- movq 8[edi], mm0 // bytes 8..15
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
- static inline void memzero8(void *dst) // zero exactly 8 bytes at dst with a single MMX store
- {
- __asm {
- pxor mm0, mm0 // mm0 = 0
- mov edi, dst // edi = destination
- movq 0[edi], mm0 // bytes 0..7
- }
- } // no EMMS here; memset_fast_end() wraps _mm_empty() for that
/*
 * Ends a run of the MMX-based fill helpers in this branch.
 *
 * Those helpers write through mm0 and never execute EMMS themselves (which
 * is why warning C4799 is disabled above), so call this once after the last
 * of them. Takes no arguments and returns nothing.
 *
 * The parameter list is spelled (void) so that this is a real prototype;
 * an empty () list in C leaves the arguments unchecked.
 */
static inline void memset_fast_end(void)
{
    _mm_empty(); /* emits EMMS, handing the register file back to x87 code */
}
- // Very optimized memcpy() routine for all AMD Athlon and Duron family.
- // This code uses any of FOUR different basic copy methods, depending
- // on the transfer size.
- // NOTE: Since this code uses MOVNTQ (also known as "Non-Temporal MOV" or
- // "Streaming Store"), and also uses the software prefetchnta instruction,
- // be sure you are running on an Athlon/Duron or another CPU that supports both before calling!
- #define TINY_BLOCK_COPY 64 // upper limit for movsd type copy
- // The smallest copy uses the X86 "movsd" instruction, in an optimized
- // form which is an "unrolled loop".
- #define IN_CACHE_COPY 64 * 1024 // upper limit for movq/movq copy w/SW prefetch
- // Next is a copy that uses the MMX registers to copy 8 bytes at a time,
- // also using the "unrolled loop" optimization. This code uses
- // the software prefetch instruction to get the data into the cache.
- #define UNCACHED_COPY 197 * 1024 // upper limit for movq/movntq w/SW prefetch
- // For larger blocks, which will spill beyond the cache, it is faster to
- // use the Streaming Store instruction MOVNTQ. This write instruction
- // bypasses the cache and writes straight to main memory. This code also
- // uses the software prefetch instruction to pre-read the data.
- // USE 64 * 1024 FOR THIS VALUE IF YOU ARE ALWAYS FILLING A "CLEAN CACHE"
- #define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
- #define CACHEBLOCK 80h // number of 64-byte blocks (cache lines) for block prefetch
- // For the largest size blocks, a special technique called Block Prefetch
- // can be used to accelerate the read operations. Block Prefetch reads
- // one address per cache line, for a series of cache lines, in a short loop.
- // This is faster than using software prefetch. The technique is great for
- // getting maximum read bandwidth, especially in DDR memory systems.
- /* memcpy_amd: copy n bytes from src to dest. Inline assembly syntax for use with Visual C++ (32-bit x86).
-  *   dest, src  destination and source buffers; bytes are copied strictly front to back.
-  *   n          number of bytes to copy.
-  *   returns    dest, delivered by loading eax at the end of the __asm block (there is no C return statement).
-  * Path selection, as coded below:
-  *   n below TINY_BLOCK_COPY                       -> movsd/movsb tail only
-  *   fewer than IN_CACHE_COPY/64 64-byte blocks    -> movq loads and movq stores, with prefetchnta
-  *   fewer than UNCACHED_COPY/64 64-byte blocks    -> movq loads and movntq stores, with prefetchnta
-  *   otherwise                                     -> block prefetch in groups of CACHEBLOCK blocks, movntq stores
-  * The destination is first aligned to 8 bytes unless 32K < n <= 64K.
-  * emms is commented out at the end, so the MMX state is left live on return.
-  */
- static void * memcpy_amd(void *dest, const void *src, size_t n)
- {
- __asm {
- mov ecx, [n] // number of bytes to copy
- mov edi, [dest] // destination
- mov esi, [src] // source
- mov ebx, ecx // keep a copy of count
- cld // string instructions below move forward
- cmp ecx, TINY_BLOCK_COPY
- jb $memcpy_ic_3 // tiny? skip mmx copy
- cmp ecx, 32*1024 // do not align between 32k-64k because
- jbe $memcpy_do_align // it appears to be slower
- cmp ecx, 64*1024
- jbe $memcpy_align_done
- $memcpy_do_align:
- mov ecx, 8 // a trick that is faster than rep movsb...
- sub ecx, edi // align destination to qword
- and ecx, 111b // get the low bits: 0..7 bytes needed to reach alignment
- sub ebx, ecx // update copy count
- neg ecx // set up to jump into the array
- add ecx, offset $memcpy_align_done
- jmp ecx // jump to array of movsbs: lands ecx one-byte instructions before the label
- align 4
- movsb
- movsb
- movsb
- movsb
- movsb
- movsb
- movsb
- movsb
- $memcpy_align_done: // destination is qword (8-byte) aligned here unless the alignment step was skipped
- mov ecx, ebx // number of bytes left to copy
- shr ecx, 6 // get 64-byte block count
- jz $memcpy_ic_2 // finish the last few bytes
- cmp ecx, IN_CACHE_COPY/64 // too big 4 cache? use uncached copy
- jae $memcpy_uc_test
- // This is small block copy that uses the MMX registers to copy 8 bytes
- // at a time. It uses the "unrolled loop" optimization, and also uses
- // the software prefetch instruction to get the data into the cache.
- align 16
- $memcpy_ic_1: // 64-byte block copies, in-cache copy
- prefetchnta [esi + (200*64/34+192)] // start reading ahead
- movq mm0, [esi+0] // read 64 bits
- movq mm1, [esi+8]
- movq [edi+0], mm0 // write 64 bits
- movq [edi+8], mm1 // note: the normal movq writes the
- movq mm2, [esi+16] // data to cache; a cache line will be
- movq mm3, [esi+24] // allocated as needed, to store the data
- movq [edi+16], mm2
- movq [edi+24], mm3
- movq mm0, [esi+32]
- movq mm1, [esi+40]
- movq [edi+32], mm0
- movq [edi+40], mm1
- movq mm2, [esi+48]
- movq mm3, [esi+56]
- movq [edi+48], mm2
- movq [edi+56], mm3
- add esi, 64 // update source pointer
- add edi, 64 // update destination pointer
- dec ecx // count down
- jnz $memcpy_ic_1 // last 64-byte block?
- $memcpy_ic_2:
- mov ecx, ebx // has valid low 6 bits of the byte count
- $memcpy_ic_3:
- shr ecx, 2 // dword count
- and ecx, 1111b // only look at the "remainder" bits
- neg ecx // set up to jump into the array
- add ecx, offset $memcpy_last_few
- jmp ecx // jump to array of movsds: lands ecx one-byte instructions before the label
- $memcpy_uc_test:
- cmp ecx, UNCACHED_COPY/64 // big enough? use block prefetch copy
- jae $memcpy_bp_1
- $memcpy_64_test:
- or ecx, ecx // tail end of block prefetch will jump here
- jz $memcpy_ic_2 // no more 64-byte blocks left
- // For larger blocks, which will spill beyond the cache, it is faster to
- // use the Streaming Store instruction MOVNTQ. This write instruction
- // bypasses the cache and writes straight to main memory. This code also
- // uses the software prefetch instruction to pre-read the data.
- align 16
- $memcpy_uc_1: // 64-byte blocks, uncached copy
- prefetchnta [esi + (200*64/34+192)] // start reading ahead
- movq mm0,[esi+0] // read 64 bits
- add edi,64 // update destination pointer
- movq mm1,[esi+8]
- add esi,64 // update source pointer (later offsets are negative because both pointers already advanced)
- movq mm2,[esi-48]
- movntq [edi-64], mm0 // write 64 bits, bypassing the cache
- movq mm0,[esi-40] // note: movntq also prevents the CPU
- movntq [edi-56], mm1 // from READING the destination address
- movq mm1,[esi-32] // into the cache, only to be over-written
- movntq [edi-48], mm2 // so that also helps performance
- movq mm2,[esi-24]
- movntq [edi-40], mm0
- movq mm0,[esi-16]
- movntq [edi-32], mm1
- movq mm1,[esi-8]
- movntq [edi-24], mm2
- movntq [edi-16], mm0
- dec ecx
- movntq [edi-8], mm1
- jnz $memcpy_uc_1 // last 64-byte block?
- jmp $memcpy_ic_2 // almost done
- // For the largest size blocks, a special technique called Block Prefetch
- // can be used to accelerate the read operations. Block Prefetch reads
- // one address per cache line, for a series of cache lines, in a short loop.
- // This is faster than using software prefetch, in this case.
- // The technique is great for getting maximum read bandwidth,
- // especially in DDR memory systems.
- $memcpy_bp_1: // large blocks, block prefetch copy
- cmp ecx, CACHEBLOCK // big enough to run another prefetch loop?
- jl $memcpy_64_test // no, back to regular uncached copy
- mov eax, CACHEBLOCK / 2 // block prefetch loop, unrolled 2X
- add esi, CACHEBLOCK * 64 // move to the top of the block
- align 16
- $memcpy_bp_2:
- mov edx, [esi-64] // grab one address per cache line
- mov edx, [esi-128] // grab one address per cache line
- sub esi, 128 // go reverse order
- dec eax // count down the cache lines
- jnz $memcpy_bp_2 // keep grabbing more lines into cache
- mov eax, CACHEBLOCK // now that it is in cache, do the copy (esi is back at the start of the block)
- align 16
- $memcpy_bp_3:
- movq mm0, [esi ] // read 64 bits
- movq mm1, [esi+ 8]
- movq mm2, [esi+16]
- movq mm3, [esi+24]
- movq mm4, [esi+32]
- movq mm5, [esi+40]
- movq mm6, [esi+48]
- movq mm7, [esi+56]
- add esi, 64 // update source pointer
- movntq [edi ], mm0 // write 64 bits, bypassing cache
- movntq [edi+ 8], mm1 // note: movntq also prevents the CPU
- movntq [edi+16], mm2 // from READING the destination address
- movntq [edi+24], mm3 // into the cache, only to be over-written,
- movntq [edi+32], mm4 // so that also helps performance
- movntq [edi+40], mm5
- movntq [edi+48], mm6
- movntq [edi+56], mm7
- add edi, 64 // update dest pointer
- dec eax // count down
- jnz $memcpy_bp_3 // keep copying
- sub ecx, CACHEBLOCK // update the 64-byte block count
- jmp $memcpy_bp_1 // keep processing chunks
- // The smallest copy uses the X86 "movsd" instruction, in an optimized
- // form which is an "unrolled loop". Then it handles the last few bytes.
- align 4
- movsd
- movsd // perform last 1-15 dword copies
- movsd
- movsd
- movsd
- movsd
- movsd
- movsd
- movsd
- movsd // perform last 1-7 dword copies
- movsd
- movsd
- movsd
- movsd
- movsd
- movsd
- $memcpy_last_few: // dword aligned from before movsds
- mov ecx, ebx // has valid low 2 bits of the byte count
- and ecx, 11b // the last few cows must come home
- jz $memcpy_final // no more, lets leave
- rep movsb // the last 1, 2, or 3 bytes
- $memcpy_final:
- // emms // (disabled) would clean up the MMX state; NOTE(review): presumably callers do this via memset_fast_end() - confirm
- sfence // flush the write buffer
- mov eax, [dest] // ret value = destination pointer
- }
- }
- #elif defined(_M_X64)
/*
 * Zeroes exactly 24 bytes starting at dst (x64 build; the 32-bit x86 build
 * provides an MMX version of the same routine).
 *
 * dst must point to at least 24 writable bytes.
 *
 * The previous loop indexed an int32_t pointer with a byte offset
 * (d[j] with j = 0, 4, ..., 20), i.e. a 16-byte stride: it cleared only six
 * scattered words and wrote as far as byte 83. memset clears the intended
 * 24 contiguous bytes and nothing else.
 */
static inline void memzero24(void *dst)
{
    memset(dst, 0, 24);
}
/*
 * No-op counterpart of the 32-bit x86 memset_fast_end(), which calls
 * _mm_empty(): nothing in this (x64) branch touches the MMX registers, so
 * there is no state to reset. Kept so callers can invoke it unconditionally.
 * Declared with (void) so it is a real prototype rather than an unchecked
 * empty parameter list.
 */
static inline void memset_fast_end(void) {}
- #else
/*
 * Zeroes i bytes starting at dst (portable fallback build).
 *
 * dst  destination buffer, at least i bytes long.
 * i    byte count; the name suggests callers pass multiples of 16
 *      (NOTE(review): confirm), but any count, including 0, is handled.
 *
 * The previous loop indexed an int32_t pointer with a byte offset
 * (d[j], j += 4), i.e. a 16-byte stride: only every fourth word was cleared
 * and the writes ran to roughly four times the requested length. It also
 * compared a signed counter against the unsigned length. memset clears
 * exactly the requested range.
 */
static inline void memzero_fast16(void *dst, unsigned long i)
{
    memset(dst, 0, i);
}
/*
 * Zeroes exactly 24 bytes starting at dst (portable fallback build; the
 * 32-bit x86 build provides an MMX version of the same routine).
 *
 * dst must point to at least 24 writable bytes.
 *
 * The previous loop indexed an int32_t pointer with a byte offset
 * (d[j] with j = 0, 4, ..., 20), i.e. a 16-byte stride: it cleared only six
 * scattered words and wrote as far as byte 83. memset clears the intended
 * 24 contiguous bytes and nothing else.
 */
static inline void memzero24(void *dst)
{
    memset(dst, 0, 24);
}
/*
 * No-op counterpart of the 32-bit x86 memset_fast_end(), which calls
 * _mm_empty(): the portable helpers in this branch never touch the MMX
 * registers, so there is no state to reset. Kept so callers can invoke it
 * unconditionally. Declared with (void) so it is a real prototype rather
 * than an unchecked empty parameter list.
 */
static inline void memset_fast_end(void) {}
- #endif
- #define UNDEFINED_REFERENCE ((int)0x80000000) // sentinel with only the top bit set (INT_MIN on the usual 32-bit two's-complement int); presumably marks an unset h264_ref_t - confirm
- typedef int32_t h264_ref_t; // type of the ref_pic_id / ref_id fields in PicMotion and PicMotionParams2
- #define ET_SIZE 300 //!< size of error text buffer
- extern char errortext[ET_SIZE]; //!< buffer for error message for exit with error()
- extern int sse2_flag, mmx_flag, sse_flag, sse3_flag, sse4_1_flag; // defined elsewhere; presumably non-zero when the named CPU feature is available - confirm where they are set
- /***********************************************************************
- * T y p e d e f i n i t i o n s f o r J M
- ***********************************************************************
- */
- typedef enum // identifies one of the three picture components
- {
- LumaComp = 0,
- CrComp = 1, // note the order: Cr is 1 and Cb is 2
- CbComp = 2
- } Color_Component;
- /***********************************************************************
- * D a t a t y p e s f o r C A B A C
- ***********************************************************************
- */
- typedef struct pix_pos // position record; passed as the `PixelPos *block` argument of Macroblock::GetMVPredictor
- {
- int available; // presumably non-zero when the position can be used - confirm
- int mb_addr; // presumably the address of the macroblock that contains the position - confirm
- short x; // x/y and pos_x/pos_y are two coordinate pairs; this header does not say how they differ
- short y; // NOTE(review): presumably macroblock-relative (x, y) vs picture-absolute (pos_x, pos_y) - confirm
- short pos_x;
- short pos_y;
- } PixelPos;
- //! struct to characterize the state of the arithmetic coding engine (embedded in DataPartition as de_cabac)
- typedef struct
- {
- unsigned int Drange; // presumably the current interval range of the arithmetic decoder - confirm
- unsigned int Dvalue; // presumably the current value/offset register of the arithmetic decoder - confirm
- int DbitsLeft; // presumably the number of buffered bits still available in Dvalue - confirm
- byte *Dcodestrm; // pointer into a byte stream; ownership is not visible in this header
- int *Dcodestrm_len; // pointer to an int held elsewhere; presumably the read position/length for Dcodestrm - confirm
- } DecodingEnvironment;
- typedef DecodingEnvironment *DecodingEnvironmentPtr; // pointer alias used in callback signatures such as read_and_store_CBP_block_bit
- typedef short MotionVector[2]; // two shorts; presumably [0] = x and [1] = y, matching the [x,y] note on Macroblock::mvd - confirm
- //! definition of motion parameters (one record per entry of the MotionParams::motion arrays)
- typedef struct pic_motion
- {
- h264_ref_t ref_pic_id; // reference picture identifier (same field name and type as in PicMotionParams2)
- h264_ref_t ref_id; // reference picture identifier (same field name and type as in PicMotionParams2)
- MotionVector mv; // motion vector
- char ref_idx; // reference index; plain char, so its signedness is implementation-defined
- } PicMotion;
- // TODO: benski> might be more efficient to make a [list][subblock_y][subblock_x] array of these values instead of parallel arrays
- typedef struct motion_params // motion data as two parallel 2-D arrays of PicMotion plus a byte map
- {
- PicMotion **motion[2]; // two 2-D PicMotion arrays; presumably one per reference list, indexed [list][y][x] as the TODO above suggests - confirm
- byte ** moving_block; // 2-D byte array; meaning of the values is not visible here
- } MotionParams;
- //! struct for context management (one binary context model; 4 bytes with the padding member)
- typedef struct
- {
- uint16_t state; // index into state-table CP
- unsigned char MPS; // Most Probable Symbol 0/1 CP (the earlier comment said "Least", contradicting the field name)
- unsigned char dummy; // for alignment
- } BiContextType;
- typedef BiContextType *BiContextTypePtr; // pointer alias for a single context model
- /**********************************************************************
- * C O N T E X T S F O R T M L S Y N T A X E L E M E N T S
- **********************************************************************
- */
- #define NUM_MB_TYPE_CTX 11 // second dimension of MotionInfoContexts::mb_type_contexts
- #define NUM_B8_TYPE_CTX 9 // second dimension of MotionInfoContexts::b8_type_contexts
- #define NUM_MV_RES_CTX 10 // second dimension of MotionInfoContexts::mv_res_contexts
- #define NUM_REF_NO_CTX 6 // second dimension of MotionInfoContexts::ref_no_contexts
- #define NUM_DELTA_QP_CTX 4 // length of MotionInfoContexts::delta_qp_contexts
- #define NUM_MB_AFF_CTX 4 // length of MotionInfoContexts::mb_aff_contexts
- #define NUM_TRANSFORM_SIZE_CTX 3 // length of TextureInfoContexts::transform_size_contexts
- // structures that will be declared somewhere else (forward declarations so pointers to them can be used below)
- struct storable_picture;
- struct datapartition;
- struct syntaxelement;
- typedef struct // context models grouped per syntax-element family; referenced from Slice::mot_ctx
- {
- BiContextType mb_type_contexts [3][NUM_MB_TYPE_CTX]; // 3 sets; what selects the set is not visible in this header
- BiContextType b8_type_contexts [2][NUM_B8_TYPE_CTX]; // 2 sets
- BiContextType mv_res_contexts [2][NUM_MV_RES_CTX]; // 2 sets
- BiContextType ref_no_contexts [2][NUM_REF_NO_CTX]; // 2 sets
- BiContextType delta_qp_contexts[NUM_DELTA_QP_CTX];
- BiContextType mb_aff_contexts [NUM_MB_AFF_CTX];
- } MotionInfoContexts;
- #define NUM_IPR_CTX 2 // length of ipr_contexts
- #define NUM_CIPR_CTX 4 // length of cipr_contexts
- #define NUM_CBP_CTX 4 // second dimension of cbp_contexts
- #define NUM_BCBP_CTX 4 // second dimension of bcbp_contexts
- #define NUM_MAP_CTX 15 // map_contexts rows hold NUM_MAP_CTX+1 entries
- #define NUM_LAST_CTX 15 // last_contexts rows hold NUM_LAST_CTX+1 entries
- #define NUM_ONE_CTX 5 // second dimension of one_contexts
- #define NUM_ABS_CTX 5 // second dimension of abs_contexts
- typedef struct // context models for the remaining syntax-element families; referenced from Slice::tex_ctx
- {
- BiContextType transform_size_contexts [NUM_TRANSFORM_SIZE_CTX];
- BiContextType ipr_contexts [NUM_IPR_CTX];
- BiContextType cipr_contexts[NUM_CIPR_CTX];
- BiContextType cbp_contexts [3][NUM_CBP_CTX];
- BiContextType bcbp_contexts[NUM_BLOCK_TYPES][NUM_BCBP_CTX]; // NUM_BLOCK_TYPES is not defined in this header
- BiContextType map_contexts [2][NUM_BLOCK_TYPES][NUM_MAP_CTX+1]; // +1 for better alignment (16 entries per row)
- BiContextType last_contexts[2][NUM_BLOCK_TYPES][NUM_LAST_CTX+1]; // +1 for better alignment (16 entries per row)
- BiContextType one_contexts [NUM_BLOCK_TYPES][NUM_ONE_CTX];
- BiContextType abs_contexts [NUM_BLOCK_TYPES][NUM_ABS_CTX];
- } TextureInfoContexts;
- //*********************** end of data type definition for CABAC *******************
- /***********************************************************************
- * N e w D a t a t y p e s f o r T M L
- ***********************************************************************
- */
- /*! Buffer structure for decoded reference picture marking commands: one node of a singly linked list (see Next) */
- typedef struct DecRefPicMarking_s
- {
- int memory_management_control_operation; // the int fields carry the names of the H.264 dec_ref_pic_marking() syntax elements
- int difference_of_pic_nums_minus1;
- int long_term_pic_num;
- int long_term_frame_idx;
- int max_long_term_frame_idx_plus1;
- struct DecRefPicMarking_s *Next; // next command in the list; presumably NULL terminates it - confirm
- } DecRefPicMarking_t;
- //! definition of pic motion parameters: the PicMotion fields plus two field/frame indicator bytes
- typedef struct pic_motion_params2
- {
- h264_ref_t ref_pic_id; //!< reference picture identifier [list][subblock_y][subblock_x]
- h264_ref_t ref_id; //!< reference picture identifier [list][subblock_y][subblock_x]
- short mv[2]; //!< motion vector [list][subblock_x][subblock_y][component]
- char ref_idx; //!< reference picture [list][subblock_y][subblock_x]
- byte mb_field; //!< field macroblock indicator
- byte field_frame; //!< indicates if co_located is field or frame.
- } PicMotionParams2; // NOTE(review): the bracketed index lists above describe array layouts, but each member here is a single value - confirm they refer to how instances are arranged
- //! Macroblock: per-macroblock record holding back-pointers, position, QP, parsed syntax elements, neighbour info and per-MB callbacks
- typedef struct macroblock
- {
- struct slice *p_Slice; //!< pointer to the current slice
- struct img_par *p_Vid; //!< pointer to VideoParameters
- struct inp_par *p_Inp; // pointer to struct inp_par (declared elsewhere)
- int mbAddrX; //!< current MB address
- int mb_x; // position fields follow; their units are suggested only by the names (mb, block, pix, pix_c, subblock) - confirm
- int mb_y;
- int block_x;
- int block_y;
- int block_y_aff;
- int pix_x;
- int pix_y;
- int pix_c_x;
- int pix_c_y;
- int subblock_x;
- int subblock_y;
- int qp; //!< QP luma
- int qpc[2]; //!< QP chroma
- int qp_scaled[MAX_PLANE]; //!< QP scaled for all comps.
- Boolean is_lossless;
- Boolean is_intra_block;
- Boolean is_v_block;
- short slice_nr;
- short delta_quant; //!< for rate control
- struct macroblock *mb_up; //!< pointer to neighboring MB (CABAC)
- struct macroblock *mb_left; //!< pointer to neighboring MB (CABAC)
- // some storage of macroblock syntax elements for global access
- int mb_type;
- short mvd[2][BLOCK_MULTIPLE][BLOCK_MULTIPLE][2]; //!< indices correspond to [forw,backw][block_y][block_x][x,y]
- int cbp;
- int64 cbp_blk [3];
- int64 cbp_bits [3];
- int64 cbp_bits_8x8[3];
- int i16mode;
- char b8mode[4];
- char b8pdir[4];
- char ei_flag; //!< error indicator flag that enables concealment
- char dpl_flag; //!< error indicator flag that signals a missing data partition
- char ipmode_DPCM;
- short DFDisableIdc; // same three deblocking-filter field names as in Slice
- short DFAlphaC0Offset;
- short DFBetaOffset;
- char c_ipred_mode; //!< chroma intra prediction mode
- Boolean mb_field;
- int skip_flag;
- int mb_addr_left, mb_addr_up, mb_addr_upper_right, mb_addr_upper_left; // neighbour addresses, paired by name with the mb_avail_* flags below
- Boolean mb_avail_left, mb_avail_up, mb_avail_upper_right, mb_avail_upper_left;
- Boolean luma_transform_size_8x8_flag;
- Boolean NoMbPartLessThan8x8Flag;
- void (*itrans_8x8)(struct macroblock *currMB, ColorPlane pl, int ioff, int joff); // per-macroblock callbacks from here on; where they are assigned is not visible in this header
- void (*GetMVPredictor) (struct macroblock *currMB, PixelPos *block,
- short pmv[2], short ref_frame, struct pic_motion **motion, int mb_x, int mb_y, int blockshape_x, int blockshape_y);
- int (*read_and_store_CBP_block_bit) (struct macroblock *currMB, DecodingEnvironmentPtr dep_dp, int type);
- char (*readRefPictureIdx) (struct syntaxelement *currSE, struct datapartition *dP, int list);
- } Macroblock;
- //! Syntaxelement: one decoded syntax element (value pair and code length) plus the CAVLC mapping callback
- typedef struct syntaxelement
- {
- int value1; //!< numerical value of syntax element
- int value2; //!< for blocked symbols, e.g. run/level
- int len; //!< length of code
- //int inf; //!< info part of CAVLC code
- #if TRACE
- #define TRACESTRING_SIZE 100 //!< size of trace string
- char tracestring[TRACESTRING_SIZE]; //!< trace string (member exists only when TRACE is non-zero, so the struct size depends on TRACE)
- #endif
- //! for mapping of CAVLC to syntaxElement
- void (*mapping)(int len, int info, int *value1, int *value2); // receives len and info and writes its results through value1 / value2
- } SyntaxElement;
- //! Bitstream: read positions and lengths for one code buffer, with separate bookkeeping for CABAC and CAVLC
- typedef struct
- {
- // CABAC Decoding
- int read_len; //!< actual position in the codebuffer, CABAC only
- int code_len; //!< overall codebuffer length, CABAC only
- // CAVLC Decoding
- int frame_bitoffset; //!< actual position in the codebuffer, bit-oriented, CAVLC only
- int bitstream_length; //!< overall codebuffer length, byte oriented, CAVLC only
- byte *streamBuffer; //!< actual codebuffer for read bytes
- } Bitstream;
- /* === 4x4 block typedefs === (each block type is an array of BLOCK_SIZE rows of BLOCK_SIZE elements) */
- // 32 bit precision
- typedef int h264_int_block_row_t[BLOCK_SIZE];
- typedef h264_int_block_row_t h264_int_block_t[BLOCK_SIZE];
- // 16 bit precision
- typedef int16_t h264_short_block_row_t[BLOCK_SIZE];
- typedef h264_short_block_row_t h264_short_block_t[BLOCK_SIZE];
- // 8 bit precision (NOTE(review): no imgpel 4x4 typedef follows here, unlike the 8x8 and 16x16 groups)
- /* === 8x8 block typedefs === (BLOCK_SIZE_8x8 rows of BLOCK_SIZE_8x8 elements) */
- // 32 bit precision
- typedef int h264_int_8x8block_row_t[BLOCK_SIZE_8x8];
- typedef h264_int_8x8block_row_t h264_int_8x8block_t[BLOCK_SIZE_8x8];
- // 16 bit precision
- typedef int16_t h264_short_8x8block_row_t[BLOCK_SIZE_8x8];
- typedef h264_short_8x8block_row_t h264_short_8x8block_t[BLOCK_SIZE_8x8];
- // imgpel precision (labelled "8 bit"; the actual width of imgpel is defined elsewhere)
- typedef imgpel h264_imgpel_8x8block_row_t[BLOCK_SIZE_8x8];
- typedef h264_imgpel_8x8block_row_t h264_imgpel_8x8block_t[BLOCK_SIZE_8x8];
- /* === 16x16 block typedefs === (MB_BLOCK_SIZE rows of MB_BLOCK_SIZE elements) */
- // 32 bit precision
- typedef int h264_int_macroblock_row_t[MB_BLOCK_SIZE];
- typedef h264_int_macroblock_row_t h264_int_macroblock_t[MB_BLOCK_SIZE];
- // 16 bit precision
- typedef int16_t h264_short_macroblock_row_t[MB_BLOCK_SIZE];
- typedef h264_short_macroblock_row_t h264_short_macroblock_t[MB_BLOCK_SIZE];
- // imgpel precision (labelled "8 bit"; the actual width of imgpel is defined elsewhere)
- typedef imgpel h264_imgpel_macroblock_row_t[MB_BLOCK_SIZE];
- typedef h264_imgpel_macroblock_row_t h264_imgpel_macroblock_t[MB_BLOCK_SIZE];
- typedef int h264_pic_position[2]; // pair of ints; presumably an (x, y) position - confirm
- typedef byte h264_4x4_byte[BLOCK_SIZE][BLOCK_SIZE]; // BLOCK_SIZE x BLOCK_SIZE bytes
- typedef h264_4x4_byte h264_nz_coefficient[3]; // three h264_4x4_byte grids; presumably one per colour plane - confirm
- //! DataPartition: pairs a Bitstream with its own CABAC decoding-engine state; Slice::partArr is an array of these
- typedef struct datapartition
- {
- Bitstream *bitstream; // pointer to the Bitstream for this partition; allocation is not visible in this header
- DecodingEnvironment de_cabac; // arithmetic decoder state, stored by value
- } DataPartition;
- //! Slice: per-slice decoding state (header values, partitions and CABAC contexts, 32-byte-aligned MB work buffers, scaling tables, weighted-prediction tables, per-slice callbacks)
- typedef struct slice
- {
- struct img_par *p_Vid; // back-pointer to the picture-level struct img_par
- struct inp_par *p_Inp; // pointer to struct inp_par (declared elsewhere)
- pic_parameter_set_rbsp_t *active_pps;
- seq_parameter_set_rbsp_t *active_sps;
- struct colocated_params *p_colocated;
- struct colocated_params *Co_located_JV[MAX_PLANE]; //!< p_colocated to be used during 4:4:4 independent mode decoding
- int mb_aff_frame_flag;
- int direct_spatial_mv_pred_flag; //!< Indicator for direct mode type (1 for Spatial, 0 for Temporal)
- int num_ref_idx_l0_active; //!< number of available list 0 references
- int num_ref_idx_l1_active; //!< number of available list 1 references
- int qp;
- int slice_qp_delta;
- int qs;
- int slice_qs_delta;
- int slice_type; //!< slice type
- int model_number; //!< cabac model number
- PictureStructure structure; //!< Identify picture structure type
- int start_mb_nr; //!< MUST be set by NAL even in case of ei_flag == 1
- int max_part_nr;
- int dp_mode; //!< data partitioning mode
- int last_dquant;
- // int last_mb_nr; //!< only valid when entropy coding == CABAC
- DataPartition *partArr; //!< array of partitions
- MotionInfoContexts *mot_ctx; //!< pointer to struct of context models for use in CABAC
- TextureInfoContexts *tex_ctx; //!< pointer to struct of context models for use in CABAC
- int mvscale[6][MAX_REFERENCE_PICTURES];
- int ref_pic_list_reordering_flag_l0; // reordering data for list 0; the pointer members reference int arrays whose length is not visible here
- int *reordering_of_pic_nums_idc_l0;
- int *abs_diff_pic_num_minus1_l0;
- int *long_term_pic_idx_l0;
- int ref_pic_list_reordering_flag_l1; // same set of fields for list 1
- int *reordering_of_pic_nums_idc_l1;
- int *abs_diff_pic_num_minus1_l1;
- int *long_term_pic_idx_l1;
-
- short DFDisableIdc; //!< Disable deblocking filter on slice
- short DFAlphaC0Offset; //!< Alpha and C0 offset for filtering slice
- short DFBetaOffset; //!< Beta offset for filtering slice
- int pic_parameter_set_id; //!<the ID of the picture parameter set the slice is referring to
- int dpB_NotPresent; //!< non-zero, if data partition B is lost
- int dpC_NotPresent; //!< non-zero, if data partition C is lost
- __declspec(align(32)) h264_imgpel_macroblock_t mb_pred[MAX_PLANE]; // __declspec(align(32)) is MSVC syntax: 32-byte alignment for the buffers below
- __declspec(align(32)) h264_imgpel_macroblock_t mb_rec[MAX_PLANE];
- __declspec(align(32)) union // anonymous union: the four views below share the same storage
- {
- __declspec(align(32)) h264_short_8x8block_t mb_rres8[MAX_PLANE][4];
- __declspec(align(32)) h264_short_macroblock_t cof[MAX_PLANE];
- __declspec(align(32)) h264_short_block_t cof4[MAX_PLANE][16]; // TODO: get this to work, one of these days
- __declspec(align(32)) h264_short_macroblock_t ipcm[MAX_PLANE];
- };
- int cofu[16];
- // Scaling matrix info
- int InvLevelScale4x4_Intra[3][6][4][4];
- int InvLevelScale4x4_Inter[3][6][4][4];
- int InvLevelScale8x8_Intra[3][6][64];
- int InvLevelScale8x8_Inter[3][6][64];
- int *qmatrix[12];
- // Cabac
- // TODO: we could optimize coefficient reading by storing the levels/runs instead of coefficients
- int16_t coeff[64]; // one more for EOB (NOTE(review): the array has exactly 64 entries, so the "one more" does not match the declared size - confirm)
- int coeff_ctr;
- int pos;
- //weighted prediction
- unsigned int apply_weights;
- unsigned int luma_log2_weight_denom;
- unsigned int chroma_log2_weight_denom;
- int wp_weight[2][MAX_REFERENCE_PICTURES][3]; // weight in [list][index][component] order
- int wp_offset[6][MAX_REFERENCE_PICTURES][3]; // offset in [list][index][component] order
- int wbp_weight[6][MAX_REFERENCE_PICTURES][MAX_REFERENCE_PICTURES][3]; //weight in [list][fw_index][bw_index][component] order
- int wp_round_luma;
- int wp_round_chroma;
- void (*read_CBP_and_coeffs_from_NAL) (Macroblock *currMB); // per-slice callbacks from here on; where they are assigned is not visible in this header
- int (*decode_one_component ) (Macroblock *currMB, ColorPlane curr_plane, struct video_image *image, struct storable_picture *dec_picture);
- int (*readSlice ) (struct img_par *, struct inp_par *);
- int (*nal_startcode_follows ) (struct slice*, int );
- void (*read_motion_info_from_NAL) (Macroblock *currMB);
- void (*read_one_macroblock ) (Macroblock *currMB);
- void (*interpret_mb_mode ) (Macroblock *currMB);
- void (*compute_colocated ) (struct slice *currSlice, struct colocated_params *p, struct storable_picture **listX[6]);
- void (*linfo_cbp_intra) (int len,int info,int *cbp, int *dummy); // same parameter shape as SyntaxElement::mapping
- void (*linfo_cbp_inter) (int len,int info,int *cbp, int *dummy); // same parameter shape as SyntaxElement::mapping
- } Slice;
- //****************************** ~DM ***********************************
- // image parameters
- typedef struct img_par
- {
- struct inp_par *p_Inp;
- pic_parameter_set_rbsp_t *active_pps;
- seq_parameter_set_rbsp_t *active_sps;
- seq_parameter_set_rbsp_t SeqParSet[MAXSPS];
- pic_parameter_set_rbsp_t PicParSet[MAXPPS];
- struct sei_params *p_SEI;
- struct old_slice_par *old_slice;
- int number; //!< frame number
- unsigned int current_mb_nr; // bitstream order
- unsigned int num_dec_mb;
- short current_slice_nr;
- int *intra_block;
-
- int qp; //!< quant for the current frame
- int sp_switch; //!< 1 for switching sp, 0 for normal sp
- int type; //!< image type INTER/INTRA
- int width;
- int height;
- int width_cr; //!< width chroma
- int height_cr; //!< height chroma
- int mb_x;
- int mb_y;
- int block_x;
- int block_y;
- int pix_c_x;
- int pix_c_y;
- int allrefzero;
- byte **ipredmode; //!< prediction type [90][74]
- h264_nz_coefficient *nz_coeff;
- int **siblock;
- int cod_counter; //!< Current count of number of skipped macroblocks in a row
- int structure; //!< Identify picture structure type
- Slice *currentSlice; //!< pointer to current Slice data struct
- Macroblock *mb_data; //!< array containing all MBs of a whole frame
- Macroblock *mb_data_JV[MAX_PLANE]; //!< mb_data to be used for 4:4:4 independent mode
- int colour_plane_id; //!< colour_plane_id of the current coded slice
- int ChromaArrayType;
- // For MB level frame/field coding
- int mb_aff_frame_flag;
- // for signalling to the neighbour logic that this is a deblocker call
- int DeblockCall;
- byte mixedModeEdgeFlag;
- // picture error concealment
- // concealment_head points to first node in list, concealment_end points to
- // last node in list. Initialize both to NULL, meaning no nodes in list yet
- struct concealment_node *concealment_head;
- struct concealment_node *concealment_end;
- DecRefPicMarking_t *dec_ref_pic_marking_buffer; //!< stores the memory management control operations
- int num_ref_idx_l0_active; //!< number of forward reference
- int num_ref_idx_l1_active; //!< number of backward reference
- int slice_group_change_cycle;
- int redundant_pic_cnt;
- unsigned int pre_frame_num; //!< store the frame_num in the last decoded slice. For detecting gap in frame_num.
- int non_conforming_stream;
- // End JVT-D101
- // POC200301: from unsigned int to int
- int toppoc; //poc for this top field // POC200301
- int bottompoc; //poc of bottom field of frame
- int framepoc; //poc of this frame // POC200301
- unsigned int frame_num; //frame_num for this frame
- unsigned int field_pic_flag;
- byte bottom_field_flag;
- //the following is for slice header syntax elements of poc
- // for poc mode 0.
- unsigned int pic_order_cnt_lsb;
- int delta_pic_order_cnt_bottom;
- // for poc mode 1.
- int delta_pic_order_cnt[3];
- // ////////////////////////
- // for POC mode 0:
- signed int PrevPicOrderCntMsb;
- unsigned int PrevPicOrderCntLsb;
- signed int PicOrderCntMsb;
- // for POC mode 1:
- unsigned int AbsFrameNum;
- signed int ExpectedPicOrderCnt, PicOrderCntCycleCnt, FrameNumInPicOrderCntCycle;
- unsigned int PreviousFrameNum, FrameNumOffset;
- int ExpectedDeltaPerPicOrderCntCycle;
- int PreviousPOC, ThisPOC;
- int PreviousFrameNumOffset;
- // /////////////////////////
- int idr_flag;
- int nal_reference_idc; //!< nal_reference_idc from NAL unit
- int idr_pic_id;
- int MaxFrameNum;
- unsigned int PicWidthInMbs;
- unsigned int PicHeightInMapUnits;
- unsigned int FrameHeightInMbs;
- unsigned int PicHeightInMbs;
- unsigned int PicSizeInMbs;
- unsigned int FrameSizeInMbs;
- unsigned int oldFrameSizeInMbs;
- int no_output_of_prior_pics_flag;
- int long_term_reference_flag;
- int adaptive_ref_pic_buffering_flag;
- int last_has_mmco_5;
- int last_pic_bottom_field;
- // Fidelity Range Extensions Stuff
- short bitdepth_luma;
- short bitdepth_chroma;
- int bitdepth_scale[2];
- int bitdepth_luma_qp_scale;
- int bitdepth_chroma_qp_scale;
- unsigned int dc_pred_value_comp[MAX_PLANE]; //!< component value for DC prediction (depends on component pel bit depth)
- int max_pel_value_comp[MAX_PLANE]; //!< max value that one picture element (pixel) can take (depends on pic_unit_bitdepth)
- int Transform8x8Mode;
- int profile_idc;
- int yuv_format;
- int lossless_qpprime_flag;
- int num_blk8x8_uv;
- int num_uv_blocks;
- int num_cdc_coeff;
- int mb_cr_size_x;
- int mb_cr_size_y;
- int mb_cr_size_x_blk;
- int mb_cr_size_y_blk;
- int mb_size[3][2]; //!< component macroblock dimensions
- int mb_size_blk[3][2]; //!< component macroblock dimensions
- int mb_size_shift[3][2];
- int subpel_x;
- int subpel_y;
- int shiftpel_x;
- int shiftpel_y;
- int max_vmv_r; //!< maximum vertical motion vector range in luma quarter frame pixel units for the current level_idc
- int max_mb_vmv_r; //!< maximum vertical motion vector range in luma quarter pixel units for the current level_idc
- // picture error concealment
- int last_ref_pic_poc;
- int ref_poc_gap;
- int poc_gap;
- int earlier_missing_poc;
- unsigned int frame_to_conceal;
- int IDR_concealment_flag;
- int conceal_slice_type;
- // random access point decoding
- int recovery_point;
- int recovery_point_found;
- int recovery_frame_cnt;
- int recovery_frame_num;
- int recovery_poc;
- int separate_colour_plane_flag;
- int frame_number;
- int init_bl_done;
- // Redundant slices. Should be moved to another structure and allocated only if extended profile
- unsigned int previous_frame_num; //!< frame number of previous slice
- int ref_flag[17]; //!< 0: i-th previous frame is incorrect
- //!< non-zero: i-th previous frame is correct
- int Is_primary_correct; //!< if primary frame is correct, 0: incorrect
- int Is_redundant_correct; //!< if redundant frame is correct, 0:incorrect
- int redundant_slice_ref_idx; //!< reference index of redundant slice
- //FILE *p_log; //!< SNR file
- int LastAccessUnitExists;
- int NALUCount;
- Boolean global_init_done;
- int *qp_per_matrix;
- int *qp_rem_matrix;
- struct frame_store *last_out_fs;
- int pocs_in_dpb[100];
- struct storable_picture *dec_picture;
- struct storable_picture *dec_picture_JV[MAX_PLANE]; //!< dec_picture to be used during 4:4:4 independent mode decoding
- struct storable_picture *no_reference_picture; //!< dummy storable picture for recovery point
- struct storable_picture **listX[6];
- // Error parameters
- struct object_buffer *erc_object_list;
- struct ercVariables_s *erc_errorVar;
- int erc_mvperMB;
- struct img_par *erc_img;
- int ec_flag[SE_MAX_ELEMENTS]; //!< array to set errorconcealment
- struct memory_input_struct *mem_input;
- struct frame_store *out_buffer;
- struct storable_picture *pending_output;
- int pending_output_state;
- int recovery_flag;
- // dpb
- struct decoded_picture_buffer *p_Dpb;
- char listXsize[6];
- // report
- char cslice_type[9];
- // FMO
- int *MbToSliceGroupMap;
- int *MapUnitToSliceGroupMap;
- int NumberOfSliceGroups; // the number of slice groups -1 (0 == scan order, 7 == maximum)
- #if (ENABLE_OUTPUT_TONEMAPPING)
- struct tone_mapping_struct_s *seiToneMapping;
- #endif
- // benski> buffer of storable pictures ready for output.
- // might be able to optimize a tad by making it a ring buffer, but I doubt it matters
- struct storable_picture **out_pictures;
- size_t size_out_pictures;
- size_t num_out_pictures;
- ImageCache image_cache[2]; // [0] is luma [1] is chroma (shared for both planes)
- MotionCache motion_cache;
- h264_pic_position *PicPos; //! Helper array to access macroblock positions.
- NALU_t *nalu; // a cache so we don't re-alloc every time
- void (*getNeighbour) (const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
- void (*getNeighbourPX_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
- void (*getNeighbourXP_NoPos)(const Macroblock *currMB, int xN, int yN, const int mb_size[2], PixelPos *pix);
- void (*getNeighbourLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourPXLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourXPLuma) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourLeftLuma)(const Macroblock *currMB, PixelPos *pix);
- void (*getNeighbourNXLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN<0, yN full range
- void (*getNeighbourLeft) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN<0, yN=0
- void (*getNeighbourUp) (const Macroblock *currMB, const int mb_size[2], PixelPos *pix); // xN=0, yN<0
- void (*getNeighbourNX) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN full range
- void (*getNeighbourNP) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
- void (*getNeighbourNPChromaNB)(const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN<0, yN>=0
- void (*getNeighbour0X) (const Macroblock *currMB, int yN, const int mb_size[2], PixelPos *pix); // xN=0, yN full range
- void (*getNeighbour0XLuma) (const Macroblock *currMB, int yN, PixelPos *pix); // xN=0, yN full range
- void (*getNeighbourX0) (const Macroblock *currMB, int xN, const int mb_size[2], PixelPos *pix); // xN full range, yN = 0
- void (*getNeighbourUpLuma) (const Macroblock *currMB, PixelPos *pix); // xN=0, yN<0
- void (*getNeighbourNPLumaNB)(const Macroblock *currMB, int yN, PixelPos *pix);
- void (*getNeighbourPXLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourPXLumaNB_NoPos)(const Macroblock *currMB, int yN, PixelPos *pix);
- void (*getNeighbourPPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourXPLumaNB) (const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*getNeighbourXPLumaNB_NoPos)(const Macroblock *currMB, int xN, int yN, PixelPos *pix);
- void (*get_mb_block_pos) (const h264_pic_position *PicPos, int mb_addr, short *x, short *y);
- void (*GetStrength) (byte Strength[16], Macroblock *MbQ, int dir,int edge, int mvlimit, struct storable_picture *p);
- void (*EdgeLoopLuma) (ColorPlane pl, struct video_image *image, const byte Strength[16], Macroblock *MbQ, int dir, int edge, struct storable_picture *p);
- void (*EdgeLoopChroma) (struct video_image *image, byte Strength[16], Macroblock *MbQ, int dir, int edge, int uv, struct storable_picture *p);
- } VideoParameters;
- // Input parameters from configuration file (struct inp_par, typedef name InputParameters). DecoderParams holds a pointer to one as p_Inp. The HRD/leaky-bucket members exist only when _LEAKYBUCKET_ is defined, so the struct layout depends on that macro.
- typedef struct inp_par
- {
- int intra_profile_deblocking; //!< Loop filter usage determined by flags and parameters in bitstream
- // Output sequence format related variables
- FrameFormat output; //!< output related information
- #ifdef _LEAKYBUCKET_
- unsigned long R_decoder; //!< Decoder Rate in HRD Model
- unsigned long B_decoder; //!< Decoder Buffer size in HRD model
- unsigned long F_decoder; //!< Decoder Initial buffer fullness in HRD model
- char LeakyBucketParamFile[FILE_NAME_SIZE]; //!< leaky bucket parameter file, stored in a buffer of FILE_NAME_SIZE chars; presumably a file name/path -- TODO confirm
- #endif
- // picture error concealment (VideoParameters declares members with the same two names)
- int ref_poc_gap; //!< NOTE(review): presumably the expected POC gap between reference pictures -- confirm against users
- int poc_gap; //!< NOTE(review): presumably the expected POC gap between consecutive pictures -- confirm against users
- } InputParameters;
- typedef struct old_slice_par // Saved set of slice-level values. NOTE(review): the member set resembles the slice-header fields compared to detect the first slice of a new picture; presumably it holds the previous slice's values -- confirm against users.
- {
- unsigned field_pic_flag; //!< saved field_pic_flag
- unsigned frame_num; //!< saved frame_num
- int nal_ref_idc; //!< saved nal_ref_idc (VideoParameters names its counterpart nal_reference_idc)
- unsigned pic_oder_cnt_lsb; //!< saved pic_order_cnt_lsb; member name is misspelled ("oder") [sic] -- renaming would break existing users
- int delta_pic_oder_cnt_bottom; //!< saved delta_pic_order_cnt_bottom; member name is misspelled ("oder") [sic] -- renaming would break existing users
- int delta_pic_order_cnt[2]; //!< saved delta_pic_order_cnt values; NOTE(review): 2 elements here, while VideoParameters declares delta_pic_order_cnt[3]
- byte bottom_field_flag; //!< saved bottom_field_flag
- byte idr_flag; //!< saved idr_flag (stored as byte here, int in VideoParameters)
- int idr_pic_id; //!< saved idr_pic_id
- int pps_id; //!< saved picture parameter set id -- presumably the PPS referenced by the slice; confirm
- } OldSliceParams;
- typedef struct decoder_params // Top-level decoder handle: bundles pointers to the input parameters and the video/image parameters. Ownership of the pointed-to objects is not visible here.
- {
- InputParameters *p_Inp; //!< Input Parameters
- VideoParameters *p_Vid; //!< Image Parameters
-
- } DecoderParams;
- #ifdef TRACE
- extern FILE *p_trace; //!< Trace file (declared only when TRACE is defined)
- extern int bitcounter; //!< declared only when TRACE is defined; presumably a running count of traced bits -- confirm
- #endif
- // prototypes (definitions live in other translation units; behaviour notes below are hedged where not visible from this header)
- extern void error(char *text, int code); // reports an error given a message and a numeric code; NOTE(review): presumably terminates the decoder -- confirm. text is not const-qualified; changing that requires updating the definition as well.
- // dynamic mem allocation
- extern int init_global_buffers(VideoParameters *p_Vid); // returns int; meaning of the value is not visible here -- confirm
- extern void free_global_buffers(VideoParameters *p_Vid); // counterpart of init_global_buffers, judging by name
- extern int RBSPtoSODB(byte *streamBuffer, int last_byte_pos); // presumably converts the buffer in place from RBSP to SODB and returns a position/length -- confirm
- extern int EBSPtoRBSP(byte *streamBuffer, int end_bytepos); // presumably converts the buffer in place from EBSP to RBSP and returns the new end position -- confirm
- void FreePartition (DataPartition *dp, int n); // n is presumably the number of partitions in dp -- confirm
- DataPartition *AllocPartition(int n); // presumably allocates n partitions, to be released with FreePartition -- confirm
- void tracebits(const char *trace_str, int len, int info,int value1); // NOTE(review): declared unconditionally although p_trace is only declared under TRACE
- void tracebits2(const char *trace_str, int len, int info); // same as tracebits but without the value1 argument
- unsigned CeilLog2 ( unsigned uiVal); // presumably ceil(log2(uiVal)) -- confirm, including behaviour for uiVal == 0
- unsigned CeilLog2_sf( unsigned uiVal); // variant of CeilLog2; meaning of the _sf suffix is not visible here
- // For 4:4:4 independent mode
- extern void change_plane_JV( VideoParameters *p_Vid, int nplane ); // presumably switches p_Vid to the per-plane data (cf. mb_data_JV / dec_picture_JV) for plane nplane -- confirm
- extern void make_frame_picture_JV(VideoParameters *p_Vid); // presumably builds the frame picture from the per-plane pictures -- confirm
- #endif
|