encoder.c 18 KB


  1. /*
  2. * LAME MP3 encoding engine
  3. *
  4. * Copyright (c) 1999 Mark Taylor
  5. * Copyright (c) 2000-2002 Takehiro Tominaga
  6. * Copyright (c) 2000-2011 Robert Hegemann
  7. * Copyright (c) 2001 Gabriel Bouvigne
  8. * Copyright (c) 2001 John Dahlstrom
  9. *
  10. * This library is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Library General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2 of the License, or (at your option) any later version.
  14. *
  15. * This library is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Library General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Library General Public
  21. * License along with this library; if not, write to the
  22. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23. * Boston, MA 02111-1307, USA.
  24. */
  25. /* $Id: encoder.c,v 1.114 2017/08/26 10:54:57 robert Exp $ */
  26. #ifdef HAVE_CONFIG_H
  27. #include <config.h>
  28. #endif
  29. #include "lame.h"
  30. #include "machine.h"
  31. #include "encoder.h"
  32. #include "util.h"
  33. #include "lame_global_flags.h"
  34. #include "newmdct.h"
  35. #include "psymodel.h"
  36. #include "lame-analysis.h"
  37. #include "bitstream.h"
  38. #include "VbrTag.h"
  39. #include "quantize.h"
  40. #include "quantize_pvt.h"
  41. /*
  42. * auto-adjust of ATH, useful for low volume
  43. * Gabriel Bouvigne 3 feb 2001
  44. *
  45. * modifies some values in
  46. * gfp->internal_flags->ATH
  47. * (gfc->ATH)
  48. */
  49. static void
  50. adjust_ATH(lame_internal_flags const *const gfc)
  51. {
  52. SessionConfig_t const *const cfg = &gfc->cfg;
  53. FLOAT gr2_max, max_pow;
  54. if (gfc->ATH->use_adjust == 0) {
  55. gfc->ATH->adjust_factor = 1.0; /* no adjustment */
  56. return;
  57. }
  58. /* jd - 2001 mar 12, 27, jun 30 */
  59. /* loudness based on equal loudness curve; */
  60. /* use granule with maximum combined loudness */
  61. max_pow = gfc->ov_psy.loudness_sq[0][0];
  62. gr2_max = gfc->ov_psy.loudness_sq[1][0];
  63. if (cfg->channels_out == 2) {
  64. max_pow += gfc->ov_psy.loudness_sq[0][1];
  65. gr2_max += gfc->ov_psy.loudness_sq[1][1];
  66. }
  67. else {
  68. max_pow += max_pow;
  69. gr2_max += gr2_max;
  70. }
  71. if (cfg->mode_gr == 2) {
  72. max_pow = Max(max_pow, gr2_max);
  73. }
  74. max_pow *= 0.5; /* max_pow approaches 1.0 for full band noise */
  75. /* jd - 2001 mar 31, jun 30 */
  76. /* user tuning of ATH adjustment region */
  77. max_pow *= gfc->ATH->aa_sensitivity_p;
  78. /* adjust ATH depending on range of maximum value
  79. */
  80. /* jd - 2001 feb27, mar12,20, jun30, jul22 */
  81. /* continuous curves based on approximation */
  82. /* to GB's original values. */
  83. /* For an increase in approximate loudness, */
  84. /* set ATH adjust to adjust_limit immediately */
  85. /* after a delay of one frame. */
  86. /* For a loudness decrease, reduce ATH adjust */
  87. /* towards adjust_limit gradually. */
  88. /* max_pow is a loudness squared or a power. */
  89. if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
  90. if (gfc->ATH->adjust_factor >= 1.0) {
  91. gfc->ATH->adjust_factor = 1.0;
  92. }
  93. else {
  94. /* preceding frame has lower ATH adjust; */
  95. /* ascend only to the preceding adjust_limit */
  96. /* in case there is leading low volume */
  97. if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
  98. gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
  99. }
  100. }
  101. gfc->ATH->adjust_limit = 1.0;
  102. }
  103. else { /* adjustment curve */
  104. /* about 32 dB maximum adjust (0.000625) */
  105. FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
  106. if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
  107. gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
  108. if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
  109. gfc->ATH->adjust_factor = adj_lim_new;
  110. }
  111. }
  112. else { /* ascend */
  113. if (gfc->ATH->adjust_limit >= adj_lim_new) {
  114. gfc->ATH->adjust_factor = adj_lim_new;
  115. }
  116. else { /* preceding frame has lower ATH adjust; */
  117. /* ascend only to the preceding adjust_limit */
  118. if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
  119. gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
  120. }
  121. }
  122. }
  123. gfc->ATH->adjust_limit = adj_lim_new;
  124. }
  125. }
  126. /***********************************************************************
  127. *
  128. * some simple statistics
  129. *
  130. * bitrate index 0: free bitrate -> not allowed in VBR mode
  131. * : bitrates, kbps depending on MPEG version
  132. * bitrate index 15: forbidden
  133. *
  134. * mode_ext:
  135. * 0: LR
  136. * 1: LR-i
  137. * 2: MS
  138. * 3: MS-i
  139. *
  140. ***********************************************************************/
  141. static void
  142. updateStats(lame_internal_flags * const gfc)
  143. {
  144. SessionConfig_t const *const cfg = &gfc->cfg;
  145. EncResult_t *eov = &gfc->ov_enc;
  146. int gr, ch;
  147. assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
  148. assert(0 <= eov->mode_ext && eov->mode_ext < 4);
  149. /* count bitrate indices */
  150. eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
  151. eov->bitrate_channelmode_hist[15][4]++;
  152. /* count 'em for every mode extension in case of 2 channel encoding */
  153. if (cfg->channels_out == 2) {
  154. eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
  155. eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
  156. }
  157. for (gr = 0; gr < cfg->mode_gr; ++gr) {
  158. for (ch = 0; ch < cfg->channels_out; ++ch) {
  159. int bt = gfc->l3_side.tt[gr][ch].block_type;
  160. if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
  161. bt = 4;
  162. eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
  163. eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
  164. eov->bitrate_blocktype_hist[15][bt]++;
  165. eov->bitrate_blocktype_hist[15][5]++;
  166. }
  167. }
  168. }
  169. static void
  170. lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
  171. {
  172. SessionConfig_t const *const cfg = &gfc->cfg;
  173. int ch, gr;
  174. if (gfc->lame_encode_frame_init == 0) {
  175. sample_t primebuff0[286 + 1152 + 576];
  176. sample_t primebuff1[286 + 1152 + 576];
  177. int const framesize = 576 * cfg->mode_gr;
  178. /* prime the MDCT/polyphase filterbank with a short block */
  179. int i, j;
  180. gfc->lame_encode_frame_init = 1;
  181. memset(primebuff0, 0, sizeof(primebuff0));
  182. memset(primebuff1, 0, sizeof(primebuff1));
  183. for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
  184. if (i < framesize) {
  185. primebuff0[i] = 0;
  186. if (cfg->channels_out == 2)
  187. primebuff1[i] = 0;
  188. }
  189. else {
  190. primebuff0[i] = inbuf[0][j];
  191. if (cfg->channels_out == 2)
  192. primebuff1[i] = inbuf[1][j];
  193. ++j;
  194. }
  195. }
  196. /* polyphase filtering / mdct */
  197. for (gr = 0; gr < cfg->mode_gr; gr++) {
  198. for (ch = 0; ch < cfg->channels_out; ch++) {
  199. gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
  200. }
  201. }
  202. mdct_sub48(gfc, primebuff0, primebuff1);
  203. /* check FFT will not use a negative starting offset */
  204. #if 576 < FFTOFFSET
  205. # error FFTOFFSET greater than 576: FFT uses a negative offset
  206. #endif
  207. /* check if we have enough data for FFT */
  208. assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
  209. /* check if we have enough data for polyphase filterbank */
  210. assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
  211. }
  212. }
  213. /************************************************************************
  214. *
  215. * encodeframe() Layer 3
  216. *
  217. * encode a single frame
  218. *
  219. ************************************************************************
  220. lame_encode_frame()
  221. gr 0 gr 1
  222. inbuf: |--------------|--------------|--------------|
  223. Polyphase (18 windows, each shifted 32)
  224. gr 0:
  225. window1 <----512---->
  226. window18 <----512---->
  227. gr 1:
  228. window1 <----512---->
  229. window18 <----512---->
  230. MDCT output: |--------------|--------------|--------------|
  231. FFT's <---------1024---------->
  232. <---------1024-------->
  233. inbuf = buffer of PCM data size=MP3 framesize
  234. encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
  235. so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
  236. psy-model FFT has a 1 granule delay, so we feed it data for the
  237. next granule.
  238. FFT is centered over granule: 224+576+224
  239. So FFT starts at: 576-224-MDCTDELAY
  240. MPEG2: FFT ends at: BLKSIZE+576-224-MDCTDELAY (1328)
  241. MPEG1: FFT ends at: BLKSIZE+2*576-224-MDCTDELAY (1904)
  242. MPEG2: polyphase first window: [0..511]
  243. 18th window: [544..1055] (1056)
  244. MPEG1: 36th window: [1120..1631] (1632)
  245. data needed: 512+framesize-32
  246. A close look at newmdct.c shows that the polyphase filterbank
  247. only uses data from [0..510] for each window. Perhaps because the window
  248. used by the filterbank is zero for the last point, so Takehiro's
  249. code doesn't bother to compute with it.
  250. FFT starts at 576-224-MDCTDELAY (304) = 576-FFTOFFSET
  251. */
  252. typedef FLOAT chgrdata[2][2];
  253. int
  254. lame_encode_mp3_frame( /* Output */
  255. lame_internal_flags * gfc, /* Context */
  256. sample_t const *inbuf_l, /* Input */
  257. sample_t const *inbuf_r, /* Input */
  258. unsigned char *mp3buf, /* Output */
  259. int mp3buf_size)
  260. { /* Output */
  261. SessionConfig_t const *const cfg = &gfc->cfg;
  262. int mp3count;
  263. III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
  264. III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
  265. const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
  266. const sample_t *inbuf[2];
  267. FLOAT tot_ener[2][4];
  268. FLOAT ms_ener_ratio[2] = { .5, .5 };
  269. FLOAT pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
  270. 0., 0.}, {
  271. 0., 0.}};
  272. FLOAT (*pe_use)[2];
  273. int ch, gr;
  274. inbuf[0] = inbuf_l;
  275. inbuf[1] = inbuf_r;
  276. if (gfc->lame_encode_frame_init == 0) {
  277. /*first run? */
  278. lame_encode_frame_init(gfc, inbuf);
  279. }
  280. /********************** padding *****************************/
  281. /* padding method as described in
  282. * "MPEG-Layer3 / Bitstream Syntax and Decoding"
  283. * by Martin Sieler, Ralph Sperschneider
  284. *
  285. * note: there is no padding for the very first frame
  286. *
  287. * Robert Hegemann 2000-06-22
  288. */
  289. gfc->ov_enc.padding = FALSE;
  290. if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
  291. gfc->sv_enc.slot_lag += cfg->samplerate_out;
  292. gfc->ov_enc.padding = TRUE;
  293. }
  294. /****************************************
  295. * Stage 1: psychoacoustic model *
  296. ****************************************/
  297. {
  298. /* psychoacoustic model
  299. * psy model has a 1 granule (576) delay that we must compensate for
  300. * (mt 6/99).
  301. */
  302. int ret;
  303. const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
  304. int blocktype[2];
  305. for (gr = 0; gr < cfg->mode_gr; gr++) {
  306. for (ch = 0; ch < cfg->channels_out; ch++) {
  307. bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
  308. }
  309. ret = L3psycho_anal_vbr(gfc, bufp, gr,
  310. masking_LR, masking_MS,
  311. pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
  312. if (ret != 0)
  313. return -4;
  314. if (cfg->mode == JOINT_STEREO) {
  315. ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
  316. if (ms_ener_ratio[gr] > 0)
  317. ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
  318. }
  319. /* block type flags */
  320. for (ch = 0; ch < cfg->channels_out; ch++) {
  321. gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
  322. cod_info->block_type = blocktype[ch];
  323. cod_info->mixed_block_flag = 0;
  324. }
  325. }
  326. }
  327. /* auto-adjust of ATH, useful for low volume */
  328. adjust_ATH(gfc);
  329. /****************************************
  330. * Stage 2: MDCT *
  331. ****************************************/
  332. /* polyphase filtering / mdct */
  333. mdct_sub48(gfc, inbuf[0], inbuf[1]);
  334. /****************************************
  335. * Stage 3: MS/LR decision *
  336. ****************************************/
  337. /* Here will be selected MS or LR coding of the 2 stereo channels */
  338. gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
  339. if (cfg->force_ms) {
  340. gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
  341. }
  342. else if (cfg->mode == JOINT_STEREO) {
  343. /* ms_ratio = is scaled, for historical reasons, to look like
  344. a ratio of side_channel / total.
  345. 0 = signal is 100% mono
  346. .5 = L & R uncorrelated
  347. */
  348. /* [0] and [1] are the results for the two granules in MPEG-1,
  349. * in MPEG-2 it's only a faked averaging of the same value
  350. * _prev is the value of the last granule of the previous frame
  351. * _next is the value of the first granule of the next frame
  352. */
  353. FLOAT sum_pe_MS = 0;
  354. FLOAT sum_pe_LR = 0;
  355. for (gr = 0; gr < cfg->mode_gr; gr++) {
  356. for (ch = 0; ch < cfg->channels_out; ch++) {
  357. sum_pe_MS += pe_MS[gr][ch];
  358. sum_pe_LR += pe[gr][ch];
  359. }
  360. }
  361. /* based on PE: M/S coding would not use much more bits than L/R */
  362. if (sum_pe_MS <= 1.00 * sum_pe_LR) {
  363. gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
  364. gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
  365. if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
  366. gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
  367. }
  368. }
  369. }
  370. /* bit and noise allocation */
  371. if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
  372. masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
  373. pe_use = pe_MS;
  374. }
  375. else {
  376. masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
  377. pe_use = pe;
  378. }
  379. /* copy data for MP3 frame analyzer */
  380. if (cfg->analysis && gfc->pinfo != NULL) {
  381. for (gr = 0; gr < cfg->mode_gr; gr++) {
  382. for (ch = 0; ch < cfg->channels_out; ch++) {
  383. gfc->pinfo->ms_ratio[gr] = 0;
  384. gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
  385. gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
  386. gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
  387. memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
  388. /* in psymodel, LR and MS data was stored in pinfo.
  389. switch to MS data: */
  390. if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
  391. gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
  392. memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
  393. sizeof(gfc->pinfo->energy[gr][ch]));
  394. }
  395. }
  396. }
  397. }
  398. /****************************************
  399. * Stage 4: quantization loop *
  400. ****************************************/
  401. if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
  402. static FLOAT const fircoef[9] = {
  403. -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
  404. 7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
  405. 0.187098 * 5
  406. };
  407. int i;
  408. FLOAT f;
  409. for (i = 0; i < 18; i++)
  410. gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
  411. f = 0.0;
  412. for (gr = 0; gr < cfg->mode_gr; gr++)
  413. for (ch = 0; ch < cfg->channels_out; ch++)
  414. f += pe_use[gr][ch];
  415. gfc->sv_enc.pefirbuf[18] = f;
  416. f = gfc->sv_enc.pefirbuf[9];
  417. for (i = 0; i < 9; i++)
  418. f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
  419. f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
  420. for (gr = 0; gr < cfg->mode_gr; gr++) {
  421. for (ch = 0; ch < cfg->channels_out; ch++) {
  422. pe_use[gr][ch] *= f;
  423. }
  424. }
  425. }
  426. switch (cfg->vbr)
  427. {
  428. default:
  429. case vbr_off:
  430. CBR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
  431. break;
  432. case vbr_abr:
  433. ABR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
  434. break;
  435. case vbr_rh:
  436. VBR_old_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
  437. break;
  438. case vbr_mt:
  439. case vbr_mtrh:
  440. VBR_new_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
  441. break;
  442. }
  443. /****************************************
  444. * Stage 5: bitstream formatting *
  445. ****************************************/
  446. /* write the frame to the bitstream */
  447. (void) format_bitstream(gfc);
  448. /* copy mp3 bit buffer into array */
  449. mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
  450. if (cfg->write_lame_tag) {
  451. AddVbrFrame(gfc);
  452. }
  453. if (cfg->analysis && gfc->pinfo != NULL) {
  454. int framesize = 576 * cfg->mode_gr;
  455. for (ch = 0; ch < cfg->channels_out; ch++) {
  456. int j;
  457. for (j = 0; j < FFTOFFSET; j++)
  458. gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
  459. for (j = FFTOFFSET; j < 1600; j++) {
  460. gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
  461. }
  462. }
  463. gfc->sv_qnt.masking_lower = 1.0;
  464. set_frame_pinfo(gfc, masking);
  465. }
  466. ++gfc->ov_enc.frame_number;
  467. updateStats(gfc);
  468. return mp3count;
  469. }