1
0

blake2sp.cpp 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /*
  2. BLAKE2 reference source code package - reference C implementations
  3. Written in 2012 by Samuel Neves <[email protected]>
  4. To the extent possible under law, the author(s) have dedicated all copyright
  5. and related and neighboring rights to this software to the public domain
  6. worldwide. This software is distributed without any warranty.
  7. You should have received a copy of the CC0 Public Domain Dedication along with
  8. this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
  9. */
  10. #define PARALLELISM_DEGREE 8
  11. void blake2sp_init( blake2sp_state *S )
  12. {
  13. memset( S->buf, 0, sizeof( S->buf ) );
  14. S->buflen = 0;
  15. blake2s_init_param( &S->R, 0, 1 ); // Init root.
  16. for( uint i = 0; i < PARALLELISM_DEGREE; ++i )
  17. blake2s_init_param( &S->S[i], i, 0 ); // Init leaf.
  18. S->R.last_node = 1;
  19. S->S[PARALLELISM_DEGREE - 1].last_node = 1;
  20. }
  21. struct Blake2ThreadData
  22. {
  23. void Update();
  24. blake2s_state *S;
  25. const byte *in;
  26. size_t inlen;
  27. };
  28. void Blake2ThreadData::Update()
  29. {
  30. size_t inlen__ = inlen;
  31. const byte *in__ = ( const byte * )in;
  32. while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
  33. {
  34. #ifdef USE_SSE
  35. // We gain 5% in i7 SSE mode by prefetching next data block.
  36. if (_SSE_Version>=SSE_SSE && inlen__ >= 2 * PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES)
  37. _mm_prefetch((char*)(in__ + PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES), _MM_HINT_T0);
  38. #endif
  39. blake2s_update( S, in__, BLAKE2S_BLOCKBYTES );
  40. in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  41. inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  42. }
  43. }
  44. #ifdef RAR_SMP
  45. THREAD_PROC(Blake2Thread)
  46. {
  47. Blake2ThreadData *td=(Blake2ThreadData *)Data;
  48. td->Update();
  49. }
  50. #endif
  51. void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen )
  52. {
  53. size_t left = S->buflen;
  54. size_t fill = sizeof( S->buf ) - left;
  55. if( left && inlen >= fill )
  56. {
  57. memcpy( S->buf + left, in, fill );
  58. for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  59. blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
  60. in += fill;
  61. inlen -= fill;
  62. left = 0;
  63. }
  64. Blake2ThreadData btd_array[PARALLELISM_DEGREE];
  65. #ifdef RAR_SMP
  66. uint ThreadNumber = inlen < 0x1000 ? 1 : S->MaxThreads;
  67. if (ThreadNumber==6 || ThreadNumber==7) // 6 and 7 threads work slower than 4 here.
  68. ThreadNumber=4;
  69. #else
  70. uint ThreadNumber=1;
  71. #endif
  72. for (size_t id__=0;id__<PARALLELISM_DEGREE;)
  73. {
  74. for (uint Thread=0;Thread<ThreadNumber && id__<PARALLELISM_DEGREE;Thread++)
  75. {
  76. Blake2ThreadData *btd=btd_array+Thread;
  77. btd->inlen = inlen;
  78. btd->in = in + id__ * BLAKE2S_BLOCKBYTES;
  79. btd->S = &S->S[id__];
  80. #ifdef RAR_SMP
  81. if (ThreadNumber>1)
  82. S->ThPool->AddTask(Blake2Thread,(void*)btd);
  83. else
  84. btd->Update();
  85. #else
  86. btd->Update();
  87. #endif
  88. id__++;
  89. }
  90. #ifdef RAR_SMP
  91. if (S->ThPool!=NULL) // Can be NULL in -mt1 mode.
  92. S->ThPool->WaitDone();
  93. #endif // RAR_SMP
  94. }
  95. in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  96. inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
  97. if( inlen > 0 )
  98. memcpy( S->buf + left, in, (size_t)inlen );
  99. S->buflen = left + (size_t)inlen;
  100. }
  101. void blake2sp_final( blake2sp_state *S, byte *digest )
  102. {
  103. byte hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
  104. for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  105. {
  106. if( S->buflen > i * BLAKE2S_BLOCKBYTES )
  107. {
  108. size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
  109. if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
  110. blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
  111. }
  112. blake2s_final( &S->S[i], hash[i] );
  113. }
  114. for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  115. blake2s_update( &S->R, hash[i], BLAKE2S_OUTBYTES );
  116. blake2s_final( &S->R, digest );
  117. }