blake2s.cpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. // Based on public domain code written in 2012 by Samuel Neves
  2. #include "rar.hpp"
  3. #ifdef USE_SSE
  4. #include "blake2s_sse.cpp"
  5. #endif
  6. static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
  7. static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
  8. static void blake2s_final( blake2s_state *S, byte *digest );
  9. #include "blake2sp.cpp"
  10. static const uint32 blake2s_IV[8] =
  11. {
  12. 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
  13. 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
  14. };
  15. static const byte blake2s_sigma[10][16] =
  16. {
  17. { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
  18. { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
  19. { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
  20. { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
  21. { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
  22. { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
  23. { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
  24. { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
  25. { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
  26. { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
  27. };
  28. static inline void blake2s_set_lastnode( blake2s_state *S )
  29. {
  30. S->f[1] = ~0U;
  31. }
  32. /* Some helper functions, not necessarily useful */
  33. static inline void blake2s_set_lastblock( blake2s_state *S )
  34. {
  35. if( S->last_node ) blake2s_set_lastnode( S );
  36. S->f[0] = ~0U;
  37. }
  38. static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
  39. {
  40. S->t[0] += inc;
  41. S->t[1] += ( S->t[0] < inc );
  42. }
  43. /* init2 xors IV with input parameter block */
  44. void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
  45. {
  46. #ifdef USE_SSE
  47. if (_SSE_Version>=SSE_SSE2)
  48. blake2s_init_sse();
  49. #endif
  50. S->init(); // Clean data.
  51. for( int i = 0; i < 8; ++i )
  52. S->h[i] = blake2s_IV[i];
  53. S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block.
  54. S->h[2] ^= node_offset;
  55. S->h[3] ^= (node_depth<<16)|0x20000000;
  56. }
  // BLAKE2s mixing step. Updates the four state words a, b, c, d in place,
  // folding in two message words chosen by the schedule table:
  //   m[blake2s_sigma[r][2*i]]   in the first half,
  //   m[blake2s_sigma[r][2*i+1]] in the second half.
  //   r        round number (row of blake2s_sigma).
  //   i        index of this G call within the round, 0..7.
  //   m        array of sixteen 32-bit message words.
  //   a,b,c,d  lvalues naming the state words to mix; each is read and written
  //            several times, so they must not have side effects.
  // The body is wrapped in do { } while (0) so that "G(...);" is exactly one
  // statement and stays correct under an unbraced if/else. Every argument is
  // parenthesised so that expression arguments expand as intended.
  #define G(r,i,m,a,b,c,d) \
    do { \
      (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
      (d) = rotr32((d) ^ (a), 16); \
      (c) = (c) + (d); \
      (b) = rotr32((b) ^ (c), 12); \
      (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
      (d) = rotr32((d) ^ (a), 8); \
      (c) = (c) + (d); \
      (b) = rotr32((b) ^ (c), 7); \
    } while (0)
  66. static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
  67. {
  68. uint32 m[16];
  69. uint32 v[16];
  70. for( size_t i = 0; i < 16; ++i )
  71. m[i] = RawGet4( block + i * 4 );
  72. for( size_t i = 0; i < 8; ++i )
  73. v[i] = S->h[i];
  74. v[ 8] = blake2s_IV[0];
  75. v[ 9] = blake2s_IV[1];
  76. v[10] = blake2s_IV[2];
  77. v[11] = blake2s_IV[3];
  78. v[12] = S->t[0] ^ blake2s_IV[4];
  79. v[13] = S->t[1] ^ blake2s_IV[5];
  80. v[14] = S->f[0] ^ blake2s_IV[6];
  81. v[15] = S->f[1] ^ blake2s_IV[7];
  82. for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows.
  83. {
  84. G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]);
  85. G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]);
  86. G(r,2,m,v[ 2],v[ 6],v[10],v[14]);
  87. G(r,3,m,v[ 3],v[ 7],v[11],v[15]);
  88. G(r,4,m,v[ 0],v[ 5],v[10],v[15]);
  89. G(r,5,m,v[ 1],v[ 6],v[11],v[12]);
  90. G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]);
  91. G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]);
  92. }
  93. for( size_t i = 0; i < 8; ++i )
  94. S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
  95. }
  96. void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
  97. {
  98. while( inlen > 0 )
  99. {
  100. size_t left = S->buflen;
  101. size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
  102. if( inlen > fill )
  103. {
  104. memcpy( S->buf + left, in, fill ); // Fill buffer
  105. S->buflen += fill;
  106. blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
  107. #ifdef USE_SSE
  108. #ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
  109. if (_SSE_Version>=SSE_SSE2)
  110. #else
  111. if (_SSE_Version>=SSE_SSSE3)
  112. #endif
  113. blake2s_compress_sse( S, S->buf );
  114. else
  115. blake2s_compress( S, S->buf ); // Compress
  116. #else
  117. blake2s_compress( S, S->buf ); // Compress
  118. #endif
  119. memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
  120. S->buflen -= BLAKE2S_BLOCKBYTES;
  121. in += fill;
  122. inlen -= fill;
  123. }
  124. else // inlen <= fill
  125. {
  126. memcpy( S->buf + left, in, (size_t)inlen );
  127. S->buflen += (size_t)inlen; // Be lazy, do not compress
  128. in += inlen;
  129. inlen = 0;
  130. }
  131. }
  132. }
  133. void blake2s_final( blake2s_state *S, byte *digest )
  134. {
  135. if( S->buflen > BLAKE2S_BLOCKBYTES )
  136. {
  137. blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
  138. blake2s_compress( S, S->buf );
  139. S->buflen -= BLAKE2S_BLOCKBYTES;
  140. memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  141. }
  142. blake2s_increment_counter( S, ( uint32 )S->buflen );
  143. blake2s_set_lastblock( S );
  144. memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  145. blake2s_compress( S, S->buf );
  146. for( int i = 0; i < 8; ++i ) /* Output full hash */
  147. RawPut4( S->h[i], digest + 4 * i );
  148. }