184 lines
4.9 KiB
C++
184 lines
4.9 KiB
C++
|
// Based on public domain code written in 2012 by Samuel Neves
|
||
|
|
||
|
#include "rar.hpp"
|
||
|
|
||
|
#ifdef USE_SSE
|
||
|
#include "blake2s_sse.cpp"
|
||
|
#endif
|
||
|
|
||
|
static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
|
||
|
static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
|
||
|
static void blake2s_final( blake2s_state *S, byte *digest );
|
||
|
|
||
|
#include "blake2sp.cpp"
|
||
|
|
||
|
static const uint32 blake2s_IV[8] =
|
||
|
{
|
||
|
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
|
||
|
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
|
||
|
};
|
||
|
|
||
|
static const byte blake2s_sigma[10][16] =
|
||
|
{
|
||
|
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
||
|
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
||
|
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
||
|
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
||
|
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
||
|
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
||
|
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
||
|
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
||
|
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
||
|
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
||
|
};
|
||
|
|
||
|
static inline void blake2s_set_lastnode( blake2s_state *S )
|
||
|
{
|
||
|
S->f[1] = ~0U;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Some helper functions, not necessarily useful */
|
||
|
static inline void blake2s_set_lastblock( blake2s_state *S )
|
||
|
{
|
||
|
if( S->last_node ) blake2s_set_lastnode( S );
|
||
|
|
||
|
S->f[0] = ~0U;
|
||
|
}
|
||
|
|
||
|
|
||
|
static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
|
||
|
{
|
||
|
S->t[0] += inc;
|
||
|
S->t[1] += ( S->t[0] < inc );
|
||
|
}
|
||
|
|
||
|
|
||
|
/* init2 xors IV with input parameter block */
|
||
|
void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
|
||
|
{
|
||
|
#ifdef USE_SSE
|
||
|
if (_SSE_Version>=SSE_SSE2)
|
||
|
blake2s_init_sse();
|
||
|
#endif
|
||
|
|
||
|
S->init(); // Clean data.
|
||
|
for( int i = 0; i < 8; ++i )
|
||
|
S->h[i] = blake2s_IV[i];
|
||
|
|
||
|
S->h[0] ^= 0x02080020; // We use BLAKE2sp parameters block.
|
||
|
S->h[2] ^= node_offset;
|
||
|
S->h[3] ^= (node_depth<<16)|0x20000000;
|
||
|
}
|
||
|
|
||
|
|
||
|
#define G(r,i,m,a,b,c,d) \
|
||
|
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
|
||
|
d = rotr32(d ^ a, 16); \
|
||
|
c = c + d; \
|
||
|
b = rotr32(b ^ c, 12); \
|
||
|
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
|
||
|
d = rotr32(d ^ a, 8); \
|
||
|
c = c + d; \
|
||
|
b = rotr32(b ^ c, 7);
|
||
|
|
||
|
|
||
|
static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
|
||
|
{
|
||
|
uint32 m[16];
|
||
|
uint32 v[16];
|
||
|
|
||
|
for( size_t i = 0; i < 16; ++i )
|
||
|
m[i] = RawGet4( block + i * 4 );
|
||
|
|
||
|
for( size_t i = 0; i < 8; ++i )
|
||
|
v[i] = S->h[i];
|
||
|
|
||
|
v[ 8] = blake2s_IV[0];
|
||
|
v[ 9] = blake2s_IV[1];
|
||
|
v[10] = blake2s_IV[2];
|
||
|
v[11] = blake2s_IV[3];
|
||
|
v[12] = S->t[0] ^ blake2s_IV[4];
|
||
|
v[13] = S->t[1] ^ blake2s_IV[5];
|
||
|
v[14] = S->f[0] ^ blake2s_IV[6];
|
||
|
v[15] = S->f[1] ^ blake2s_IV[7];
|
||
|
|
||
|
for ( uint r = 0; r <= 9; ++r ) // No gain on i7 if unrolled, but exe size grows.
|
||
|
{
|
||
|
G(r,0,m,v[ 0],v[ 4],v[ 8],v[12]);
|
||
|
G(r,1,m,v[ 1],v[ 5],v[ 9],v[13]);
|
||
|
G(r,2,m,v[ 2],v[ 6],v[10],v[14]);
|
||
|
G(r,3,m,v[ 3],v[ 7],v[11],v[15]);
|
||
|
G(r,4,m,v[ 0],v[ 5],v[10],v[15]);
|
||
|
G(r,5,m,v[ 1],v[ 6],v[11],v[12]);
|
||
|
G(r,6,m,v[ 2],v[ 7],v[ 8],v[13]);
|
||
|
G(r,7,m,v[ 3],v[ 4],v[ 9],v[14]);
|
||
|
}
|
||
|
|
||
|
for( size_t i = 0; i < 8; ++i )
|
||
|
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||
|
}
|
||
|
|
||
|
|
||
|
void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
|
||
|
{
|
||
|
while( inlen > 0 )
|
||
|
{
|
||
|
size_t left = S->buflen;
|
||
|
size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;
|
||
|
|
||
|
if( inlen > fill )
|
||
|
{
|
||
|
memcpy( S->buf + left, in, fill ); // Fill buffer
|
||
|
S->buflen += fill;
|
||
|
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
|
||
|
|
||
|
#ifdef USE_SSE
|
||
|
#ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
|
||
|
if (_SSE_Version>=SSE_SSE2)
|
||
|
#else
|
||
|
if (_SSE_Version>=SSE_SSSE3)
|
||
|
#endif
|
||
|
blake2s_compress_sse( S, S->buf );
|
||
|
else
|
||
|
blake2s_compress( S, S->buf ); // Compress
|
||
|
#else
|
||
|
blake2s_compress( S, S->buf ); // Compress
|
||
|
#endif
|
||
|
|
||
|
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
|
||
|
S->buflen -= BLAKE2S_BLOCKBYTES;
|
||
|
in += fill;
|
||
|
inlen -= fill;
|
||
|
}
|
||
|
else // inlen <= fill
|
||
|
{
|
||
|
memcpy( S->buf + left, in, (size_t)inlen );
|
||
|
S->buflen += (size_t)inlen; // Be lazy, do not compress
|
||
|
in += inlen;
|
||
|
inlen = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
void blake2s_final( blake2s_state *S, byte *digest )
|
||
|
{
|
||
|
if( S->buflen > BLAKE2S_BLOCKBYTES )
|
||
|
{
|
||
|
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
|
||
|
blake2s_compress( S, S->buf );
|
||
|
S->buflen -= BLAKE2S_BLOCKBYTES;
|
||
|
memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
|
||
|
}
|
||
|
|
||
|
blake2s_increment_counter( S, ( uint32 )S->buflen );
|
||
|
blake2s_set_lastblock( S );
|
||
|
memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
|
||
|
blake2s_compress( S, S->buf );
|
||
|
|
||
|
for( int i = 0; i < 8; ++i ) /* Output full hash */
|
||
|
RawPut4( S->h[i], digest + 4 * i );
|
||
|
}
|
||
|
|