From cd8f45d313fe760d7f71a425bdbb567afac219d1 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov" Date: Thu, 28 May 2015 01:51:26 +0400 Subject: Initial version of GOST 34.11-2012 (aka Streebog) hash core --- streebog_hash/ip/adder_s6.xco | 73 ++++++ streebog_hash/streebog_core_adder_s6.v | 152 ++++++++++++ streebog_hash/streebog_core_lps.v | 405 +++++++++++++++++++++++++++++++ streebog_hash/streebog_hash_top.v | 421 +++++++++++++++++++++++++++++++++ streebog_hash/streebog_rom_a_matrix.v | 152 ++++++++++++ streebog_hash/streebog_rom_c_array.v | 58 +++++ streebog_hash/streebog_rom_s_table.v | 299 +++++++++++++++++++++++ streebog_hash/tb/streebog_tb.v | 198 ++++++++++++++++ 8 files changed, 1758 insertions(+) create mode 100644 streebog_hash/ip/adder_s6.xco create mode 100644 streebog_hash/streebog_core_adder_s6.v create mode 100644 streebog_hash/streebog_core_lps.v create mode 100644 streebog_hash/streebog_hash_top.v create mode 100644 streebog_hash/streebog_rom_a_matrix.v create mode 100644 streebog_hash/streebog_rom_c_array.v create mode 100644 streebog_hash/streebog_rom_s_table.v create mode 100644 streebog_hash/tb/streebog_tb.v (limited to 'streebog_hash') diff --git a/streebog_hash/ip/adder_s6.xco b/streebog_hash/ip/adder_s6.xco new file mode 100644 index 0000000..23b7d94 --- /dev/null +++ b/streebog_hash/ip/adder_s6.xco @@ -0,0 +1,73 @@ +############################################################## +# +# Xilinx Core Generator version 14.7 +# Date: Tue Mar 24 19:41:47 2015 +# +############################################################## +# +# This file contains the customisation parameters for a +# Xilinx CORE Generator IP GUI. It is strongly recommended +# that you do not manually alter this file as it may cause +# unexpected and unsupported behavior. 
+# +############################################################## +# +# Generated from component: xilinx.com:ip:c_addsub:11.0 +# +############################################################## +# +# BEGIN Project Options +SET addpads = false +SET asysymbol = true +SET busformat = BusFormatAngleBracketNotRipped +SET createndf = false +SET designentry = Verilog +SET device = xc6slx45 +SET devicefamily = spartan6 +SET flowvendor = Other +SET formalverification = false +SET foundationsym = false +SET implementationfiletype = Ngc +SET package = csg324 +SET removerpms = false +SET simulationfiles = Behavioral +SET speedgrade = -3 +SET verilogsim = true +SET vhdlsim = false +# END Project Options +# BEGIN Select +SELECT Adder_Subtracter xilinx.com:ip:c_addsub:11.0 +# END Select +# BEGIN Parameters +CSET a_type=Unsigned +CSET a_width=32 +CSET add_mode=Add +CSET ainit_value=0 +CSET b_constant=false +CSET b_type=Unsigned +CSET b_value=00000000000000000000000000000000 +CSET b_width=32 +CSET borrow_sense=Active_Low +CSET bypass=false +CSET bypass_ce_priority=CE_Overrides_Bypass +CSET bypass_sense=Active_High +CSET c_in=true +CSET c_out=true +CSET ce=true +CSET component_name=adder_s6 +CSET implementation=DSP48 +CSET latency=1 +CSET latency_configuration=Manual +CSET out_width=32 +CSET sclr=false +CSET sinit=false +CSET sinit_value=0 +CSET sset=false +CSET sync_ce_priority=Sync_Overrides_CE +CSET sync_ctrl_priority=Reset_Overrides_Set +# END Parameters +# BEGIN Extra information +MISC pkg_timestamp=2013-07-22T10:35:41Z +# END Extra information +GENERATE +# CRC: 13f690be diff --git a/streebog_hash/streebog_core_adder_s6.v b/streebog_hash/streebog_core_adder_s6.v new file mode 100644 index 0000000..3c254eb --- /dev/null +++ b/streebog_hash/streebog_core_adder_s6.v @@ -0,0 +1,152 @@ +`timescale 1ns / 1ps + +module streebog_core_adder_s6 + ( + clk, + ena, rdy, + x, y, sum + ); + + + // + // Ports + // + input wire clk; // core clock + input wire ena; // start addition flag + 
output wire rdy; // addition done flag (sum is valid) + input wire [511:0] x; // item x + input wire [511:0] y; // item y + output wire [511:0] sum; // x+y + + + /* + * ISE cannot synthesize adders using fabric that are more than 256 bits wide. Items X and Y are 512-bit wide, so + * Spartan-6 DSP blocks are used to overcome this issue. Every DSP block is configured to add 32 bits at a time, + * so total of 512/32=16 DSP blocks are required to implement addition. Every DSP block is configured to expose + * carry input and output ports. Overflow at 512-bit boundary should be ignored according to the specification, + * that's why only 15 intermediate carry lines are required. + * + * +-------------------+-------------------+- -+-------------------+ + * [X] | 511 : 480 | 479 : 448 | ... | 31 : 0 | + * +------*------------+------*------------+- -+------*------------+ + * | | | + * +------|------------+------|------------+- -+------|------------+ + * [Y] | | 511 : 480 | | 479 : 448 | ... | | 31 : 0 | + * +------|-----*------+------|------------+- -+------|------------+ + * | | | | | | + * | | | | | | + * v v v v v v + * +---+-+---+ +---+-+---+ +---+-+---+ + * | A | | B | | A | | B | | A | | B | + * +---------+ +---+-+---+ +---+-+---+ + * | DSP #15 | | DSP #14 | | DSP #0 | + * |---------| |---------| |---------| + * | Carry | | Carry | | Carry | + * X --<-Out In-<--C[14]--<-Out In-<--C[13]- ... -C[ 0]--<-Out In-<-- 0 + * +---------+ +---------+ +---------+ + * | S | | S | | S | + * +---------+ +---------+ +---------+ + * | | | + * v v v + * +---------*---------+---------*---------+- -+---------*---------+ + * [Z] | 511 : 480 | 479 : 448 | ... 
| 31 : 0 | + * +-------------------+-------------------+- -+-------------------+ + * + */ + + + // + // Internals + // + wire [511:0] z; // concatenated outputs of adders + wire [14:0] z_carry; // carry lines + reg [511:0] sum_reg; // output register + + assign sum = sum_reg; + + + // + // Shift Register + // + + /* + * This shift register is re-loaded with "walking one" bit pattern whenever enable + * input is active and adder core is ready. The most significant bit [17] acts as a + * ready flag. Lower 16 bits [15:0] control DSP blocks (Clock Enable). Intermediate + * bit [16] is required to compensate for 1-cycle latency of DSP blocks. + * + */ + + reg [17: 0] ce_shreg = {1'b1, 1'b0, 16'h0000}; + + assign rdy = ce_shreg[17]; + + + // + // Shift Register Logic + // + always @(posedge clk) + // + if (! rdy) ce_shreg <= {ce_shreg[16:0], 1'b0}; + else if (ena) ce_shreg <= {1'b0, 1'b0, 16'h0001}; + + + // + // Output Register Logic + // + always @(posedge clk) + // + if (ce_shreg[16] == 1'b1) sum_reg <= z; + + + // + // LSB Adder + // + adder_s6 adder_s6_lsb + ( + .clk (clk), // + .ce (ce_shreg[0]), // clock enable [0] + .a (x[ 31: 0]), // + .b (y[ 31: 0]), // + .s (z[ 31: 0]), // + .c_in (1'b0), // carry input tied to 0 + .c_out (z_carry[0]) // carry[0] to next adder + ); + + + // + // MSB Adder + // + adder_s6 adder_s6_msb + ( + .clk (clk), // + .ce (ce_shreg[15]), // clock enable [15] + .a (x[511:480]), // + .b (y[511:480]), // + .s (z[511:480]), // + .c_in (z_carry[14]), // carry[14] from previous adder + .c_out () // carry output not connected + ); + + + // + // Intermediate Adders + // + genvar i; + generate for (i=1; i<=14; i=i+1) + begin: gen_adder_s6 + adder_s6 adder_s6_int + ( + .clk (clk), // + .ce (ce_shreg[i]), // clock enable [1..14] + .a (x[32*i+31:32*i]), // + .b (y[32*i+31:32*i]), // + .s (z[32*i+31:32*i]), // + .c_in (z_carry[i-1]), // carry[0..13] from previous adder + .c_out (z_carry[i]) // carry[1..14] to next adder + ); + end + endgenerate + + 
+endmodule diff --git a/streebog_hash/streebog_core_lps.v b/streebog_hash/streebog_core_lps.v new file mode 100644 index 0000000..a668f16 --- /dev/null +++ b/streebog_hash/streebog_core_lps.v @@ -0,0 +1,405 @@ +`timescale 1ns / 1ps + +module streebog_core_lps + ( + clk, + ena, rdy, last, + din, dout + ); + + + // + // Parameters + // + parameter PS_PIPELINE_STAGES = 8; // 2, 4, 8 + parameter L_PIPELINE_STAGES = 8; // 2, 4, 8, 16, 32, 64 + + + // + // Ports + // + input wire clk; // core clock + input wire ena; // start transformation flag + output wire rdy; // transformation done flag (dout is valid) + output wire last; // transformation about to complete (rdy flag will be asserted during the next cycle) + input wire [511:0] din; // input data to transform + output wire [511:0] dout; // output data (result of transformation) + + + /* + * This LPS core has parametrized internal pipeline. P and S transformations are combined into one PS transformation and + * have common pipeline. L transformation has its own separate pipeline. The total latency of this core is thus + * PS_PIPELINE_STAGES*L_PIPELINE_STAGES. The fastest version completes the transformation in 2*2=4 cycles, the slowest + * version requires 8*64=512 cycles. S transformation substitutes bytes according to a lookup table. P transformation does + * permutation of input bytes. L transformation multiplies input data by a special predefined matrix. If you don't understand + * how matrices are multiplied, you should not try to understand how the following code works. This may damage your brain. + * You've been warned. Seriously. + * + */ + + + // + // Constants + // + + /* + * PS transformation operates on 64-bit words. Input data contains 512/64=8 such words. + * Depending on PS pipeline stage count we can transform 1, 2 or 4 words at a time. + * + * L transformation operates on 64-bit words. Depending on L pipeline stage count we + * can transform 1, 2, 4, 8, 16 or 32 bits of a word at a time. 
+ * + */ + + localparam PS_WORDS_AT_ONCE = 8 / PS_PIPELINE_STAGES; + localparam L_BITS_AT_ONCE = 64 / L_PIPELINE_STAGES; + + /* + * These functions return number of bits needed to store pipeline stage counters. They will + * also prevent users from specifying illegal pipeline widths. This module will not synthesize + * with invalid pipeline stage count, because counter width will not be explicitly defined. + * + */ + + function integer PS_NUM_COUNT_BITS; + input integer x; + begin + case (x) + 2: PS_NUM_COUNT_BITS = 1; + 4: PS_NUM_COUNT_BITS = 2; + 8: PS_NUM_COUNT_BITS = 3; + endcase + end + endfunction + + function integer L_NUM_COUNT_BITS; + input integer y; + begin + case (y) + 2: L_NUM_COUNT_BITS = 1; + 4: L_NUM_COUNT_BITS = 2; + 8: L_NUM_COUNT_BITS = 3; + 16: L_NUM_COUNT_BITS = 4; + 32: L_NUM_COUNT_BITS = 5; + 64: L_NUM_COUNT_BITS = 6; + endcase + end + endfunction + + + // + // Counter Widths + // + localparam L_CNT_BITS = L_NUM_COUNT_BITS(L_PIPELINE_STAGES); // width of L counter + localparam PS_CNT_BITS = PS_NUM_COUNT_BITS(PS_PIPELINE_STAGES); // width of PS counter + + + // + // Input Multiplexor + // + wire [63: 0] din_mux[0:7]; // eight 64-bit words + + /* + * This multiplexor does the P transformation. P transformation is effectively a matrix + * transposition. Input 512-bit word is treated as an 8x8 byte matrix. Multiplexor outputs + * a set of 8 64-bit words. These words are columns of the original matrix (transposition + * turns rows into columns). + * + */ + + genvar i, j; + generate for (i=0; i<8; i=i+1) + begin: gen_din_mux_i + for (j=0; j<8; j=j+1) begin: gen_din_mux_j + assign din_mux[i][8*j + 7 : 8*j] = din[64*j + 8*i + 7 : 64*j + 8*i]; + end + end + endgenerate + + + // + // Output Multiplexor + // + reg [63: 0] dout_mux[0:7]; // eight 64-bit words + + /* + * Output 64-bit subwords are concatenated to form output 512-bit word. 
+ * + */ + + genvar k; + generate for (k=0; k<8; k=k+1) + begin: gen_dout_mux + assign dout[64*k+63:64*k] = dout_mux[k]; + end + endgenerate + + + // + // PS and L Counters + // + + /* + * These counters control internal data flow of this core. For example, if PS has 2 stages and + * L has 4 stages, then the count will look like this: + * ____ + * ENA \\\________________________________ + * _____ _ + * RDY ^ \_______________________________/ + * | | | | | | | | | | + * +----+---+---+---+---+---+---+---+---+---+- + * | PS | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | + * +----+---+---+---+---+---+---+---+---+---+- + * | L | 0 | 1 | 2 | 3 | 0 | 1 | 2 | 3 | 0 | + * +----+---+---+---+---+---+---+---+---+---+- + * ^ ^ | + * | | +--> both counters will be zero during the last cycle + * | | + * +---------------+------------------> preloading of new word(s) into S lookup table(s) + * + */ + + reg [ L_CNT_BITS-1:0] l_count = { L_CNT_BITS{1'b0}}; + reg [PS_CNT_BITS-1:0] ps_count = {PS_CNT_BITS{1'b0}}; + + + // + // Handy Flags + // + + /* + * These flags are used instead of lengthy (z_count == {Z_CNT_BITS{1'bZ}}) comparisons. + * + */ + + wire l_count_done = ( l_count == { L_CNT_BITS{1'b1}}) ? 1 : 0; + wire ps_count_done = (ps_count == {PS_CNT_BITS{1'b1}}) ? 1 : 0; + + wire l_count_zero = ( l_count == { L_CNT_BITS{1'b0}}) ? 1 : 0; + wire ps_count_zero = (ps_count == {PS_CNT_BITS{1'b0}}) ? 1 : 0; + + + // + // Preload Flags + // + + /* + * These flags are used as clock enables for S lookup table. + * + */ + + wire ps_preload_first = (rdy && ena); + wire ps_preload_next = (!rdy && !ps_count_zero && l_count_zero); + + + // + // Last Flag + // + + /* + * This flag indicates that core operation is about to complete. 
+ * + */ + assign last = !rdy && ps_count_zero && l_count_zero; + + + // + // Counter Logic + // + always @(posedge clk) begin + // + if (!rdy && l_count_done) ps_count <= ps_count + 1'b1; // next word(s) + // + if (rdy && ena) l_count <= l_count + 1'b1; // start of transformation + // + if (!rdy && !(ps_count_zero && l_count_zero)) l_count <= l_count + 1'b1; // next part of word(s) + // + end + + + // + // Ready Output Register + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + + // + // Ready Set and Clear Logic + // + always @(posedge clk) begin + // + if (rdy && ena) rdy_reg <= 0; // start of transformation + // + if (!rdy && l_count_zero && ps_count_zero) rdy_reg <= 1; // end of transformation + // + end + + + // + // S Table Indices + // + + /* + * To transform several words at once a set of indices is required. + * + */ + + wire [ 2: 0] s_in_offset [0:PS_WORDS_AT_ONCE-1]; // indices of words being transformed + wire [63: 0] s_out [0:PS_WORDS_AT_ONCE-1]; // output words of S transformation + + assign s_in_offset[0] = ps_count * PS_WORDS_AT_ONCE; // the first index is defined by PS counter, + // following indices are linearly increasing + + genvar sw, sb; // word and byte counter + generate for (sw=1; sw 4'd0) ? 1 : 0; // transformation has been started + wire round_count_not_done = (round_count < STREEBOG_NUM_ROUNDS) ? 1 : 0; // transformation has not been finished + + + /* + * Compression procedure includes 13 rounds. To perform every round we need to know + * round key. This implementation uses two parallel LPS cores. The first LPS core (key core) + * is used to produce round keys, the second LPS core (data core) is used to encrypt message block. + * + * Data core is not activated during the first round, because round key is not yet known during + * the first round. During the second round, key core computes next (second) round key, while data core encrypts + * message block using first round key and so on. 
The last compression round doesn't include an encryption step. + * Instead, a simple XOR operation is used. + * + * Compression procedure requires 13 key calculations and 12 data encryptions. LPS cores operate according to + * the following schedule: + * + * + * +----------+----------+----------+- -+----------+ + * Round Count | 0 | 1 | 2 | ... | 12 | + * +----------+----------+----------+- -+----------+ + * Key Core | KEY #0 | KEY #1 | KEY #2 | ... | KEY #12 | + * +----------+----------+----------+- -+----------+ + * Data Core | Idle | DATA #0 | DATA #1 | ... | DATA #11 | + * +----------+----------+----------+- -+----------+ + * + */ + + + // + // LPS Core for Round Key Calculation + // + reg [511:0] lps_key_in; // + wire [511:0] lps_key_out; // + wire lps_key_ena; // + wire lps_key_last; // + wire lps_key_rdy; // + + wire lps_key_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_TRIG) ? 1 : 0; + wire lps_key_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_TRIG) ? 1 : 0; + wire lps_key_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_TRIG) ? 
1 : 0; + + assign lps_key_ena = lps_key_ena_update || lps_key_ena_final_n || lps_key_ena_final_sigma; + + streebog_core_lps # + ( + .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES), + .L_PIPELINE_STAGES (L_PIPELINE_STAGES) + ) + lps_key + ( + .clk (clock), + .ena (lps_key_ena), + .rdy (lps_key_rdy), + .last (lps_key_last), + .din (lps_key_in), + .dout (lps_key_out) + ); + + + // + // LPS Core for Block Compression + // + reg [511:0] lps_data_in; + wire [511:0] lps_data_out; + wire lps_data_ena; + wire lps_data_last; + wire lps_data_rdy; + + assign lps_data_ena = lps_key_ena & round_count_active; + + streebog_core_lps # + ( + .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES), + .L_PIPELINE_STAGES (L_PIPELINE_STAGES) + ) + lps_data + ( + .clk (clock), + .ena (lps_data_ena), + .rdy (lps_data_rdy), + .last (lps_data_last), + .din (lps_data_in), + .dout (lps_data_out) + ); + + + /* + * According to specification, internal state must be updated after compression, this + * involves addition of two pairs of 512-bit numbers. This operation is done in two + * parallel summation cores. The first core updates N register, the second core updates + * Sigma register. Summation is triggered before LPS cores are activated. Actual update + * of N and Sigma occurs after completion of compression procedure. + * + */ + + + // + // Summation Trigger Flag + // + wire adder_trig = (fsm_state == FSM_STATE_UPDATE_ADDER_TRIG) ? 
1 : 0; + + + // + // Block Length Adder (N = N + |M|) + // + wire [511:0] adder_n_sum; + wire adder_n_rdy; + + streebog_core_adder_s6 adder_n + ( + .clk (clock), + .ena (adder_trig), + .rdy (adder_n_rdy), + .x (N), + .y ({{502{1'b0}}, block_length}), + .sum (adder_n_sum) + ); + + + // + // Message Adder (Sigma = Sigma + M) + // + wire [511:0] adder_sigma_sum; + wire adder_sigma_rdy; + + streebog_core_adder_s6 adder_sigma + ( + .clk (clock), + .ena (adder_trig), + .rdy (adder_sigma_rdy), + .x (Sigma), + .y (block), + .sum (adder_sigma_sum) + ); + + + // + // Handy Flags + // + wire lps_last_both = lps_key_last & lps_data_last; + wire lps_rdy_both = lps_key_rdy & lps_data_rdy; + wire adder_rdy_both = adder_n_rdy & adder_sigma_rdy; + + + /* + * Operation of this core is controlled by FSM logic. Ready flag is embedded in state encoding. FSM goes out of + * idle state when init/update/final flags become active. Init flag has priority over update and final flags. + * Update flag has priority over final flag. 
+ * + */ + + + // + // FSM States + // + localparam FSM_STATE_IDLE = 4'b1_00_0; // core is idle + // + localparam FSM_STATE_UPDATE_LPS_TRIG = 4'b0_00_0; // core is triggering gN(h,m) transformation + localparam FSM_STATE_UPDATE_LPS_WAIT = 4'b0_00_1; // core is waiting for transformation to complete + // + localparam FSM_STATE_UPDATE_ADDER_TRIG = 4'b0_11_0; // core is triggering summation + localparam FSM_STATE_UPDATE_ADDER_WAIT = 4'b0_11_1; // core is waiting for summation to complete + // + localparam FSM_STATE_FINAL_N_LPS_TRIG = 4'b0_01_0; // core is triggering g0(h,N) transformation + localparam FSM_STATE_FINAL_N_LPS_WAIT = 4'b0_01_1; // core is waiting for transformation to complete + // + localparam FSM_STATE_FINAL_SIGMA_LPS_TRIG = 4'b0_10_0; // core is triggering g0(h,Sigma) transformation + localparam FSM_STATE_FINAL_SIGMA_LPS_WAIT = 4'b0_10_1; // core is waiting for transformation to complete + + + // + // FSM State Register and Core Ready Flag + // + reg [ 3: 0] fsm_state = FSM_STATE_IDLE; + assign ready = fsm_state[3]; + + + // + // FSM Transition Logic + // + always @(posedge clock) begin + // + case (fsm_state) + // + // init + // + FSM_STATE_IDLE: begin + if (!init && update) fsm_state <= FSM_STATE_UPDATE_ADDER_TRIG; + if (!init && !update && final) fsm_state <= FSM_STATE_FINAL_N_LPS_TRIG; + end + // + // update -> gN(h,m) + // + FSM_STATE_UPDATE_ADDER_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_TRIG; + FSM_STATE_UPDATE_LPS_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_WAIT; + FSM_STATE_UPDATE_LPS_WAIT: + if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_UPDATE_LPS_TRIG : FSM_STATE_UPDATE_ADDER_WAIT; + FSM_STATE_UPDATE_ADDER_WAIT: + if (adder_rdy_both) fsm_state <= FSM_STATE_IDLE; + // + // final -> g0(h,N) + // + FSM_STATE_FINAL_N_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_N_LPS_WAIT; + FSM_STATE_FINAL_N_LPS_WAIT: + if (lps_rdy_both) fsm_state <= round_count_not_done ? 
FSM_STATE_FINAL_N_LPS_TRIG : FSM_STATE_FINAL_SIGMA_LPS_TRIG; + // + // final -> g0(h,Sigma) + // + FSM_STATE_FINAL_SIGMA_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_SIGMA_LPS_WAIT; + FSM_STATE_FINAL_SIGMA_LPS_WAIT: + if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_FINAL_SIGMA_LPS_TRIG : FSM_STATE_IDLE; + // + // default + // + default: fsm_state <= FSM_STATE_IDLE; + // + endcase + // + end + + + /* + * Key calculation involves 12 round constants. These constants are stored in an array. The first key + * (calculated during the first round) does not require a constant. New constant is preloaded during the last + * cycle of LPS transformation. LPS cores have dedicated output flag indicating that operation is about to complete. + * This flag is used as Clock Enable. Constants are preloaded during rounds 1-12 and are used during rounds 2-13. + * + */ + + // + // Round Constants + // + wire [511:0] c_array_out; + + wire c_array_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_WAIT) ? 1 : 0; + wire c_array_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT) ? 1 : 0; + wire c_array_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT) ? 1 : 0; + + wire c_array_ena = lps_key_last && round_count_not_done && (c_array_ena_update || c_array_ena_final_n || c_array_ena_final_sigma); + + // + (* ROM_STYLE="BLOCK" *) + // + streebog_rom_c_array c_array + ( + .clk (clock), + .ena (c_array_ena), + .din (round_count), + .dout (c_array_out) + ); + + /* + * The following pieces of code take care of LPS and summation inputs and outputs, they also take care + * of output digest register and corresponding valid flag. + * + */ + + + // + // Internal State Control Logic + // + always @(posedge clock) + // + case (fsm_state) + + FSM_STATE_IDLE: if (init) begin + h <= (short_mode == 1'b1) ? 
STREEBOG_IV_256 : STREEBOG_IV_512; + N <= {512{1'b0}}; + Sigma <= {512{1'b0}}; + end + + FSM_STATE_UPDATE_ADDER_WAIT: if (adder_rdy_both) begin + N <= adder_n_sum; + Sigma <= adder_sigma_sum; + end + + FSM_STATE_UPDATE_LPS_WAIT: + if (lps_key_rdy && !round_count_not_done) + h <= lps_key_out ^ lps_data_out ^ h ^ block; + + FSM_STATE_FINAL_N_LPS_WAIT: + if (lps_key_rdy && !round_count_not_done) + h <= lps_key_out ^ lps_data_out ^ h ^ N; + + endcase + + + // + // Output Register Control Logic + // + always @(posedge clock) + // + case (fsm_state) + + FSM_STATE_IDLE: if (init) begin + digest_reg <= {512{1'bX}}; + digest_valid_reg <= 1'b0; + end + + FSM_STATE_FINAL_SIGMA_LPS_WAIT: + if (lps_key_rdy && !round_count_not_done) begin + digest_reg <= lps_key_out ^ lps_data_out ^ h ^ Sigma; + digest_valid_reg <= 1'b1; + end + + endcase + + + // + // Round Count Logic + // + always @(posedge clock) + // + case (fsm_state) + + FSM_STATE_IDLE: + if (update || final) round_count <= 4'd0; + + FSM_STATE_UPDATE_LPS_WAIT, + FSM_STATE_FINAL_N_LPS_WAIT, + FSM_STATE_FINAL_SIGMA_LPS_WAIT: + if (lps_key_rdy) round_count <= round_count_not_done ? round_count + 1'b1 : 4'd0; + + endcase + + + // + // Key and Data LPS Cores Logic + // + always @(posedge clock) + // + case (fsm_state) + + FSM_STATE_IDLE: if (!init) begin + if (update) lps_key_in <= h ^ N; + if (!update && final) lps_key_in <= h; + end + + FSM_STATE_UPDATE_LPS_WAIT: + if (lps_key_rdy && round_count_not_done) begin + lps_key_in <= lps_key_out ^ c_array_out; + lps_data_in <= lps_key_out ^ (round_count_active ? lps_data_out : block); + end + + FSM_STATE_FINAL_N_LPS_WAIT: if (lps_key_rdy) begin + lps_key_in <= lps_key_out ^ (round_count_not_done ? c_array_out : lps_data_out ^ h ^ N); + lps_data_in <= round_count_not_done ? lps_key_out ^ (round_count_active ? 
lps_data_out : N) : {512{1'bX}}; + end + + FSM_STATE_FINAL_SIGMA_LPS_WAIT: + if (lps_key_rdy && round_count_not_done) begin + lps_key_in <= lps_key_out ^ c_array_out; + lps_data_in <= round_count_active ? lps_key_out ^ lps_data_out : lps_key_out ^ Sigma; + end + + endcase + + +endmodule diff --git a/streebog_hash/streebog_rom_a_matrix.v b/streebog_hash/streebog_rom_a_matrix.v new file mode 100644 index 0000000..ba3607b --- /dev/null +++ b/streebog_hash/streebog_rom_a_matrix.v @@ -0,0 +1,152 @@ +`timescale 1ns / 1ps + +module streebog_rom_a_matrix + ( + clk, + din, dout + ); + + + // + // Ports + // + input wire clk; + input wire [ 5: 0] din; + output wire [63: 0] dout; + + + // + // Output Register + // + reg [63: 0] dout_reg; + assign dout = dout_reg; + + + // + // A Transformation Matrix + // + + /* + * Original matrix from the standard was transformed to allow efficient implementation of + * hardware multiplication. The following matrix is effectively the transposed version + * of the original matrix A with reversed row order. + * + * Original 64x64 bit matrix from the standard has the following form: + * + * a[i,j] is 1-bit matrix element + * + * A_row(i) is 64-bit row of matrix + * A_col(j) is 64-bit column of matrix + * + * + * A_col(0) A_col(1) A_col(62) A_col(63) + * | | | | + * | | | | + * +----------------------------------------------+ + * | a[ 0,63] a[ 0,62] ... a[ 0, 1] a[ 0, 0] | --A_row(0) + * | a[ 1,63] a[ 1,62] ... a[ 1, 1] a[ 1, 0] | --A_row(1) + * | ... | + * | a[62,63] a[62,62] ... a[62, 1] a[62, 0] | --A_row(62) + * | a[63,63] a[63,62] ... a[63, 1] a[63, 0] | --A_row(63) + * +----------------------------------------------+ + * + * + * A_row(0)...A_row(63) are given in the original specification. Instead of row vectors we need a set of + * column vectors A_col(0)...A_col(63). A_col() can be obtained by transposing A_row(). 
+ * + * + * A_row(0) A_row(1) A_row(62) A_row(63) + * | | | | + * | | | | + * +---------------------------------------------+ + * | a[ 0,63] a[ 1,63] ... a[62,63] a[63,63] | --A_col(0) + * | a[ 0,62] a[ 1,62] ... a[62,62] a[63,62] | --A_col(1) + * | ... | + * | a[ 0, 1] a[ 1, 1] ... a[62, 1] a[63, 1] | --A_col(62) + * | a[ 0, 0] a[ 1, 0] ... a[62, 0] a[63, 0] | --A_col(63) + * +---------------------------------------------+ + * + * + * The only problem with A_col() is that original 64-bit A_row() values in the standard are written from MSB to LSB. That implies that + * original matrix columns are numbered from 63 to 0, while matrix rows are numbered from 0 to 63. Because of that we need to reverse + * row order after transposition. Original matrix had element a[0,0] in A_row(0), but after transposition element a[0,0] turns out + * to be in A_col(63), not in A_col(0). Because of that addresses inside of case() below are reversed. This effectively reverses + * the order in which A_col() follow. 
+ * + */ + + always @(posedge clk) begin + // + case (din) + // + 6'h3F: dout_reg <= 64'hB18285C0BA4F9506; + 6'h3E: dout_reg <= 64'h584142605DA7CA83; + 6'h3D: dout_reg <= 64'h2CA021302E53E5C1; + 6'h3C: dout_reg <= 64'h16509098172972E0; + 6'h3B: dout_reg <= 64'hBA2A4D8C315B2C76; + 6'h3A: dout_reg <= 64'hEC172386A2E2833D; + 6'h39: dout_reg <= 64'hC7091403EB3E5418; + 6'h38: dout_reg <= 64'h63040A81759F2A0C; + 6'h37: dout_reg <= 64'h025DA344601EA1B8; + 6'h36: dout_reg <= 64'h012ED1A2308FD05C; + 6'h35: dout_reg <= 64'h8017685198C7E8AE; + 6'h34: dout_reg <= 64'h408BB4284C63F457; + 6'h33: dout_reg <= 64'h2218F9D046AFDB13; + 6'h32: dout_reg <= 64'h13515FACC3C94CB1; + 6'h31: dout_reg <= 64'h0B758C12817A87E0; + 6'h30: dout_reg <= 64'h05BA4689C03D4370; + 6'h2F: dout_reg <= 64'hA1F0C986411102CC; + 6'h2E: dout_reg <= 64'hD0F864C3A0080166; + 6'h2D: dout_reg <= 64'hE87CB2E1508480B3; + 6'h2C: dout_reg <= 64'hF4BED9F0A8C24059; + 6'h2B: dout_reg <= 64'hDB2F257E95702260; + 6'h2A: dout_reg <= 64'h4C67DB398BA913FC; + 6'h29: dout_reg <= 64'h87C3241A04450B32; + 6'h28: dout_reg <= 64'h43E1920D82220599; + 6'h27: dout_reg <= 64'hE0802541868B1232; + 6'h26: dout_reg <= 64'h704012A0C3458999; + 6'h25: dout_reg <= 64'hB8208950E12244CC; + 6'h24: dout_reg <= 64'h5C1044A8F011A266; + 6'h23: dout_reg <= 64'h4E0887957E834381; + 6'h22: dout_reg <= 64'hC704668B394AB3F2; + 6'h21: dout_reg <= 64'h830296041A2E4BCB; + 6'h20: dout_reg <= 64'hC1014B820D172565; + 6'h1F: dout_reg <= 64'h7DD80C6D98218914; + 6'h1E: dout_reg <= 64'h3E6C06B64C90440A; + 6'h1D: dout_reg <= 64'h9F36835B26C8A285; + 6'h1C: dout_reg <= 64'h4F1BC1AD93E45142; + 6'h1B: dout_reg <= 64'hDA55ECBBD1D3A135; + 6'h1A: dout_reg <= 64'h10727AB0F048598E; + 6'h19: dout_reg <= 64'hF56131B560852553; + 6'h18: dout_reg <= 64'hFAB018DA30421229; + 6'h17: dout_reg <= 64'h82B12139880C7F01; + 6'h16: dout_reg <= 64'h4158909CC4063F80; + 6'h15: dout_reg <= 64'hA02CC8CEE2831F40; + 6'h14: dout_reg <= 64'h5016E46771C10F20; + 6'h13: dout_reg <= 64'h2ABAD30AB0ECF811; 
+ 6'h12: dout_reg <= 64'h17EC48BC507A0309; + 6'h11: dout_reg <= 64'h09C785E72031FE05; + 6'h10: dout_reg <= 64'h046342731018FF02; + 6'h0F: dout_reg <= 64'h91E9E113A54E2B57; + 6'h0E: dout_reg <= 64'h4874F009522715AB; + 6'h0D: dout_reg <= 64'hA43AF804A9138A55; + 6'h0C: dout_reg <= 64'hD21D7C825409C5AA; + 6'h0B: dout_reg <= 64'h78E75F528F4A4982; + 6'h0A: dout_reg <= 64'hAD9ACEBA62EB0F16; + 6'h09: dout_reg <= 64'h47A4864E943BAC5C; + 6'h08: dout_reg <= 64'h23D2C3274A9D56AE; + 6'h07: dout_reg <= 64'h06016A5C89D498B1; + 6'h06: dout_reg <= 64'h8380B5AE446A4C58; + 6'h05: dout_reg <= 64'hC140DA57A2B5262C; + 6'h04: dout_reg <= 64'hE0206DAB51DA9316; + 6'h03: dout_reg <= 64'h7611DC09A1B9D1BA; + 6'h02: dout_reg <= 64'h3D0984585908F0EC; + 6'h01: dout_reg <= 64'h1805A870255060C7; + 6'h00: dout_reg <= 64'h0C02D4B812A83063; + // + endcase // case(din) + // + end // always @(posedge clk) + + +endmodule diff --git a/streebog_hash/streebog_rom_c_array.v b/streebog_hash/streebog_rom_c_array.v new file mode 100644 index 0000000..e31b5c0 --- /dev/null +++ b/streebog_hash/streebog_rom_c_array.v @@ -0,0 +1,58 @@ +`timescale 1ns / 1ps + +module streebog_rom_c_array + ( + clk, ena, + din, dout + ); + + + // + // Ports + // + input wire clk; + input wire ena; + input wire [ 3:0] din; + output wire [511:0] dout; + + + // + // Output Register + // + reg [511:0] dout_reg; + assign dout = dout_reg; + + + // + // C Round Constants Array + // + always @(posedge clk) begin + // + if (ena) begin + // + case (din) + // + 4'h0: dout_reg <= 512'hB1085BDA1ECADAE9EBCB2F81C0657C1F2F6A76432E45D016714EB88D7585C4FC4B7CE09192676901A2422A08A460D31505767436CC744D23DD806559F2A64507; + 4'h1: dout_reg <= 512'h6FA3B58AA99D2F1A4FE39D460F70B5D7F3FEEA720A232B9861D55E0F16B501319AB5176B12D699585CB561C2DB0AA7CA55DDA21BD7CBCD56E679047021B19BB7; + 4'h2: dout_reg <= 512'hF574DCAC2BCE2FC70A39FC286A3D843506F15E5F529C1F8BF2EA7514B1297B7BD3E20FE490359EB1C1C93A376062DB09C2B6F443867ADB31991E96F50ABA0AB2; + 4'h3: dout_reg <= 
512'hEF1FDFB3E81566D2F948E1A05D71E4DD488E857E335C3C7D9D721CAD685E353FA9D72C82ED03D675D8B71333935203BE3453EAA193E837F1220CBEBC84E3D12E; + // + 4'h4: dout_reg <= 512'h4BEA6BACAD4747999A3F410C6CA923637F151C1F1686104A359E35D7800FFFBDBFCD1747253AF5A3DFFF00B723271A167A56A27EA9EA63F5601758FD7C6CFE57; + 4'h5: dout_reg <= 512'hAE4FAEAE1D3AD3D96FA4C33B7A3039C02D66C4F95142A46C187F9AB49AF08EC6CFFAA6B71C9AB7B40AF21F66C2BEC6B6BF71C57236904F35FA68407A46647D6E; + 4'h6: dout_reg <= 512'hF4C70E16EEAAC5EC51AC86FEBF240954399EC6C7E6BF87C9D3473E33197A93C90992ABC52D822C3706476983284A05043517454CA23C4AF38886564D3A14D493; + 4'h7: dout_reg <= 512'h9B1F5B424D93C9A703E7AA020C6E41414EB7F8719C36DE1E89B4443B4DDBC49AF4892BCB929B069069D18D2BD1A5C42F36ACC2355951A8D9A47F0DD4BF02E71E; + // + 4'h8: dout_reg <= 512'h378F5A541631229B944C9AD8EC165FDE3A7D3A1B258942243CD955B7E00D0984800A440BDBB2CEB17B2B8A9AA6079C540E38DC92CB1F2A607261445183235ADB; + 4'h9: dout_reg <= 512'hABBEDEA680056F52382AE548B2E4F3F38941E71CFF8A78DB1FFFE18A1B3361039FE76702AF69334B7A1E6C303B7652F43698FAD1153BB6C374B4C7FB98459CED; + 4'hA: dout_reg <= 512'h7BCD9ED0EFC889FB3002C6CD635AFE94D8FA6BBBEBAB076120018021148466798A1D71EFEA48B9CAEFBACD1D7D476E98DEA2594AC06FD85D6BCAA4CD81F32D1B; + 4'hB: dout_reg <= 512'h378EE767F11631BAD21380B00449B17ACDA43C32BCDF1D77F82012D430219F9B5D80EF9D1891CC86E71DA4AA88E12852FAF417D5D9B21B9948BC924AF11BD720; + // + default: dout_reg <= {512{1'bX}}; + // + endcase // case (din) + // + end // if (ena) + // + end // always @(posedge clk) + +endmodule diff --git a/streebog_hash/streebog_rom_s_table.v b/streebog_hash/streebog_rom_s_table.v new file mode 100644 index 0000000..9779b0f --- /dev/null +++ b/streebog_hash/streebog_rom_s_table.v @@ -0,0 +1,299 @@ +`timescale 1ns / 1ps + +module streebog_rom_s_table + ( + clk, ena, + din, dout + ); + + + // + // Ports + // + input wire clk; + input wire ena; + input wire [ 7: 0] din; + output wire [ 7: 0] dout; + + + // + // Output Register + // + reg [ 7: 0] dout_reg; + 
assign dout = dout_reg;
+
+
+    // S Transformation Lookup Table: every one of the 256 possible din
+    // values has its own entry, so no default branch is needed. The
+    // result is loaded into dout_reg on a rising clk edge with ena high.
+    always @(posedge clk) begin
+        // dout_reg keeps its previous value while ena is low
+        if (ena) begin
+            //
+            case (din)
+                //
+                8'h00: dout_reg <= 8'hFC;
+                8'h01: dout_reg <= 8'hEE;
+                8'h02: dout_reg <= 8'hDD;
+                8'h03: dout_reg <= 8'h11;
+                8'h04: dout_reg <= 8'hCF;
+                8'h05: dout_reg <= 8'h6E;
+                8'h06: dout_reg <= 8'h31;
+                8'h07: dout_reg <= 8'h16;
+                8'h08: dout_reg <= 8'hFB;
+                8'h09: dout_reg <= 8'hC4;
+                8'h0A: dout_reg <= 8'hFA;
+                8'h0B: dout_reg <= 8'hDA;
+                8'h0C: dout_reg <= 8'h23;
+                8'h0D: dout_reg <= 8'hC5;
+                8'h0E: dout_reg <= 8'h04;
+                8'h0F: dout_reg <= 8'h4D;
+                8'h10: dout_reg <= 8'hE9;
+                8'h11: dout_reg <= 8'h77;
+                8'h12: dout_reg <= 8'hF0;
+                8'h13: dout_reg <= 8'hDB;
+                8'h14: dout_reg <= 8'h93;
+                8'h15: dout_reg <= 8'h2E;
+                8'h16: dout_reg <= 8'h99;
+                8'h17: dout_reg <= 8'hBA;
+                8'h18: dout_reg <= 8'h17;
+                8'h19: dout_reg <= 8'h36;
+                8'h1A: dout_reg <= 8'hF1;
+                8'h1B: dout_reg <= 8'hBB;
+                8'h1C: dout_reg <= 8'h14;
+                8'h1D: dout_reg <= 8'hCD;
+                8'h1E: dout_reg <= 8'h5F;
+                8'h1F: dout_reg <= 8'hC1;
+                8'h20: dout_reg <= 8'hF9;
+                8'h21: dout_reg <= 8'h18;
+                8'h22: dout_reg <= 8'h65;
+                8'h23: dout_reg <= 8'h5A;
+                8'h24: dout_reg <= 8'hE2;
+                8'h25: dout_reg <= 8'h5C;
+                8'h26: dout_reg <= 8'hEF;
+                8'h27: dout_reg <= 8'h21;
+                8'h28: dout_reg <= 8'h81;
+                8'h29: dout_reg <= 8'h1C;
+                8'h2A: dout_reg <= 8'h3C;
+                8'h2B: dout_reg <= 8'h42;
+                8'h2C: dout_reg <= 8'h8B;
+                8'h2D: dout_reg <= 8'h01;
+                8'h2E: dout_reg <= 8'h8E;
+                8'h2F: dout_reg <= 8'h4F;
+                8'h30: dout_reg <= 8'h05;
+                8'h31: dout_reg <= 8'h84;
+                8'h32: dout_reg <= 8'h02;
+                8'h33: dout_reg <= 8'hAE;
+                8'h34: dout_reg <= 8'hE3;
+                8'h35: dout_reg <= 8'h6A;
+                8'h36: dout_reg <= 8'h8F;
+                8'h37: dout_reg <= 8'hA0;
+                8'h38: dout_reg <= 8'h06;
+                8'h39: dout_reg <= 8'h0B;
+                8'h3A: dout_reg <= 8'hED;
+                8'h3B: dout_reg <= 8'h98;
+                8'h3C: dout_reg <= 8'h7F;
+                8'h3D: dout_reg <= 8'hD4;
+                8'h3E: dout_reg <= 8'hD3;
+                8'h3F: dout_reg <= 8'h1F;
+                8'h40: dout_reg <= 8'hEB;
+                8'h41: dout_reg <= 8'h34;
+                8'h42: dout_reg <= 8'h2C;
+                8'h43: dout_reg <= 8'h51;
+                8'h44: dout_reg <= 8'hEA;
+                8'h45: dout_reg <= 8'hC8;
+                8'h46: dout_reg <= 8'h48;
+                8'h47: dout_reg <= 8'hAB;
+                8'h48: dout_reg <= 8'hF2;
+                8'h49: dout_reg <= 8'h2A;
+                8'h4A: dout_reg <= 8'h68;
+                8'h4B: dout_reg <= 8'hA2;
+                8'h4C: dout_reg <= 8'hFD;
+                8'h4D: dout_reg <= 8'h3A;
+                8'h4E: dout_reg <= 8'hCE;
+                8'h4F: dout_reg <= 8'hCC;
+                8'h50: dout_reg <= 8'hB5;
+                8'h51: dout_reg <= 8'h70;
+                8'h52: dout_reg <= 8'h0E;
+                8'h53: dout_reg <= 8'h56;
+                8'h54: dout_reg <= 8'h08;
+                8'h55: dout_reg <= 8'h0C;
+                8'h56: dout_reg <= 8'h76;
+                8'h57: dout_reg <= 8'h12;
+                8'h58: dout_reg <= 8'hBF;
+                8'h59: dout_reg <= 8'h72;
+                8'h5A: dout_reg <= 8'h13;
+                8'h5B: dout_reg <= 8'h47;
+                8'h5C: dout_reg <= 8'h9C;
+                8'h5D: dout_reg <= 8'hB7;
+                8'h5E: dout_reg <= 8'h5D;
+                8'h5F: dout_reg <= 8'h87;
+                8'h60: dout_reg <= 8'h15;
+                8'h61: dout_reg <= 8'hA1;
+                8'h62: dout_reg <= 8'h96;
+                8'h63: dout_reg <= 8'h29;
+                8'h64: dout_reg <= 8'h10;
+                8'h65: dout_reg <= 8'h7B;
+                8'h66: dout_reg <= 8'h9A;
+                8'h67: dout_reg <= 8'hC7;
+                8'h68: dout_reg <= 8'hF3;
+                8'h69: dout_reg <= 8'h91;
+                8'h6A: dout_reg <= 8'h78;
+                8'h6B: dout_reg <= 8'h6F;
+                8'h6C: dout_reg <= 8'h9D;
+                8'h6D: dout_reg <= 8'h9E;
+                8'h6E: dout_reg <= 8'hB2;
+                8'h6F: dout_reg <= 8'hB1;
+                8'h70: dout_reg <= 8'h32;
+                8'h71: dout_reg <= 8'h75;
+                8'h72: dout_reg <= 8'h19;
+                8'h73: dout_reg <= 8'h3D;
+                8'h74: dout_reg <= 8'hFF;
+                8'h75: dout_reg <= 8'h35;
+                8'h76: dout_reg <= 8'h8A;
+                8'h77: dout_reg <= 8'h7E;
+                8'h78: dout_reg <= 8'h6D;
+                8'h79: dout_reg <= 8'h54;
+                8'h7A: dout_reg <= 8'hC6;
+                8'h7B: dout_reg <= 8'h80;
+                8'h7C: dout_reg <= 8'hC3;
+                8'h7D: dout_reg <= 8'hBD;
+                8'h7E: dout_reg <= 8'h0D;
+                8'h7F: dout_reg <= 8'h57;
+                8'h80: dout_reg <= 8'hDF;
+                8'h81: dout_reg <= 8'hF5;
+                8'h82: dout_reg <= 8'h24;
+                8'h83: dout_reg <= 8'hA9;
+                8'h84: dout_reg <= 8'h3E;
+                8'h85: dout_reg <= 8'hA8;
+                8'h86: dout_reg <= 8'h43;
+                8'h87: dout_reg <= 8'hC9;
+                8'h88: dout_reg <= 8'hD7;
+                8'h89: dout_reg <= 8'h79;
+                8'h8A: dout_reg <= 8'hD6;
+                8'h8B: dout_reg <= 8'hF6;
+                8'h8C: dout_reg <= 8'h7C;
+                8'h8D: dout_reg <= 8'h22;
+                8'h8E: dout_reg <= 8'hB9;
+                8'h8F: dout_reg <= 8'h03;
+                8'h90: dout_reg <= 8'hE0;
+                8'h91: dout_reg <= 8'h0F;
+                8'h92: dout_reg <= 8'hEC;
+                8'h93: dout_reg <= 8'hDE;
+                8'h94: dout_reg <= 8'h7A;
+                8'h95: dout_reg <= 8'h94;
+                8'h96: dout_reg <= 8'hB0;
+                8'h97: dout_reg <= 8'hBC;
+                8'h98: dout_reg <= 8'hDC;
+                8'h99: dout_reg <= 8'hE8;
+                8'h9A: dout_reg <= 8'h28;
+                8'h9B: dout_reg <= 8'h50;
+                8'h9C: dout_reg <= 8'h4E;
+                8'h9D: dout_reg <= 8'h33;
+                8'h9E: dout_reg <= 8'h0A;
+                8'h9F: dout_reg <= 8'h4A;
+                8'hA0: dout_reg <= 8'hA7;
+                8'hA1: dout_reg <= 8'h97;
+                8'hA2: dout_reg <= 8'h60;
+                8'hA3: dout_reg <= 8'h73;
+                8'hA4: dout_reg <= 8'h1E;
+                8'hA5: dout_reg <= 8'h00;
+                8'hA6: dout_reg <= 8'h62;
+                8'hA7: dout_reg <= 8'h44;
+                8'hA8: dout_reg <= 8'h1A;
+                8'hA9: dout_reg <= 8'hB8;
+                8'hAA: dout_reg <= 8'h38;
+                8'hAB: dout_reg <= 8'h82;
+                8'hAC: dout_reg <= 8'h64;
+                8'hAD: dout_reg <= 8'h9F;
+                8'hAE: dout_reg <= 8'h26;
+                8'hAF: dout_reg <= 8'h41;
+                8'hB0: dout_reg <= 8'hAD;
+                8'hB1: dout_reg <= 8'h45;
+                8'hB2: dout_reg <= 8'h46;
+                8'hB3: dout_reg <= 8'h92;
+                8'hB4: dout_reg <= 8'h27;
+                8'hB5: dout_reg <= 8'h5E;
+                8'hB6: dout_reg <= 8'h55;
+                8'hB7: dout_reg <= 8'h2F;
+                8'hB8: dout_reg <= 8'h8C;
+                8'hB9: dout_reg <= 8'hA3;
+                8'hBA: dout_reg <= 8'hA5;
+                8'hBB: dout_reg <= 8'h7D;
+                8'hBC: dout_reg <= 8'h69;
+                8'hBD: dout_reg <= 8'hD5;
+                8'hBE: dout_reg <= 8'h95;
+                8'hBF: dout_reg <= 8'h3B;
+                8'hC0: dout_reg <= 8'h07;
+                8'hC1: dout_reg <= 8'h58;
+                8'hC2: dout_reg <= 8'hB3;
+                8'hC3: dout_reg <= 8'h40;
+                8'hC4: dout_reg <= 8'h86;
+                8'hC5: dout_reg <= 8'hAC;
+                8'hC6: dout_reg <= 8'h1D;
+                8'hC7: dout_reg <= 8'hF7;
+                8'hC8: dout_reg <= 8'h30;
+                8'hC9: dout_reg <= 8'h37;
+                8'hCA: dout_reg <= 8'h6B;
+                8'hCB: dout_reg <= 8'hE4;
+                8'hCC: dout_reg <= 8'h88;
+                8'hCD: dout_reg <= 8'hD9;
+                8'hCE: dout_reg <= 8'hE7;
+                8'hCF: dout_reg <= 8'h89;
+                8'hD0: dout_reg <= 8'hE1;
+                8'hD1: dout_reg <= 8'h1B;
+                8'hD2: dout_reg <= 8'h83;
+                8'hD3: dout_reg <= 8'h49;
+                8'hD4: dout_reg <= 8'h4C;
+                8'hD5: dout_reg <= 8'h3F;
+                8'hD6: dout_reg <= 8'hF8;
+                8'hD7: dout_reg <= 8'hFE;
+                8'hD8: dout_reg <= 8'h8D;
+                8'hD9: dout_reg <= 8'h53;
+                8'hDA: dout_reg <= 8'hAA;
+                8'hDB: dout_reg <= 8'h90;
+                8'hDC: dout_reg <= 8'hCA;
+                8'hDD: dout_reg <= 8'hD8;
+                8'hDE: dout_reg <= 8'h85;
+                8'hDF: dout_reg <= 8'h61;
+                8'hE0: dout_reg <= 8'h20;
+                8'hE1: dout_reg <= 8'h71;
+                8'hE2: dout_reg <= 8'h67;
+                8'hE3: dout_reg <= 8'hA4;
+                8'hE4: dout_reg <= 8'h2D;
+                8'hE5: dout_reg <= 8'h2B;
+                8'hE6: dout_reg <= 8'h09;
+                8'hE7: dout_reg <= 8'h5B;
+                8'hE8: dout_reg <= 8'hCB;
+                8'hE9: dout_reg <= 8'h9B;
+                8'hEA: dout_reg <= 8'h25;
+                8'hEB: dout_reg <= 8'hD0;
+                8'hEC: dout_reg <= 8'hBE;
+                8'hED: dout_reg <= 8'hE5;
+                8'hEE: dout_reg <= 8'h6C;
+                8'hEF: dout_reg <= 8'h52;
+                8'hF0: dout_reg <= 8'h59;
+                8'hF1: dout_reg <= 8'hA6;
+                8'hF2: dout_reg <= 8'h74;
+                8'hF3: dout_reg <= 8'hD2;
+                8'hF4: dout_reg <= 8'hE6;
+                8'hF5: dout_reg <= 8'hF4;
+                8'hF6: dout_reg <= 8'hB4;
+                8'hF7: dout_reg <= 8'hC0;
+                8'hF8: dout_reg <= 8'hD1;
+                8'hF9: dout_reg <= 8'h66;
+                8'hFA: dout_reg <= 8'hAF;
+                8'hFB: dout_reg <= 8'hC2;
+                8'hFC: dout_reg <= 8'h39;
+                8'hFD: dout_reg <= 8'h4B;
+                8'hFE: dout_reg <= 8'h63;
+                8'hFF: dout_reg <= 8'hB6;
+                //
+            endcase // case (din)
+            //
+        end // if (ena)
+        //
+    end // always @(posedge clk)
+
+
+endmodule
diff --git a/streebog_hash/tb/streebog_tb.v b/streebog_hash/tb/streebog_tb.v
new file mode 100644
index 0000000..291f11c
--- /dev/null
+++ b/streebog_hash/tb/streebog_tb.v
@@ -0,0 +1,198 @@
+`timescale 1ns / 1ps
+
+module streebog_tb;
+
+
+    localparam STREEBOG_MODE_SHORT = 1;
+    localparam STREEBOG_MODE_LONG  = 0;
+
+    // short message that fits into one block
+    localparam [511:0] MSG_SINGLE = 512'h01323130393837363534333231303938373635343332313039383736353433323130393837363534333231303938373635343332313039383736353433323130;
+
+    // length of short message in bits
+
localparam [  9:0] MSG_SINGLE_LENGTH = 10'd504;
+
+    // correct 512-bit digest of short message
+    localparam [511:0] MSG_SINGLE_DIGEST_LONG = 512'h486f64c1917879417fef082b3381a4e211c324f074654c38823a7b76f830ad00fa1fbae42b1285c0352f227524bc9ab16254288dd6863dccd5b9f54a1ad0541b;
+
+    // correct 256-bit digest of short message
+    localparam [255:0] MSG_SINGLE_DIGEST_SHORT = 256'h00557be5e584fd52a449b16b0251d05d27f94ab76cbaa6da890b59d8ef1e159d;
+
+
+    // first block of long message
+    localparam [511:0] MSG_DOUBLE_FIRST = 512'hfbeafaebef20fffbf0e1e0f0f520e0ed20e8ece0ebe5f0f2f120fff0eeec20f120faf2fee5e2202ce8f6f3ede220e8e6eee1e8f0f2d1202ce8f0f2e5e220e5d1;
+
+    // second block of long message
+    localparam [511:0] MSG_DOUBLE_SECOND = 512'h0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001fbe2e5f0eee3c820;
+
+    // length of first part of long message in bits
+    localparam [  9:0] MSG_DOUBLE_FIRST_LENGTH = 10'd512;
+
+    // length of second part of long message in bits
+    localparam [  9:0] MSG_DOUBLE_SECOND_LENGTH = 10'd64;
+
+    // correct 512-bit digest of long message
+    localparam [511:0] MSG_DOUBLE_DIGEST_LONG = 512'h28fbc9bada033b1460642bdcddb90c3fb3e56c497ccd0f62b8a2ad4935e85f037613966de4ee00531ae60f3b5a47f8dae06915d5f2f194996fcabf2622e6881e;
+
+    // correct 256-bit digest of long message
+    localparam [255:0] MSG_DOUBLE_DIGEST_SHORT = 256'h508f7e553c06501d749a66fc28c6cac0b005746d97537fa85d9e40904efed29d;    // range matches the 256-bit literal, like MSG_SINGLE_DIGEST_SHORT
+
+
+    // Inputs: stimulus registers wired to the UUT below. init, update
+    // and final start out low; the remaining registers have no initial
+    // value in their declarations.
+    reg             clock;
+    reg     [511:0] block;
+    reg     [  9:0] block_length;
+    reg             init   = 0;
+    reg             update = 0;
+    reg             final  = 0;
+    reg             short_mode;
+
+
+    //
+    // Outputs (driven by the UUT)
+    //
+    wire    [511:0] digest;
+    wire            digest_valid;
+    wire            ready;
+
+
+    //
+    // UUT
+    //
+    streebog_hash_top uut
+    (
+        .clock          (clock),
+
+        .block          (block),
+        .block_length   (block_length),
+        .init           (init),
+        .update         (update),
+        .final          (final),
+        .short_mode     (short_mode),
+        .digest         (digest),
+        .digest_valid   (digest_valid),
+        .ready          (ready)
+    );
+
+    //
+
// Clock
+    //
+    initial clock = 1'b0;
+    always #5 clock = ~clock;    // toggles every 5 time units (10-unit period)
+
+    reg     [511:0] hash;    // digest captured by streebog_final
+    wire    [255:0] hash_short = hash[511:256];    // upper half, checked in 256-bit mode
+
+    initial begin
+        //
+        #100;
+        // test 1: one-block message, 512-bit digest
+        $display("Checking 512-bit mode on short message...");
+        //
+        streebog_init(STREEBOG_MODE_LONG);
+        streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+        streebog_update();
+        streebog_final();
+        //
+        if (hash == MSG_SINGLE_DIGEST_LONG) $display("OK");
+        else $display("ERROR: hash == %0128h", hash);
+        //
+        #100;
+        // test 2: one-block message, 256-bit digest
+        $display("Checking 256-bit mode on short message...");
+        //
+        streebog_init(STREEBOG_MODE_SHORT);
+        streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+        streebog_update();
+        streebog_final();
+        //
+        if (hash_short == MSG_SINGLE_DIGEST_SHORT) $display("OK");
+        else $display("ERROR: hash_short == %064h", hash_short);
+        //
+        #100;
+        // test 3: two-block message, 512-bit digest
+        $display("Checking 512-bit mode on long message...");
+        //
+        streebog_init(STREEBOG_MODE_LONG);
+        streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+        streebog_update();
+        streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+        streebog_update();
+        streebog_final();
+        //
+        if (hash == MSG_DOUBLE_DIGEST_LONG) $display("OK");
+        else $display("ERROR: hash == %0128h", hash);
+        //
+        #100;
+        // test 4: two-block message, 256-bit digest
+        $display("Checking 256-bit mode on long message...");
+        //
+        streebog_init(STREEBOG_MODE_SHORT);
+        streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+        streebog_update();
+        streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+        streebog_update();
+        streebog_final();
+        //
+        if (hash_short == MSG_DOUBLE_DIGEST_SHORT) $display("OK");
+        else $display("ERROR: hash_short == %064h", hash_short);
+        //
+        #100;
+        //
+        $finish;
+    end
+
+
+    task streebog_init;    // select the digest mode and pulse init for 10 time units
+        input use_short_mode;
+        begin
+            short_mode = use_short_mode;
+            init = 1;
+            #10;
+            init = 0;
+            #10;
+        end
+    endtask
+
+
+    task streebog_set_block;    // load block and block_length; takes no simulation time
+        input [511:0] new_block;
+        input [  9:0] new_block_length;
+        begin
+            block = new_block;
+            block_length = new_block_length;
+
+        end
+    endtask
+
+
+    task streebog_update;    // pulse update, then poll ready every 10 time units
+        begin
+            update = 1;
+            #10;
+            update = 0;
+            #10;
+            while (!ready) #10;
+            #10;
+        end
+    endtask
+
+
+    task streebog_final;    // pulse final, capture digest once digest_valid is seen, then wait for ready
+        begin
+            final = 1;
+            #10;
+            final = 0;
+            #10;
+            while (!digest_valid) #10;
+            hash = digest;
+            #10;
+            while (!ready) #10;
+            #10;
+        end
+    endtask
+
+endmodule
+
-- 
cgit v1.2.3