summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov <meisterpaul1@yandex.ru>2015-05-28 01:51:26 +0400
committerPavel V. Shatov <meisterpaul1@yandex.ru>2015-05-28 13:01:54 +0400
commitcd8f45d313fe760d7f71a425bdbb567afac219d1 (patch)
tree7a6fc1aad7bd8a1665f9ac76e1305bb0af8ef441
Initial version of GOST 34.11-2012 (aka Streebog) hash coreHEADmaster
-rw-r--r--streebog.v233
-rw-r--r--streebog_hash/ip/adder_s6.xco73
-rw-r--r--streebog_hash/streebog_core_adder_s6.v152
-rw-r--r--streebog_hash/streebog_core_lps.v405
-rw-r--r--streebog_hash/streebog_hash_top.v421
-rw-r--r--streebog_hash/streebog_rom_a_matrix.v152
-rw-r--r--streebog_hash/streebog_rom_c_array.v58
-rw-r--r--streebog_hash/streebog_rom_s_table.v299
-rw-r--r--streebog_hash/tb/streebog_tb.v198
-rw-r--r--streebog_wrapper.v241
10 files changed, 2232 insertions, 0 deletions
diff --git a/streebog.v b/streebog.v
new file mode 100644
index 0000000..cd622e1
--- /dev/null
+++ b/streebog.v
@@ -0,0 +1,233 @@
+module streebog_wrapper
+ (
+ input wire clk, // clock; every register in this wrapper updates on its rising edge
+ input wire rst, // synchronous, active-high reset of the bus-side registers
+ // Simple 32-bit register interface wrapped around streebog_hash_top.
+ input wire cs, // chip select: an access is performed only while this is high
+ input wire we, // 1 = write access, 0 = read access (only evaluated while cs is high)
+
+ input wire [ 7: 0] address, // register address, see the ADDR_* constants below
+ input wire [31: 0] write_data, // data for write accesses
+ output wire [31: 0] read_data // registered read data, updated only by read accesses
+ );
+
+ //----------------------------------------------------------------
+ // Internal constant and parameter definitions.
+ //----------------------------------------------------------------
+ localparam ADDR_NAME0 = 8'h00;
+ localparam ADDR_NAME1 = 8'h01;
+ localparam ADDR_VERSION = 8'h02;
+
+ localparam ADDR_CTRL = 8'h08; // {short, final, update, init}
+ localparam ADDR_STATUS = 8'h09; // {valid, ready}
+ localparam ADDR_BLOCK_BITS = 8'h0a; // block length in bits
+ // Message block words: BLOCK0 is the most significant word [511:480].
+ localparam ADDR_BLOCK0 = 8'h10;
+ localparam ADDR_BLOCK1 = 8'h11;
+ localparam ADDR_BLOCK2 = 8'h12;
+ localparam ADDR_BLOCK3 = 8'h13;
+ localparam ADDR_BLOCK4 = 8'h14;
+ localparam ADDR_BLOCK5 = 8'h15;
+ localparam ADDR_BLOCK6 = 8'h16;
+ localparam ADDR_BLOCK7 = 8'h17;
+ localparam ADDR_BLOCK8 = 8'h18;
+ localparam ADDR_BLOCK9 = 8'h19;
+ localparam ADDR_BLOCK10 = 8'h1a;
+ localparam ADDR_BLOCK11 = 8'h1b;
+ localparam ADDR_BLOCK12 = 8'h1c;
+ localparam ADDR_BLOCK13 = 8'h1d;
+ localparam ADDR_BLOCK14 = 8'h1e;
+ localparam ADDR_BLOCK15 = 8'h1f;
+ // Digest words (read-only): DIGEST0 is the most significant word [511:480].
+ localparam ADDR_DIGEST0 = 8'h20;
+ localparam ADDR_DIGEST1 = 8'h21;
+ localparam ADDR_DIGEST2 = 8'h22;
+ localparam ADDR_DIGEST3 = 8'h23;
+ localparam ADDR_DIGEST4 = 8'h24;
+ localparam ADDR_DIGEST5 = 8'h25;
+ localparam ADDR_DIGEST6 = 8'h26;
+ localparam ADDR_DIGEST7 = 8'h27;
+ localparam ADDR_DIGEST8 = 8'h28;
+ localparam ADDR_DIGEST9 = 8'h29;
+ localparam ADDR_DIGEST10 = 8'h2a;
+ localparam ADDR_DIGEST11 = 8'h2b;
+ localparam ADDR_DIGEST12 = 8'h2c;
+ localparam ADDR_DIGEST13 = 8'h2d;
+ localparam ADDR_DIGEST14 = 8'h2e;
+ localparam ADDR_DIGEST15 = 8'h2f;
+
+ // Bit positions inside the control and status registers.
+ localparam CTRL_INIT_BIT = 0;
+ localparam CTRL_UPDATE_BIT = 1;
+ localparam CTRL_FINAL_BIT = 2;
+ localparam CTRL_SHORT_BIT = 3;
+
+ localparam STATUS_READY_BIT = 0;
+ localparam STATUS_VALID_BIT = 1;
+
+ localparam CORE_NAME0 = 32'h73747265; // "stre"
+ localparam CORE_NAME1 = 32'h65626F67; // "ebog"
+ localparam CORE_VERSION = 32'h302E3130; // "0.10"
+
+
+ //----------------------------------------------------------------
+ // Control register
+ //----------------------------------------------------------------
+ reg [3:0] reg_ctrl; // core input
+ reg [9:0] reg_block_bits; // input block length
+
+
+ //----------------------------------------------------------------
+ // Init, Update and Final 1-Cycle Pulses
+ //----------------------------------------------------------------
+ reg [3:0] reg_ctrl_dly; // reg_ctrl delayed by one clock, used for rising-edge detection
+ always @(posedge clk) reg_ctrl_dly <= reg_ctrl;
+ // A pulse is produced only on a 0 -> 1 change of a control bit, so software must clear a bit before re-triggering it.
+ wire core_init_pulse = (reg_ctrl[CTRL_INIT_BIT] == 1'b1) && (reg_ctrl_dly[CTRL_INIT_BIT] == 1'b0);
+ wire core_update_pulse = (reg_ctrl[CTRL_UPDATE_BIT] == 1'b1) && (reg_ctrl_dly[CTRL_UPDATE_BIT] == 1'b0);
+ wire core_final_pulse = (reg_ctrl[CTRL_FINAL_BIT] == 1'b1) && (reg_ctrl_dly[CTRL_FINAL_BIT] == 1'b0);
+
+
+ //----------------------------------------------------------------
+ // Status register
+ //----------------------------------------------------------------
+ wire core_ready; // core output
+ wire digest_valid; // core output
+
+ wire [1:0] reg_status = {digest_valid, core_ready};
+
+
+ //----------------------------------------------------------------
+ // Block and Digest
+ //----------------------------------------------------------------
+ reg [511 : 0] core_block; // core input
+ wire [511 : 0] core_digest; // core output
+
+
+ //----------------------------------------------------------------
+ // core instantiation.
+ //----------------------------------------------------------------
+ streebog_hash_top streebog
+ (
+ .clock (clk),
+
+ .block (core_block),
+ .block_length (reg_block_bits),
+
+ .init (core_init_pulse),
+ .update (core_update_pulse),
+ .final (core_final_pulse),
+
+ .short_mode (reg_ctrl[CTRL_SHORT_BIT]),
+ .digest (core_digest),
+ .digest_valid (digest_valid),
+ .ready (core_ready)
+ );
+
+ //----------------------------------------------------------------
+ // Read Latch
+ //----------------------------------------------------------------
+ reg [31: 0] tmp_read_data; // holds the result of the most recent read access
+
+ assign read_data = tmp_read_data;
+
+
+ //----------------------------------------------------------------
+ // Read/Write Interface
+ //----------------------------------------------------------------
+ always @(posedge clk)
+ //
+ if (rst) begin
+ // Reset every bus-writable register so that read-back and the core inputs are defined.
+ reg_ctrl <= 4'b0000;
+ reg_block_bits <= 10'd0;
+ core_block <= {512{1'b0}};
+ tmp_read_data <= 32'h00000000;
+ end else if (cs) begin
+ //
+ if (we) begin
+ //
+ // Write Handler (writes to any other address are silently ignored)
+ //
+ case (address)
+ ADDR_CTRL: reg_ctrl <= write_data[3:0];
+ ADDR_BLOCK_BITS: reg_block_bits <= write_data[9:0];
+ ADDR_BLOCK0: core_block[511:480] <= write_data;
+ ADDR_BLOCK1: core_block[479:448] <= write_data;
+ ADDR_BLOCK2: core_block[447:416] <= write_data;
+ ADDR_BLOCK3: core_block[415:384] <= write_data;
+ ADDR_BLOCK4: core_block[383:352] <= write_data;
+ ADDR_BLOCK5: core_block[351:320] <= write_data;
+ ADDR_BLOCK6: core_block[319:288] <= write_data;
+ ADDR_BLOCK7: core_block[287:256] <= write_data;
+ ADDR_BLOCK8: core_block[255:224] <= write_data;
+ ADDR_BLOCK9: core_block[223:192] <= write_data;
+ ADDR_BLOCK10: core_block[191:160] <= write_data;
+ ADDR_BLOCK11: core_block[159:128] <= write_data;
+ ADDR_BLOCK12: core_block[127: 96] <= write_data;
+ ADDR_BLOCK13: core_block[ 95: 64] <= write_data;
+ ADDR_BLOCK14: core_block[ 63: 32] <= write_data;
+ ADDR_BLOCK15: core_block[ 31: 0] <= write_data;
+ endcase
+ //
+ end else begin
+ //
+ // Read Handler (unknown addresses read back as zero)
+ //
+ case (address)
+ ADDR_NAME0: tmp_read_data <= CORE_NAME0;
+ ADDR_NAME1: tmp_read_data <= CORE_NAME1;
+ ADDR_VERSION: tmp_read_data <= CORE_VERSION;
+ ADDR_CTRL: tmp_read_data <= {{28{1'b0}}, reg_ctrl};
+ ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status};
+ ADDR_BLOCK_BITS: tmp_read_data <= {{22{1'b0}}, reg_block_bits};
+ //
+ ADDR_BLOCK0: tmp_read_data <= core_block[511:480];
+ ADDR_BLOCK1: tmp_read_data <= core_block[479:448];
+ ADDR_BLOCK2: tmp_read_data <= core_block[447:416];
+ ADDR_BLOCK3: tmp_read_data <= core_block[415:384];
+ ADDR_BLOCK4: tmp_read_data <= core_block[383:352];
+ ADDR_BLOCK5: tmp_read_data <= core_block[351:320];
+ ADDR_BLOCK6: tmp_read_data <= core_block[319:288];
+ ADDR_BLOCK7: tmp_read_data <= core_block[287:256];
+ ADDR_BLOCK8: tmp_read_data <= core_block[255:224];
+ ADDR_BLOCK9: tmp_read_data <= core_block[223:192];
+ ADDR_BLOCK10: tmp_read_data <= core_block[191:160];
+ ADDR_BLOCK11: tmp_read_data <= core_block[159:128];
+ ADDR_BLOCK12: tmp_read_data <= core_block[127: 96];
+ ADDR_BLOCK13: tmp_read_data <= core_block[ 95: 64];
+ ADDR_BLOCK14: tmp_read_data <= core_block[ 63: 32];
+ ADDR_BLOCK15: tmp_read_data <= core_block[ 31: 0];
+ //
+ ADDR_DIGEST0: tmp_read_data <= core_digest[511:480];
+ ADDR_DIGEST1: tmp_read_data <= core_digest[479:448];
+ ADDR_DIGEST2: tmp_read_data <= core_digest[447:416];
+ ADDR_DIGEST3: tmp_read_data <= core_digest[415:384];
+ ADDR_DIGEST4: tmp_read_data <= core_digest[383:352];
+ ADDR_DIGEST5: tmp_read_data <= core_digest[351:320];
+ ADDR_DIGEST6: tmp_read_data <= core_digest[319:288];
+ ADDR_DIGEST7: tmp_read_data <= core_digest[287:256];
+ ADDR_DIGEST8: tmp_read_data <= core_digest[255:224];
+ ADDR_DIGEST9: tmp_read_data <= core_digest[223:192];
+ ADDR_DIGEST10: tmp_read_data <= core_digest[191:160];
+ ADDR_DIGEST11: tmp_read_data <= core_digest[159:128];
+ ADDR_DIGEST12: tmp_read_data <= core_digest[127: 96];
+ ADDR_DIGEST13: tmp_read_data <= core_digest[ 95: 64];
+ ADDR_DIGEST14: tmp_read_data <= core_digest[ 63: 32];
+ ADDR_DIGEST15: tmp_read_data <= core_digest[ 31: 0];
+ //
+ default: tmp_read_data <= 32'h00000000;
+ //
+ endcase
+ //
+ end
+ //
+ end
+
+
+endmodule // streebog_wrapper
+
+
+//======================================================================
+// EOF streebog_wrapper.v
+//======================================================================
diff --git a/streebog_hash/ip/adder_s6.xco b/streebog_hash/ip/adder_s6.xco
new file mode 100644
index 0000000..23b7d94
--- /dev/null
+++ b/streebog_hash/ip/adder_s6.xco
@@ -0,0 +1,73 @@
+##############################################################
+#
+# Xilinx Core Generator version 14.7
+# Date: Tue Mar 24 19:41:47 2015
+#
+##############################################################
+#
+# This file contains the customisation parameters for a
+# Xilinx CORE Generator IP GUI. It is strongly recommended
+# that you do not manually alter this file as it may cause
+# unexpected and unsupported behavior.
+#
+##############################################################
+#
+# Generated from component: xilinx.com:ip:c_addsub:11.0
+#
+##############################################################
+#
+# BEGIN Project Options
+SET addpads = false
+SET asysymbol = true
+SET busformat = BusFormatAngleBracketNotRipped
+SET createndf = false
+SET designentry = Verilog
+SET device = xc6slx45
+SET devicefamily = spartan6
+SET flowvendor = Other
+SET formalverification = false
+SET foundationsym = false
+SET implementationfiletype = Ngc
+SET package = csg324
+SET removerpms = false
+SET simulationfiles = Behavioral
+SET speedgrade = -3
+SET verilogsim = true
+SET vhdlsim = false
+# END Project Options
+# BEGIN Select
+SELECT Adder_Subtracter xilinx.com:ip:c_addsub:11.0
+# END Select
+# BEGIN Parameters
+CSET a_type=Unsigned
+CSET a_width=32
+CSET add_mode=Add
+CSET ainit_value=0
+CSET b_constant=false
+CSET b_type=Unsigned
+CSET b_value=00000000000000000000000000000000
+CSET b_width=32
+CSET borrow_sense=Active_Low
+CSET bypass=false
+CSET bypass_ce_priority=CE_Overrides_Bypass
+CSET bypass_sense=Active_High
+CSET c_in=true
+CSET c_out=true
+CSET ce=true
+CSET component_name=adder_s6
+CSET implementation=DSP48
+CSET latency=1
+CSET latency_configuration=Manual
+CSET out_width=32
+CSET sclr=false
+CSET sinit=false
+CSET sinit_value=0
+CSET sset=false
+CSET sync_ce_priority=Sync_Overrides_CE
+CSET sync_ctrl_priority=Reset_Overrides_Set
+# END Parameters
+# BEGIN Extra information
+MISC pkg_timestamp=2013-07-22T10:35:41Z
+# END Extra information
+GENERATE
+# CRC: 13f690be
diff --git a/streebog_hash/streebog_core_adder_s6.v b/streebog_hash/streebog_core_adder_s6.v
new file mode 100644
index 0000000..3c254eb
--- /dev/null
+++ b/streebog_hash/streebog_core_adder_s6.v
@@ -0,0 +1,152 @@
+`timescale 1ns / 1ps
+
+module streebog_core_adder_s6
+ (
+ clk,
+ ena, rdy,
+ x, y, sum
+ );
+ // Multi-cycle 512-bit adder: sum = x + y, with the carry out of bit 511 left unconnected (dropped).
+ // Built from sixteen chained 32-bit adder_s6 instances that are clock-enabled one after another.
+ //
+ // Ports
+ //
+ input wire clk; // core clock
+ input wire ena; // start addition flag (only honoured while rdy is high)
+ output wire rdy; // addition done flag (sum is valid)
+ input wire [511:0] x; // item x
+ input wire [511:0] y; // item y
+ output wire [511:0] sum; // x+y
+
+
+ /*
+ * ISE cannot synthesize adders using fabric that are more than 256 bits wide. Items X and Y are 512-bit wide, so
+ * Spartan-6 DSP blocks are used to overcome this issue. Every DSP block is configured to add 32 bits at a time,
+ * so total of 512/32=16 DSP blocks are required to implement addition. Every DSP block is configured to expose
+ * carry input and output ports. Overflow at 512-bit boundary should be ignored according to the specification,
+ * that's why only 15 intermediate carry lines are required.
+ *
+ * +-------------------+-------------------+- -+-------------------+
+ * [X] | 511 : 480 | 479 : 448 | ... | 31 : 0 |
+ * +------*------------+------*------------+- -+------*------------+
+ * | | |
+ * +------|------------+------|------------+- -+------|------------+
+ * [Y] | | 511 : 480 | | 479 : 448 | ... | | 31 : 0 |
+ * +------|-----*------+------|------------+- -+------|------------+
+ * | | | | | |
+ * | | | | | |
+ * v v v v v v
+ * +---+-+---+ +---+-+---+ +---+-+---+
+ * | A | | B | | A | | B | | A | | B |
+ * +---+-+---+ +---+-+---+ +---+-+---+
+ * | DSP #15 | | DSP #14 | | DSP #0 |
+ * |---------| |---------| |---------|
+ * | Carry | | Carry | | Carry |
+ * X --<-Out In-<--C[14]--<-Out In-<--C[13]- ... -C[ 0]--<-Out In-<-- 0
+ * +---------+ +---------+ +---------+
+ * | S | | S | | S |
+ * +---------+ +---------+ +---------+
+ * | | |
+ * v v v
+ * +---------*---------+---------*---------+- -+---------*---------+
+ * [Z] | 511 : 480 | 479 : 448 | ... | 31 : 0 |
+ * +-------------------+-------------------+- -+-------------------+
+ *
+ */
+
+
+ //
+ // Internals
+ //
+ wire [511:0] z; // concatenated outputs of adders
+ wire [14:0] z_carry; // carry lines
+ reg [511:0] sum_reg; // output register
+
+ assign sum = sum_reg;
+
+
+ //
+ // Shift Register
+ //
+
+ /*
+ * This shift register is re-loaded with "walking one" bit pattern whenever enable
+ * input is active and adder core is ready. The most significant bit [17] acts as a
+ * ready flag. Lower 16 bits [15:0] control DSP blocks (Clock Enable). Intermediate
+ * bit [16] is required to compensate for 1-cycle latency of DSP blocks.
+ *
+ */
+
+ reg [17: 0] ce_shreg = {1'b1, 1'b0, 16'h0000}; // power-up value: ready, no adder enabled
+
+ assign rdy = ce_shreg[17];
+
+
+ //
+ // Shift Register Logic
+ //
+ always @(posedge clk)
+ // While busy the single set bit moves one position up per clock; a new run starts only when ready and ena is high.
+ if (! rdy) ce_shreg <= {ce_shreg[16:0], 1'b0};
+ else if (ena) ce_shreg <= {1'b0, 1'b0, 16'h0001};
+
+
+ //
+ // Output Register Logic
+ //
+ always @(posedge clk)
+ // z is captured on the same clock edge that moves the walking one into bit [17], so sum and rdy update together.
+ if (ce_shreg[16] == 1'b1) sum_reg <= z;
+
+ // NOTE(review): adder i is enabled only during its own cycle, so x and y presumably must stay stable until rdy - confirm against adder_s6.
+ //
+ // LSB Adder
+ //
+ adder_s6 adder_s6_lsb
+ (
+ .clk (clk), //
+ .ce (ce_shreg[0]), // clock enable [0]
+ .a (x[ 31: 0]), //
+ .b (y[ 31: 0]), //
+ .s (z[ 31: 0]), //
+ .c_in (1'b0), // carry input tied to 0
+ .c_out (z_carry[0]) // carry[0] to next adder
+ );
+
+
+ //
+ // MSB Adder
+ //
+ adder_s6 adder_s6_msb
+ (
+ .clk (clk), //
+ .ce (ce_shreg[15]), // clock enable [15]
+ .a (x[511:480]), //
+ .b (y[511:480]), //
+ .s (z[511:480]), //
+ .c_in (z_carry[14]), // carry[14] from previous adder
+ .c_out () // carry output not connected
+ );
+
+
+ //
+ // Intermediate Adders
+ //
+ genvar i;
+ generate for (i=1; i<=14; i=i+1)
+ begin: gen_adder_s6
+ adder_s6 adder_s6_int
+ (
+ .clk (clk), //
+ .ce (ce_shreg[i]), // clock enable [1..14]
+ .a (x[32*i+31:32*i]), //
+ .b (y[32*i+31:32*i]), //
+ .s (z[32*i+31:32*i]), //
+ .c_in (z_carry[i-1]), // carry[0..13] from previous adder
+ .c_out (z_carry[i]) // carry[1..14] to next adder
+ );
+ end
+ endgenerate
+
+
+endmodule
diff --git a/streebog_hash/streebog_core_lps.v b/streebog_hash/streebog_core_lps.v
new file mode 100644
index 0000000..a668f16
--- /dev/null
+++ b/streebog_hash/streebog_core_lps.v
@@ -0,0 +1,405 @@
+`timescale 1ns / 1ps
+
+module streebog_core_lps
+ (
+ clk,
+ ena, rdy, last,
+ din, dout
+ );
+ // Multi-cycle LPS transformation of a 512-bit word: P and S share one pipeline, L has its own.
+
+ //
+ // Parameters
+ //
+ parameter PS_PIPELINE_STAGES = 8; // 2, 4, 8
+ parameter L_PIPELINE_STAGES = 8; // 2, 4, 8, 16, 32, 64
+
+
+ //
+ // Ports
+ //
+ input wire clk; // core clock
+ input wire ena; // start transformation flag
+ output wire rdy; // transformation done flag (dout is valid)
+ output wire last; // transformation about to complete (rdy flag will be asserted during the next cycle)
+ input wire [511:0] din; // input data to transform
+ output wire [511:0] dout; // output data (result of transformation)
+
+
+ /*
+ * This LPS core has parametrized internal pipeline. P and S transformations are combined into one PS transformation and
+ * have common pipeline. L transformation has its own separate pipeline. The total latency of this core is thus
+ * PS_PIPELINE_STAGES*L_PIPELINE_STAGES. The fastest version completes the transformation in 2*2=4 cycles, the slowest
+ * version requires 8*64=512 cycles. S transformation substitutes bytes according to a lookup table. P transformation does
+ * permutation of input bytes. L transformation multiplies input data by a special predefined matrix. If you don't understand
+ * how matrices are multiplied, you should not try to understand how the following code works. This may damage your brain.
+ * You've been warned. Seriously.
+ *
+ */
+
+ // NOTE(review): the timing diagram further below shows rdy returning one cycle after the last counter step - confirm the exact cycle count in simulation.
+ //
+ // Constants
+ //
+
+ /*
+ * PS transformation operates on 64-bit words. Input data contains 512/64=8 such words.
+ * Depending on PS pipeline stage count we can transform 1, 2 or 4 words at a time.
+ *
+ * L transformation operates on 64-bit words. Depending on L pipeline stage count we
+ * can transform 1, 2, 4, 8, 16 or 32 bits of a word at a time.
+ *
+ */
+
+ localparam PS_WORDS_AT_ONCE = 8 / PS_PIPELINE_STAGES;
+ localparam L_BITS_AT_ONCE = 64 / L_PIPELINE_STAGES;
+
+ /*
+ * These functions return number of bits needed to store pipeline stage counters. They will
+ * also prevent users from specifying illegal pipeline widths. This module will not synthesize
+ * with invalid pipeline stage count, because counter width will not be explicitly defined.
+ * (The case statements below deliberately have no default branch for that reason.)
+ */
+
+ function integer PS_NUM_COUNT_BITS;
+ input integer x;
+ begin
+ case (x)
+ 2: PS_NUM_COUNT_BITS = 1;
+ 4: PS_NUM_COUNT_BITS = 2;
+ 8: PS_NUM_COUNT_BITS = 3;
+ endcase
+ end
+ endfunction
+
+ function integer L_NUM_COUNT_BITS;
+ input integer y;
+ begin
+ case (y)
+ 2: L_NUM_COUNT_BITS = 1;
+ 4: L_NUM_COUNT_BITS = 2;
+ 8: L_NUM_COUNT_BITS = 3;
+ 16: L_NUM_COUNT_BITS = 4;
+ 32: L_NUM_COUNT_BITS = 5;
+ 64: L_NUM_COUNT_BITS = 6;
+ endcase
+ end
+ endfunction
+
+
+ //
+ // Counter Widths
+ //
+ localparam L_CNT_BITS = L_NUM_COUNT_BITS(L_PIPELINE_STAGES); // width of L counter
+ localparam PS_CNT_BITS = PS_NUM_COUNT_BITS(PS_PIPELINE_STAGES); // width of PS counter
+
+
+ //
+ // Input Multiplexor
+ //
+ wire [63: 0] din_mux[0:7]; // eight 64-bit words
+
+ /*
+ * This multiplexor does the P transformation. P transformation is effectively a matrix
+ * transposition. Input 512-bit word is treated as a 8x8 byte matrix. Multiplexor outputs
+ * a set of 8 64-bit words. These words are columns of the original matrix (transposition
+ * turns rows into columns).
+ *
+ */
+
+ genvar i, j;
+ generate for (i=0; i<8; i=i+1)
+ begin: gen_din_mux_i
+ for (j=0; j<8; j=j+1) begin: gen_din_mux_j
+ assign din_mux[i][8*j + 7 : 8*j] = din[64*j + 8*i + 7 : 64*j + 8*i]; // byte j of word i = byte i of input word j
+ end
+ end
+ endgenerate
+
+
+ //
+ // Output Multiplexor
+ //
+ reg [63: 0] dout_mux[0:7]; // eight 64-bit words
+
+ /*
+ * Output 64-bit subwords are concatenated to form output 512-bit word.
+ *
+ */
+
+ genvar k;
+ generate for (k=0; k<8; k=k+1)
+ begin: gen_dout_mux
+ assign dout[64*k+63:64*k] = dout_mux[k];
+ end
+ endgenerate
+
+
+ //
+ // PS and L Counters
+ //
+
+ /*
+ * These counters control internal data flow of this core. For example, if PS has 2 stages and
+ * L has 4 stages, then the count will look like this:
+ * ____
+ * ENA \\\________________________________
+ * _____ _
+ * RDY ^ \_______________________________/
+ * | | | | | | | | | |
+ * +----+---+---+---+---+---+---+---+---+---+-
+ * | PS | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 |
+ * +----+---+---+---+---+---+---+---+---+---+-
+ * | L | 0 | 1 | 2 | 3 | 0 | 1 | 2 | 3 | 0 |
+ * +----+---+---+---+---+---+---+---+---+---+-
+ * ^ ^ |
+ * | | +--> both counters will be zero during the last cycle
+ * | |
+ * +---------------+------------------> preloading of new word(s) into S lookup table(s)
+ *
+ */
+
+ reg [ L_CNT_BITS-1:0] l_count = { L_CNT_BITS{1'b0}};
+ reg [PS_CNT_BITS-1:0] ps_count = {PS_CNT_BITS{1'b0}};
+
+
+ //
+ // Handy Flags
+ //
+
+ /*
+ * These flags are used instead of lengthy (z_count == {Z_CNT_BITS{1'bZ}}) comparisons.
+ *
+ */
+
+ wire l_count_done = ( l_count == { L_CNT_BITS{1'b1}}) ? 1 : 0;
+ wire ps_count_done = (ps_count == {PS_CNT_BITS{1'b1}}) ? 1 : 0;
+
+ wire l_count_zero = ( l_count == { L_CNT_BITS{1'b0}}) ? 1 : 0;
+ wire ps_count_zero = (ps_count == {PS_CNT_BITS{1'b0}}) ? 1 : 0;
+
+
+ //
+ // Preload Flags
+ //
+
+ /*
+ * These flags are used as clock enables for S lookup table.
+ *
+ */
+
+ wire ps_preload_first = (rdy && ena);
+ wire ps_preload_next = (!rdy && !ps_count_zero && l_count_zero);
+
+
+ //
+ // Last Flag
+ //
+
+ /*
+ * This flag indicates that core operation is about to complete.
+ *
+ */
+ assign last = !rdy && ps_count_zero && l_count_zero;
+
+
+ //
+ // Counter Logic
+ //
+ always @(posedge clk) begin
+ // ps_count advances whenever l_count is at its maximum; both counters rely on natural wrap-around to return to zero.
+ if (!rdy && l_count_done) ps_count <= ps_count + 1'b1; // next word(s)
+ //
+ if (rdy && ena) l_count <= l_count + 1'b1; // start of transformation
+ //
+ if (!rdy && !(ps_count_zero && l_count_zero)) l_count <= l_count + 1'b1; // next part of word(s)
+ //
+ end
+
+
+ //
+ // Ready Output Register
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+
+ //
+ // Ready Set and Clear Logic
+ //
+ always @(posedge clk) begin
+ // The two conditions are mutually exclusive (one requires rdy, the other !rdy).
+ if (rdy && ena) rdy_reg <= 0; // start of transformation
+ //
+ if (!rdy && l_count_zero && ps_count_zero) rdy_reg <= 1; // end of transformation
+ //
+ end
+
+
+ //
+ // S Table Indices
+ //
+
+ /*
+ * To transform several words at once a set of indices is required.
+ *
+ */
+
+ wire [ 2: 0] s_in_offset [0:PS_WORDS_AT_ONCE-1]; // indices of words being transformed
+ wire [63: 0] s_out [0:PS_WORDS_AT_ONCE-1]; // output words of S transformation
+
+ assign s_in_offset[0] = ps_count * PS_WORDS_AT_ONCE; // the first index is defined by PS counter,
+ // following indices are linearly increasing
+
+ genvar sw, sb; // word and byte counter
+ generate for (sw=1; sw<PS_WORDS_AT_ONCE; sw=sw+1)
+ begin: gen_s_in_offset
+ assign s_in_offset[sw] = s_in_offset[sw-1] + 1'b1;
+ end
+ endgenerate
+
+
+ //
+ // S Lookup Table
+ //
+ generate for (sw=0; sw<PS_WORDS_AT_ONCE; sw=sw+1)
+ begin: gen_s_out_word
+ for (sb=0; sb<8; sb=sb+1) begin: gen_s_out_byte
+ // One byte-wide lookup ROM per byte of every word processed in parallel.
+ (* ROM_STYLE="BLOCK" *)
+ //
+ streebog_rom_s_table s_table
+ (
+ .clk (clk),
+ .ena (ps_preload_first | ps_preload_next),
+ .din (din_mux[s_in_offset[sw]][8*sb + 7 : 8*sb]),
+ .dout (s_out[sw][8*sb + 7 : 8*sb])
+ );
+ //
+ end
+ end
+ endgenerate
+
+
+
+ //
+ // A Matrix Indices
+ //
+
+ /*
+ * To transform several bits at once a set of indices is required.
+ *
+ */
+
+ wire [ 5: 0] l_in_offset [0:L_BITS_AT_ONCE-1]; // indices of bits being transformed
+ wire [63: 0] l_out [0:L_BITS_AT_ONCE-1]; // output bits of L transformation
+
+ assign l_in_offset[0] = l_count * L_BITS_AT_ONCE; // the first index is defined by L counter,
+ // following indices are linearly increasing
+
+ genvar l;
+ generate for (l=1; l<L_BITS_AT_ONCE; l=l+1)
+ begin: gen_l_in_offset
+ assign l_in_offset[l] = l_in_offset[l-1] + 1'b1;
+ end
+ endgenerate
+
+
+ //
+ // A Matrix
+ //
+ generate for (l=0; l<L_BITS_AT_ONCE; l=l+1)
+ begin: gen_l_out
+ // One matrix ROM per output bit computed in parallel; unlike the S tables it has no clock enable port connected.
+ (* ROM_STYLE="BLOCK" *)
+ //
+ streebog_rom_a_matrix a_matrix
+ (
+ .clk (clk),
+ .din (l_in_offset[l]),
+ .dout (l_out[l])
+ );
+ //
+ end
+ endgenerate
+
+
+ //
+ // Multiplication Logic
+ //
+
+ /*
+ * Original specification describes multiplication method that effectively adds
+ * matrix rows based on source vector items. Instead of that multiplication is
+ * done column-by-column.
+ *
+ */
+
+ wire [L_BITS_AT_ONCE-1:0] l_out_part[0:PS_WORDS_AT_ONCE-1];
+
+ genvar lw, lb;
+ generate for (lw=0; lw<PS_WORDS_AT_ONCE; lw=lw+1)
+ begin: gen_l_out_part
+ for (lb=0; lb<L_BITS_AT_ONCE; lb=lb+1) begin: gen_l_out_bit
+ // Each output bit is the XOR-reduction (parity) of the bitwise AND of a ROM word and an S output word.
+ assign l_out_part[lw][lb] = ^(l_out[lb] & s_out[lw]);
+ //
+ end
+ end
+ endgenerate
+
+
+ /*
+ * PS and L transformations have 1-cycle latency, so delayed versions
+ * of offsets are needed to update output registers accordingly.
+ *
+ */
+
+ reg [PS_CNT_BITS-1:0] ps_count_dly = 0; // delayed PS counter
+ reg [ L_CNT_BITS-1:0] l_count_dly = 0; // delayed L counter
+
+ always @(posedge clk) ps_count_dly <= ps_count;
+ always @(posedge clk) l_count_dly <= l_count;
+
+
+ //
+ // Output Offset Tables
+ //
+ wire [ 2: 0] dout_offset_word [0:PS_WORDS_AT_ONCE-1];
+ wire [ 5: 0] dout_offset_bit [0:L_BITS_AT_ONCE -1];
+
+ assign dout_offset_word[0] = ps_count_dly * PS_WORDS_AT_ONCE;
+ assign dout_offset_bit[0] = l_count_dly * L_BITS_AT_ONCE;
+
+ genvar z;
+
+ generate for (z=1; z<PS_WORDS_AT_ONCE; z=z+1)
+ begin: gen_dout_offset_word
+ assign dout_offset_word[z] = dout_offset_word[z-1] + 1'b1;
+ end
+ endgenerate
+
+ generate for (z=1; z<L_BITS_AT_ONCE; z=z+1)
+ begin: gen_dout_offset_bit
+ assign dout_offset_bit[z] = dout_offset_bit[z-1] + 1'b1;
+ end
+ endgenerate
+
+
+
+ //
+ // Output Logic
+ //
+ integer lps_w, lps_b;
+
+ always @(posedge clk)
+ // While busy, store the freshly computed bits at the word/bit positions given by the delayed counters.
+ if (! rdy)
+ //
+ for (lps_w=0; lps_w<PS_WORDS_AT_ONCE; lps_w=lps_w+1)
+ for (lps_b=0; lps_b<L_BITS_AT_ONCE; lps_b=lps_b+1)
+ dout_mux[dout_offset_word[lps_w]][dout_offset_bit[lps_b]] <= l_out_part[lps_w][lps_b];
+ //dout_mux[dout_offset_word[lps_w]][L_BITS_AT_ONCE*l_count_dly+lps_b] <= l_out_part[lps_w][lps_b];
+
+
+endmodule
diff --git a/streebog_hash/streebog_hash_top.v b/streebog_hash/streebog_hash_top.v
new file mode 100644
index 0000000..1cd1bbe
--- /dev/null
+++ b/streebog_hash/streebog_hash_top.v
@@ -0,0 +1,421 @@
+`timescale 1ns / 1ps
+
+module streebog_hash_top
+ (
+ clock,
+ block, block_length,
+ init, update, final,
+ short_mode,
+ digest, digest_valid,
+ ready
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter PS_PIPELINE_STAGES = 2; // 2, 4, 8
+ parameter L_PIPELINE_STAGES = 2; // 2, 4, 8, 16, 32, 64
+
+
+ //
+ // Ports
+ //
+ input wire clock; // core clock
+ input wire [511:0] block; // input message block
+ input wire [ 9:0] block_length; // length of input block in bits (0..512)
+ input wire init; // flag to start calculation of new message hash
+ input wire update; // flag to compress next message block
+ input wire final; // flag to run final transformation after last message block
+ input wire short_mode; // 0 = produce 512-bit hash, 1 = produce 256-bit hash
+ output wire [511:0] digest; // message digest output
+ output wire digest_valid; // hash is ready (digest output value is valid)
+ output wire ready; // core is ready (init/update/final can be asserted)
+
+
+ //
+ // Initialization Vectors and Round Count
+ //
+ localparam STREEBOG_IV_512 = {512{1'b0}};
+ localparam STREEBOG_IV_256 = {64{8'h01}};
+ localparam STREEBOG_NUM_ROUNDS = 4'd12;
+
+
+ //
+ // State Registers
+ //
+ reg [511:0] h; // |
+ reg [511:0] Sigma; // | Internal State Registers
+ reg [511:0] N; // |
+
+ reg [511:0] digest_reg;
+ reg digest_valid_reg = 1'b0;
+ reg [ 3:0] round_count = 4'd0;
+
+ assign digest = digest_reg;
+ assign digest_valid = digest_valid_reg;
+
+
+ //
+ // Handy Internal Flags
+ //
+ wire round_count_active = (round_count > 4'd0) ? 1 : 0; // transformation has been started
+ wire round_count_not_done = (round_count < STREEBOG_NUM_ROUNDS) ? 1 : 0; // transformation has not been finished
+
+
+ /*
+ * Compression procedure includes 13 rounds. To perform every round we need to know
+ * round key. This implementation uses two parallel LPS cores. The first LPS core (key core)
+ * is used to produce round keys, the second LPS core (data core) is used to encrypt message block.
+ *
+ * Data core is not activated during the first round, because round key is not yet known during
+ * the first round. During the second round, key core computes next (second) round key, while data core encrypts
+ * message block using first round key and so on. The last compression round doesn't include encryption step.
+ * Instead of it simple XOR operation is used.
+ *
+ * Compression procedure requires 13 key calculations and 12 data encryptions. LPS cores operate according to
+ * the following schedule:
+ *
+ *
+ * +----------+----------+----------+- -+----------+
+ * Round Count | 0 | 1 | 2 | ... | 12 |
+ * +----------+----------+----------+- -+----------+
+ * Key Core | KEY #0 | KEY #1 | KEY #2 | ... | KEY #12 |
+ * +----------+----------+----------+- -+----------+
+ * Data Core | Idle | DATA #0 | DATA #1 | ... | DATA #11 |
+ * +----------+----------+----------+- -+----------+
+ *
+ */
+
+
+ //
+ // LPS Core for Round Key Calculation
+ //
+ reg [511:0] lps_key_in; //
+ wire [511:0] lps_key_out; //
+ wire lps_key_ena; //
+ wire lps_key_last; //
+ wire lps_key_rdy; //
+
+ wire lps_key_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_TRIG) ? 1 : 0;
+ wire lps_key_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_TRIG) ? 1 : 0;
+ wire lps_key_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_TRIG) ? 1 : 0;
+
+ assign lps_key_ena = lps_key_ena_update || lps_key_ena_final_n || lps_key_ena_final_sigma;
+
+ streebog_core_lps #
+ (
+ .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES),
+ .L_PIPELINE_STAGES (L_PIPELINE_STAGES)
+ )
+ lps_key
+ (
+ .clk (clock),
+ .ena (lps_key_ena),
+ .rdy (lps_key_rdy),
+ .last (lps_key_last),
+ .din (lps_key_in),
+ .dout (lps_key_out)
+ );
+
+
+ //
+ // LPS Core for Block Compression
+ //
+ reg [511:0] lps_data_in;
+ wire [511:0] lps_data_out;
+ wire lps_data_ena;
+ wire lps_data_last;
+ wire lps_data_rdy;
+
+ assign lps_data_ena = lps_key_ena & round_count_active;
+
+ streebog_core_lps #
+ (
+ .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES),
+ .L_PIPELINE_STAGES (L_PIPELINE_STAGES)
+ )
+ lps_data
+ (
+ .clk (clock),
+ .ena (lps_data_ena),
+ .rdy (lps_data_rdy),
+ .last (lps_data_last),
+ .din (lps_data_in),
+ .dout (lps_data_out)
+ );
+
+
+ /*
+ * According to specification, internal state must be updated after compression, this
+ * involves addition of two pairs of 512-bit numbers. This operation is done in two
+ * parallel summation cores. The first core updates N register, the second core updates
+ * Sigma register. Summation is triggered before LPS cores are activated. Actual update
+ * of N and Sigma occurs after completion of compression procedure.
+ *
+ */
+
+
+ //
+ // Summation Trigger Flag
+ //
+ wire adder_trig = (fsm_state == FSM_STATE_UPDATE_ADDER_TRIG) ? 1 : 0;
+
+
+ //
+ // Block Length Adder (N = N + |M|)
+ //
+ wire [511:0] adder_n_sum;
+ wire adder_n_rdy;
+
+ streebog_core_adder_s6 adder_n
+ (
+ .clk (clock),
+ .ena (adder_trig),
+ .rdy (adder_n_rdy),
+ .x (N),
+ .y ({{502{1'b0}}, block_length}),
+ .sum (adder_n_sum)
+ );
+
+
+ //
+ // Message Adder (Sigma = Sigma + M)
+ //
+ wire [511:0] adder_sigma_sum;
+ wire adder_sigma_rdy;
+
+ streebog_core_adder_s6 adder_sigma
+ (
+ .clk (clock),
+ .ena (adder_trig),
+ .rdy (adder_sigma_rdy),
+ .x (Sigma),
+ .y (block),
+ .sum (adder_sigma_sum)
+ );
+
+
+ //
+ // Handy Flags
+ //
+ wire lps_last_both = lps_key_last & lps_data_last;
+ wire lps_rdy_both = lps_key_rdy & lps_data_rdy;
+ wire adder_rdy_both = adder_n_rdy & adder_sigma_rdy;
+
+
+ /*
+ * Operation of this core is controlled by FSM logic. Ready flag is embedded in state encoding. FSM goes out of
+ * idle state when init/update/final flags become active. Init flag has priority over update and final flags.
+ * Update flag has priority over final flag.
+ *
+ */
+
+
+ //
+ // FSM States
+ //
+ // Encoding is {ready, phase[1:0], step}:
+ //   bit  3   - set only for the idle state; exported directly as the ready flag
+ //   bits 2:1 - which operation is in progress (00 = gN(h,m), 11 = summation,
+ //              01 = g0(h,N), 10 = g0(h,Sigma))
+ //   bit  0   - 0 while an operation is being triggered, 1 while waiting for it
+ //
+ localparam FSM_STATE_IDLE = 4'b1_00_0; // core is idle
+ //
+ localparam FSM_STATE_UPDATE_LPS_TRIG = 4'b0_00_0; // core is triggering gN(h,m) transformation
+ localparam FSM_STATE_UPDATE_LPS_WAIT = 4'b0_00_1; // core is waiting for transformation to complete
+ //
+ localparam FSM_STATE_UPDATE_ADDER_TRIG = 4'b0_11_0; // core is triggering summation
+ localparam FSM_STATE_UPDATE_ADDER_WAIT = 4'b0_11_1; // core is waiting for summation to complete
+ //
+ localparam FSM_STATE_FINAL_N_LPS_TRIG = 4'b0_01_0; // core is triggering g0(h,N) transformation
+ localparam FSM_STATE_FINAL_N_LPS_WAIT = 4'b0_01_1; // core is waiting for transformation to complete
+ //
+ localparam FSM_STATE_FINAL_SIGMA_LPS_TRIG = 4'b0_10_0; // core is triggering g0(h,Sigma) transformation
+ localparam FSM_STATE_FINAL_SIGMA_LPS_WAIT = 4'b0_10_1; // core is waiting for transformation to complete
+
+
+ //
+ // FSM State Register and Core Ready Flag
+ //
+ // The register starts in the idle state. ready is simply the top bit of the
+ // state, so it is high in FSM_STATE_IDLE and low in every other named state.
+ //
+ reg [ 3: 0] fsm_state = FSM_STATE_IDLE;
+ assign ready = fsm_state[3];
+
+
+ //
+ // FSM Transition Logic
+ //
+ // Next-state logic of the control FSM, evaluated on every rising clock edge.
+ //
+ //   update: IDLE -> ADDER_TRIG -> (LPS_TRIG -> LPS_WAIT) per round -> ADDER_WAIT -> IDLE
+ //   final:  IDLE -> (N_LPS_TRIG -> N_LPS_WAIT) per round
+ //                -> (SIGMA_LPS_TRIG -> SIGMA_LPS_WAIT) per round -> IDLE
+ //
+ // While init is high the FSM stays idle; update wins over final.
+ //
+ always @(posedge clock) begin
+     case (fsm_state)
+
+         //
+         // idle -> wait for a command
+         //
+         FSM_STATE_IDLE: begin
+             if (!init) begin
+                 if (update)           fsm_state <= FSM_STATE_UPDATE_ADDER_TRIG;
+                 if (!update && final) fsm_state <= FSM_STATE_FINAL_N_LPS_TRIG;
+             end
+         end
+
+         //
+         // update -> gN(h,m)
+         //
+         FSM_STATE_UPDATE_ADDER_TRIG: begin
+             fsm_state <= FSM_STATE_UPDATE_LPS_TRIG;
+         end
+
+         FSM_STATE_UPDATE_LPS_TRIG: begin
+             fsm_state <= FSM_STATE_UPDATE_LPS_WAIT;
+         end
+
+         FSM_STATE_UPDATE_LPS_WAIT: begin
+             // loop back for another round, or go collect the adder results
+             if (lps_rdy_both)
+                 fsm_state <= round_count_not_done ? FSM_STATE_UPDATE_LPS_TRIG
+                                                   : FSM_STATE_UPDATE_ADDER_WAIT;
+         end
+
+         FSM_STATE_UPDATE_ADDER_WAIT: begin
+             if (adder_rdy_both)
+                 fsm_state <= FSM_STATE_IDLE;
+         end
+
+         //
+         // final -> g0(h,N)
+         //
+         FSM_STATE_FINAL_N_LPS_TRIG: begin
+             fsm_state <= FSM_STATE_FINAL_N_LPS_WAIT;
+         end
+
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_rdy_both)
+                 fsm_state <= round_count_not_done ? FSM_STATE_FINAL_N_LPS_TRIG
+                                                   : FSM_STATE_FINAL_SIGMA_LPS_TRIG;
+         end
+
+         //
+         // final -> g0(h,Sigma)
+         //
+         FSM_STATE_FINAL_SIGMA_LPS_TRIG: begin
+             fsm_state <= FSM_STATE_FINAL_SIGMA_LPS_WAIT;
+         end
+
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_rdy_both)
+                 fsm_state <= round_count_not_done ? FSM_STATE_FINAL_SIGMA_LPS_TRIG
+                                                   : FSM_STATE_IDLE;
+         end
+
+         //
+         // any unused encoding falls back to idle
+         //
+         default: begin
+             fsm_state <= FSM_STATE_IDLE;
+         end
+
+     endcase
+ end
+
+
+ /*
+ * Key calculation involves 12 round constants. These constants are stored in an array. The first key
+ * (calculated during the first round) does not require a constant. New constant is preloaded during the last
+ * cycle of LPS transformation. LPS cores have dedicated output flag indicating that operation is about to complete.
+ * This flag is used as Clock Enable. Constants are preloaded during rounds 1-12 and are used during rounds 2-13.
+ *
+ */
+
+ //
+ // Round Constants
+ //
+ // The constant ROM is clocked only when the key LPS core flags its last
+ // cycle, more rounds remain, and the FSM is in one of the three LPS wait
+ // states. The ROM is addressed by the current round counter.
+ //
+ wire [511:0] c_array_out;
+
+ wire c_array_ena_update      = (fsm_state == FSM_STATE_UPDATE_LPS_WAIT);
+ wire c_array_ena_final_n     = (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT);
+ wire c_array_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT);
+
+ wire c_array_ena = (c_array_ena_update || c_array_ena_final_n || c_array_ena_final_sigma)
+                    && lps_key_last
+                    && round_count_not_done;
+
+ //
+ (* ROM_STYLE="BLOCK" *)
+ //
+ streebog_rom_c_array c_array
+ (
+     .clk  (clock),
+     .ena  (c_array_ena),
+     .din  (round_count),
+     .dout (c_array_out)
+ );
+
+ /*
+ * The following pieces of code take care of LPS and summation inputs and outputs, they also take care
+ * of output digest register and corresponding valid flag.
+ *
+ */
+
+
+ //
+ // Internal State Control Logic
+ //
+ // Maintains the three 512-bit working registers:
+ //   h     - loaded with the IV on init; replaced at the end of the last round
+ //           of gN(h,m) and of g0(h,N)
+ //   N     - cleared on init; takes the block-length adder result
+ //   Sigma - cleared on init; takes the message adder result
+ //
+ always @(posedge clock) begin
+     case (fsm_state)
+
+         // init: pick the IV for the selected digest size, clear the counters
+         FSM_STATE_IDLE: begin
+             if (init) begin
+                 Sigma <= 512'd0;
+                 N     <= 512'd0;
+                 h     <= (short_mode == 1'b1) ? STREEBOG_IV_256 : STREEBOG_IV_512;
+             end
+         end
+
+         // both sums are ready: commit them
+         FSM_STATE_UPDATE_ADDER_WAIT: begin
+             if (adder_rdy_both) begin
+                 Sigma <= adder_sigma_sum;
+                 N     <= adder_n_sum;
+             end
+         end
+
+         // last round of gN(h,m) finished: fold the result into h
+         FSM_STATE_UPDATE_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 if (!round_count_not_done)
+                     h <= h ^ block ^ lps_key_out ^ lps_data_out;
+             end
+         end
+
+         // last round of g0(h,N) finished: fold the result into h
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 if (!round_count_not_done)
+                     h <= h ^ N ^ lps_key_out ^ lps_data_out;
+             end
+         end
+
+     endcase
+ end
+
+
+ //
+ // Output Register Control Logic
+ //
+ // init invalidates the digest (valid flag low, data driven to X). The digest
+ // is captured and flagged valid when the last round of g0(h,Sigma) finishes.
+ //
+ always @(posedge clock) begin
+     case (fsm_state)
+
+         FSM_STATE_IDLE: begin
+             if (init) begin
+                 digest_valid_reg <= 1'b0;
+                 digest_reg       <= {512{1'bX}};
+             end
+         end
+
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 if (!round_count_not_done) begin
+                     digest_valid_reg <= 1'b1;
+                     digest_reg       <= h ^ Sigma ^ lps_key_out ^ lps_data_out;
+                 end
+             end
+         end
+
+     endcase
+ end
+
+
+ //
+ // Round Count Logic
+ //
+ // The counter is cleared when a command (update or final) is seen in the idle
+ // state. In any LPS wait state it advances each time the key LPS core reports
+ // ready, wrapping back to zero after the last round.
+ //
+ always @(posedge clock) begin
+     case (fsm_state)
+
+         FSM_STATE_IDLE: begin
+             if (final || update)
+                 round_count <= 4'h0;
+         end
+
+         FSM_STATE_UPDATE_LPS_WAIT,
+         FSM_STATE_FINAL_N_LPS_WAIT,
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_key_rdy)
+                 round_count <= round_count_not_done ? round_count + 1'b1 : 4'h0;
+         end
+
+     endcase
+ end
+
+
+ //
+ // Key and Data LPS Cores Logic
+ //
+ // Prepares the inputs of the key and data LPS cores for the next round.
+ // round_count_active is defined outside this excerpt; when it is low the
+ // data path is seeded from the raw operand (block, N or Sigma) instead of
+ // the previous data LPS output -- presumably that is the first round only
+ // (NOTE(review): confirm against the round_count_active definition).
+ //
+ always @(posedge clock) begin
+     case (fsm_state)
+
+         // command accepted: seed the key path (init suppresses this)
+         FSM_STATE_IDLE: begin
+             if (!init && update)           lps_key_in <= h ^ N;
+             if (!init && !update && final) lps_key_in <= h;
+         end
+
+         // gN(h,m): next key = K ^ C, next data = K ^ (previous data or block)
+         FSM_STATE_UPDATE_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 if (round_count_not_done) begin
+                     lps_data_in <= lps_key_out ^ (round_count_active ? lps_data_out : block);
+                     lps_key_in  <= lps_key_out ^ c_array_out;
+                 end
+             end
+         end
+
+         // g0(h,N): as above, but after the last round the key path is
+         // pre-loaded with the new h for the following g0(h,Sigma) stage
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 lps_data_in <= round_count_not_done ? lps_key_out ^ (round_count_active ? lps_data_out : N) : {512{1'bX}};
+                 lps_key_in  <= lps_key_out ^ (round_count_not_done ? c_array_out : lps_data_out ^ h ^ N);
+             end
+         end
+
+         // g0(h,Sigma)
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_key_rdy) begin
+                 if (round_count_not_done) begin
+                     lps_data_in <= lps_key_out ^ (round_count_active ? lps_data_out : Sigma);
+                     lps_key_in  <= lps_key_out ^ c_array_out;
+                 end
+             end
+         end
+
+     endcase
+ end
+
+
+endmodule
diff --git a/streebog_hash/streebog_rom_a_matrix.v b/streebog_hash/streebog_rom_a_matrix.v
new file mode 100644
index 0000000..ba3607b
--- /dev/null
+++ b/streebog_hash/streebog_rom_a_matrix.v
@@ -0,0 +1,152 @@
+`timescale 1ns / 1ps
+
+//
+// streebog_rom_a_matrix
+//
+// Synchronous lookup table holding 64 words of 64 bits each (the transformed
+// A matrix described below).
+//
+//   clk  - clock; the output register is updated on every rising edge
+//   din  - 6-bit word address
+//   dout - 64-bit word selected by the address sampled at the previous
+//          rising edge (registered output, there is no enable input)
+//
+module streebog_rom_a_matrix
+ (
+ clk,
+ din, dout
+ );
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire [ 5: 0] din;
+ output wire [63: 0] dout;
+
+
+ //
+ // Output Register
+ //
+ reg [63: 0] dout_reg;
+ assign dout = dout_reg;
+
+
+ //
+ // A Transformation Matrix
+ //
+
+ /*
+ * Original matrix from the standard was transformed to allow efficient implementation of
+ * hardware multiplication. The following matrix is effectively the transposed version
+ * of the original matrix A with reversed row order.
+ *
+ * Original 64x64 bit matrix from the standard has the following form:
+ *
+ * a[i,j] is 1-bit matrix element
+ *
+ * A_row(i) is 64-bit row of matrix
+ * A_col(j) is 64-bit column of matrix
+ *
+ *
+ * A_col(0) A_col(1) A_col(62) A_col(63)
+ * | | | |
+ * | | | |
+ * +----------------------------------------------+
+ * | a[ 0,63] a[ 0,62] ... a[ 0, 1] a[ 0, 0] | --A_row(0)
+ * | a[ 1,63] a[ 1,62] ... a[ 1, 1] a[ 1, 0] | --A_row(1)
+ * | ... |
+ * | a[62,63] a[62,62] ... a[62, 1] a[62, 0] | --A_row(62)
+ * | a[63,63] a[63,62] ... a[63, 1] a[63, 0] | --A_row(63)
+ * +----------------------------------------------+
+ *
+ *
+ * A_row(0)...A_row(63) are given in the original specification. Instead of row vectors we need a set of
+ * column vectors A_col(0)...A_col(63). A_col() can be obtained by transposing A_row().
+ *
+ *
+ * A_row(0) A_row(1) A_row(62) A_row(63)
+ * | | | |
+ * | | | |
+ * +---------------------------------------------+
+ * | a[ 0,63] a[ 1,63] ... a[62,63] a[63,63] | --A_col(0)
+ * | a[ 0,62] a[ 1,62] ... a[62,62] a[63,62] | --A_col(1)
+ * | ... |
+ * | a[ 0, 1] a[ 1, 1] ... a[62, 1] a[63, 1] | --A_col(62)
+ * | a[ 0, 0] a[ 1, 0] ... a[62, 0] a[63, 0] | --A_col(63)
+ * +---------------------------------------------+
+ *
+ *
+ * The only problem with A_col() is that original 64-bit A_row() values in the standard are written from MSB to LSB. That implies that
+ * original matrix columns are numbered from 63 to 0, while matrix rows are numbered from 0 to 63. Because of that we need to reverse
+ * row order after transposition. Original matrix had element a[0,0] in A_row(0), but after transposition element a[0,0] turns out
+ * to be in A_col(63), not in A_col(0). Because of that addresses inside of case() below are reversed. This effectively reverses
+ * the order in which A_col() follow.
+ *
+ */
+
+ // All 64 values of the 6-bit address are listed (in descending order),
+ // so the case statement needs no default branch.
+ always @(posedge clk) begin
+ //
+ case (din)
+ //
+ 6'h3F: dout_reg <= 64'hB18285C0BA4F9506;
+ 6'h3E: dout_reg <= 64'h584142605DA7CA83;
+ 6'h3D: dout_reg <= 64'h2CA021302E53E5C1;
+ 6'h3C: dout_reg <= 64'h16509098172972E0;
+ 6'h3B: dout_reg <= 64'hBA2A4D8C315B2C76;
+ 6'h3A: dout_reg <= 64'hEC172386A2E2833D;
+ 6'h39: dout_reg <= 64'hC7091403EB3E5418;
+ 6'h38: dout_reg <= 64'h63040A81759F2A0C;
+ 6'h37: dout_reg <= 64'h025DA344601EA1B8;
+ 6'h36: dout_reg <= 64'h012ED1A2308FD05C;
+ 6'h35: dout_reg <= 64'h8017685198C7E8AE;
+ 6'h34: dout_reg <= 64'h408BB4284C63F457;
+ 6'h33: dout_reg <= 64'h2218F9D046AFDB13;
+ 6'h32: dout_reg <= 64'h13515FACC3C94CB1;
+ 6'h31: dout_reg <= 64'h0B758C12817A87E0;
+ 6'h30: dout_reg <= 64'h05BA4689C03D4370;
+ 6'h2F: dout_reg <= 64'hA1F0C986411102CC;
+ 6'h2E: dout_reg <= 64'hD0F864C3A0080166;
+ 6'h2D: dout_reg <= 64'hE87CB2E1508480B3;
+ 6'h2C: dout_reg <= 64'hF4BED9F0A8C24059;
+ 6'h2B: dout_reg <= 64'hDB2F257E95702260;
+ 6'h2A: dout_reg <= 64'h4C67DB398BA913FC;
+ 6'h29: dout_reg <= 64'h87C3241A04450B32;
+ 6'h28: dout_reg <= 64'h43E1920D82220599;
+ 6'h27: dout_reg <= 64'hE0802541868B1232;
+ 6'h26: dout_reg <= 64'h704012A0C3458999;
+ 6'h25: dout_reg <= 64'hB8208950E12244CC;
+ 6'h24: dout_reg <= 64'h5C1044A8F011A266;
+ 6'h23: dout_reg <= 64'h4E0887957E834381;
+ 6'h22: dout_reg <= 64'hC704668B394AB3F2;
+ 6'h21: dout_reg <= 64'h830296041A2E4BCB;
+ 6'h20: dout_reg <= 64'hC1014B820D172565;
+ 6'h1F: dout_reg <= 64'h7DD80C6D98218914;
+ 6'h1E: dout_reg <= 64'h3E6C06B64C90440A;
+ 6'h1D: dout_reg <= 64'h9F36835B26C8A285;
+ 6'h1C: dout_reg <= 64'h4F1BC1AD93E45142;
+ 6'h1B: dout_reg <= 64'hDA55ECBBD1D3A135;
+ 6'h1A: dout_reg <= 64'h10727AB0F048598E;
+ 6'h19: dout_reg <= 64'hF56131B560852553;
+ 6'h18: dout_reg <= 64'hFAB018DA30421229;
+ 6'h17: dout_reg <= 64'h82B12139880C7F01;
+ 6'h16: dout_reg <= 64'h4158909CC4063F80;
+ 6'h15: dout_reg <= 64'hA02CC8CEE2831F40;
+ 6'h14: dout_reg <= 64'h5016E46771C10F20;
+ 6'h13: dout_reg <= 64'h2ABAD30AB0ECF811;
+ 6'h12: dout_reg <= 64'h17EC48BC507A0309;
+ 6'h11: dout_reg <= 64'h09C785E72031FE05;
+ 6'h10: dout_reg <= 64'h046342731018FF02;
+ 6'h0F: dout_reg <= 64'h91E9E113A54E2B57;
+ 6'h0E: dout_reg <= 64'h4874F009522715AB;
+ 6'h0D: dout_reg <= 64'hA43AF804A9138A55;
+ 6'h0C: dout_reg <= 64'hD21D7C825409C5AA;
+ 6'h0B: dout_reg <= 64'h78E75F528F4A4982;
+ 6'h0A: dout_reg <= 64'hAD9ACEBA62EB0F16;
+ 6'h09: dout_reg <= 64'h47A4864E943BAC5C;
+ 6'h08: dout_reg <= 64'h23D2C3274A9D56AE;
+ 6'h07: dout_reg <= 64'h06016A5C89D498B1;
+ 6'h06: dout_reg <= 64'h8380B5AE446A4C58;
+ 6'h05: dout_reg <= 64'hC140DA57A2B5262C;
+ 6'h04: dout_reg <= 64'hE0206DAB51DA9316;
+ 6'h03: dout_reg <= 64'h7611DC09A1B9D1BA;
+ 6'h02: dout_reg <= 64'h3D0984585908F0EC;
+ 6'h01: dout_reg <= 64'h1805A870255060C7;
+ 6'h00: dout_reg <= 64'h0C02D4B812A83063;
+ //
+ endcase // case(din)
+ //
+ end // always @(posedge clk)
+
+
+endmodule
diff --git a/streebog_hash/streebog_rom_c_array.v b/streebog_hash/streebog_rom_c_array.v
new file mode 100644
index 0000000..e31b5c0
--- /dev/null
+++ b/streebog_hash/streebog_rom_c_array.v
@@ -0,0 +1,58 @@
+`timescale 1ns / 1ps
+
+//
+// streebog_rom_c_array
+//
+// Synchronous lookup table holding twelve 512-bit round constants at
+// addresses 0x0..0xB.
+//
+//   clk  - clock
+//   ena  - clock enable; while low the output register keeps its value
+//   din  - 4-bit constant index
+//   dout - constant selected at the last enabled rising edge; addresses
+//          0xC..0xF load an all-X value
+//
+module streebog_rom_c_array
+ (
+ clk, ena,
+ din, dout
+ );
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire ena;
+ input wire [ 3:0] din;
+ output wire [511:0] dout;
+
+
+ //
+ // Output Register
+ //
+ reg [511:0] dout_reg;
+ assign dout = dout_reg;
+
+
+ //
+ // C Round Constants Array
+ //
+ always @(posedge clk) begin
+ //
+ if (ena) begin
+ //
+ case (din)
+ //
+ 4'h0: dout_reg <= 512'hB1085BDA1ECADAE9EBCB2F81C0657C1F2F6A76432E45D016714EB88D7585C4FC4B7CE09192676901A2422A08A460D31505767436CC744D23DD806559F2A64507;
+ 4'h1: dout_reg <= 512'h6FA3B58AA99D2F1A4FE39D460F70B5D7F3FEEA720A232B9861D55E0F16B501319AB5176B12D699585CB561C2DB0AA7CA55DDA21BD7CBCD56E679047021B19BB7;
+ 4'h2: dout_reg <= 512'hF574DCAC2BCE2FC70A39FC286A3D843506F15E5F529C1F8BF2EA7514B1297B7BD3E20FE490359EB1C1C93A376062DB09C2B6F443867ADB31991E96F50ABA0AB2;
+ 4'h3: dout_reg <= 512'hEF1FDFB3E81566D2F948E1A05D71E4DD488E857E335C3C7D9D721CAD685E353FA9D72C82ED03D675D8B71333935203BE3453EAA193E837F1220CBEBC84E3D12E;
+ //
+ 4'h4: dout_reg <= 512'h4BEA6BACAD4747999A3F410C6CA923637F151C1F1686104A359E35D7800FFFBDBFCD1747253AF5A3DFFF00B723271A167A56A27EA9EA63F5601758FD7C6CFE57;
+ 4'h5: dout_reg <= 512'hAE4FAEAE1D3AD3D96FA4C33B7A3039C02D66C4F95142A46C187F9AB49AF08EC6CFFAA6B71C9AB7B40AF21F66C2BEC6B6BF71C57236904F35FA68407A46647D6E;
+ 4'h6: dout_reg <= 512'hF4C70E16EEAAC5EC51AC86FEBF240954399EC6C7E6BF87C9D3473E33197A93C90992ABC52D822C3706476983284A05043517454CA23C4AF38886564D3A14D493;
+ 4'h7: dout_reg <= 512'h9B1F5B424D93C9A703E7AA020C6E41414EB7F8719C36DE1E89B4443B4DDBC49AF4892BCB929B069069D18D2BD1A5C42F36ACC2355951A8D9A47F0DD4BF02E71E;
+ //
+ 4'h8: dout_reg <= 512'h378F5A541631229B944C9AD8EC165FDE3A7D3A1B258942243CD955B7E00D0984800A440BDBB2CEB17B2B8A9AA6079C540E38DC92CB1F2A607261445183235ADB;
+ 4'h9: dout_reg <= 512'hABBEDEA680056F52382AE548B2E4F3F38941E71CFF8A78DB1FFFE18A1B3361039FE76702AF69334B7A1E6C303B7652F43698FAD1153BB6C374B4C7FB98459CED;
+ 4'hA: dout_reg <= 512'h7BCD9ED0EFC889FB3002C6CD635AFE94D8FA6BBBEBAB076120018021148466798A1D71EFEA48B9CAEFBACD1D7D476E98DEA2594AC06FD85D6BCAA4CD81F32D1B;
+ 4'hB: dout_reg <= 512'h378EE767F11631BAD21380B00449B17ACDA43C32BCDF1D77F82012D430219F9B5D80EF9D1891CC86E71DA4AA88E12852FAF417D5D9B21B9948BC924AF11BD720;
+ //
+ // indices 0xC..0xF hold no constant
+ default: dout_reg <= {512{1'bX}};
+ //
+ endcase // case (din)
+ //
+ end // if (ena)
+ //
+ end // always @(posedge clk)
+
+endmodule
diff --git a/streebog_hash/streebog_rom_s_table.v b/streebog_hash/streebog_rom_s_table.v
new file mode 100644
index 0000000..9779b0f
--- /dev/null
+++ b/streebog_hash/streebog_rom_s_table.v
@@ -0,0 +1,299 @@
+`timescale 1ns / 1ps
+
+//
+// streebog_rom_s_table
+//
+// Synchronous 256 x 8-bit lookup table for the S (byte substitution)
+// transformation.
+//
+//   clk  - clock
+//   ena  - clock enable; while low the output register keeps its value
+//   din  - input byte
+//   dout - substituted byte for the din sampled at the last enabled
+//          rising edge (registered output)
+//
+module streebog_rom_s_table
+ (
+ clk, ena,
+ din, dout
+ );
+
+
+ //
+ // Ports
+ //
+ input wire clk;
+ input wire ena;
+ input wire [ 7: 0] din;
+ output wire [ 7: 0] dout;
+
+
+ //
+ // Output Register
+ //
+ reg [ 7: 0] dout_reg;
+ assign dout = dout_reg;
+
+
+ //
+ // S Transformation Lookup Table
+ //
+ // Every one of the 256 input values has its own entry, so the case
+ // statement needs no default branch.
+ //
+ always @(posedge clk) begin
+ //
+ if (ena) begin
+ //
+ case (din)
+ //
+ 8'h00: dout_reg <= 8'hFC;
+ 8'h01: dout_reg <= 8'hEE;
+ 8'h02: dout_reg <= 8'hDD;
+ 8'h03: dout_reg <= 8'h11;
+ 8'h04: dout_reg <= 8'hCF;
+ 8'h05: dout_reg <= 8'h6E;
+ 8'h06: dout_reg <= 8'h31;
+ 8'h07: dout_reg <= 8'h16;
+ 8'h08: dout_reg <= 8'hFB;
+ 8'h09: dout_reg <= 8'hC4;
+ 8'h0A: dout_reg <= 8'hFA;
+ 8'h0B: dout_reg <= 8'hDA;
+ 8'h0C: dout_reg <= 8'h23;
+ 8'h0D: dout_reg <= 8'hC5;
+ 8'h0E: dout_reg <= 8'h04;
+ 8'h0F: dout_reg <= 8'h4D;
+ 8'h10: dout_reg <= 8'hE9;
+ 8'h11: dout_reg <= 8'h77;
+ 8'h12: dout_reg <= 8'hF0;
+ 8'h13: dout_reg <= 8'hDB;
+ 8'h14: dout_reg <= 8'h93;
+ 8'h15: dout_reg <= 8'h2E;
+ 8'h16: dout_reg <= 8'h99;
+ 8'h17: dout_reg <= 8'hBA;
+ 8'h18: dout_reg <= 8'h17;
+ 8'h19: dout_reg <= 8'h36;
+ 8'h1A: dout_reg <= 8'hF1;
+ 8'h1B: dout_reg <= 8'hBB;
+ 8'h1C: dout_reg <= 8'h14;
+ 8'h1D: dout_reg <= 8'hCD;
+ 8'h1E: dout_reg <= 8'h5F;
+ 8'h1F: dout_reg <= 8'hC1;
+ 8'h20: dout_reg <= 8'hF9;
+ 8'h21: dout_reg <= 8'h18;
+ 8'h22: dout_reg <= 8'h65;
+ 8'h23: dout_reg <= 8'h5A;
+ 8'h24: dout_reg <= 8'hE2;
+ 8'h25: dout_reg <= 8'h5C;
+ 8'h26: dout_reg <= 8'hEF;
+ 8'h27: dout_reg <= 8'h21;
+ 8'h28: dout_reg <= 8'h81;
+ 8'h29: dout_reg <= 8'h1C;
+ 8'h2A: dout_reg <= 8'h3C;
+ 8'h2B: dout_reg <= 8'h42;
+ 8'h2C: dout_reg <= 8'h8B;
+ 8'h2D: dout_reg <= 8'h01;
+ 8'h2E: dout_reg <= 8'h8E;
+ 8'h2F: dout_reg <= 8'h4F;
+ 8'h30: dout_reg <= 8'h05;
+ 8'h31: dout_reg <= 8'h84;
+ 8'h32: dout_reg <= 8'h02;
+ 8'h33: dout_reg <= 8'hAE;
+ 8'h34: dout_reg <= 8'hE3;
+ 8'h35: dout_reg <= 8'h6A;
+ 8'h36: dout_reg <= 8'h8F;
+ 8'h37: dout_reg <= 8'hA0;
+ 8'h38: dout_reg <= 8'h06;
+ 8'h39: dout_reg <= 8'h0B;
+ 8'h3A: dout_reg <= 8'hED;
+ 8'h3B: dout_reg <= 8'h98;
+ 8'h3C: dout_reg <= 8'h7F;
+ 8'h3D: dout_reg <= 8'hD4;
+ 8'h3E: dout_reg <= 8'hD3;
+ 8'h3F: dout_reg <= 8'h1F;
+ 8'h40: dout_reg <= 8'hEB;
+ 8'h41: dout_reg <= 8'h34;
+ 8'h42: dout_reg <= 8'h2C;
+ 8'h43: dout_reg <= 8'h51;
+ 8'h44: dout_reg <= 8'hEA;
+ 8'h45: dout_reg <= 8'hC8;
+ 8'h46: dout_reg <= 8'h48;
+ 8'h47: dout_reg <= 8'hAB;
+ 8'h48: dout_reg <= 8'hF2;
+ 8'h49: dout_reg <= 8'h2A;
+ 8'h4A: dout_reg <= 8'h68;
+ 8'h4B: dout_reg <= 8'hA2;
+ 8'h4C: dout_reg <= 8'hFD;
+ 8'h4D: dout_reg <= 8'h3A;
+ 8'h4E: dout_reg <= 8'hCE;
+ 8'h4F: dout_reg <= 8'hCC;
+ 8'h50: dout_reg <= 8'hB5;
+ 8'h51: dout_reg <= 8'h70;
+ 8'h52: dout_reg <= 8'h0E;
+ 8'h53: dout_reg <= 8'h56;
+ 8'h54: dout_reg <= 8'h08;
+ 8'h55: dout_reg <= 8'h0C;
+ 8'h56: dout_reg <= 8'h76;
+ 8'h57: dout_reg <= 8'h12;
+ 8'h58: dout_reg <= 8'hBF;
+ 8'h59: dout_reg <= 8'h72;
+ 8'h5A: dout_reg <= 8'h13;
+ 8'h5B: dout_reg <= 8'h47;
+ 8'h5C: dout_reg <= 8'h9C;
+ 8'h5D: dout_reg <= 8'hB7;
+ 8'h5E: dout_reg <= 8'h5D;
+ 8'h5F: dout_reg <= 8'h87;
+ 8'h60: dout_reg <= 8'h15;
+ 8'h61: dout_reg <= 8'hA1;
+ 8'h62: dout_reg <= 8'h96;
+ 8'h63: dout_reg <= 8'h29;
+ 8'h64: dout_reg <= 8'h10;
+ 8'h65: dout_reg <= 8'h7B;
+ 8'h66: dout_reg <= 8'h9A;
+ 8'h67: dout_reg <= 8'hC7;
+ 8'h68: dout_reg <= 8'hF3;
+ 8'h69: dout_reg <= 8'h91;
+ 8'h6A: dout_reg <= 8'h78;
+ 8'h6B: dout_reg <= 8'h6F;
+ 8'h6C: dout_reg <= 8'h9D;
+ 8'h6D: dout_reg <= 8'h9E;
+ 8'h6E: dout_reg <= 8'hB2;
+ 8'h6F: dout_reg <= 8'hB1;
+ 8'h70: dout_reg <= 8'h32;
+ 8'h71: dout_reg <= 8'h75;
+ 8'h72: dout_reg <= 8'h19;
+ 8'h73: dout_reg <= 8'h3D;
+ 8'h74: dout_reg <= 8'hFF;
+ 8'h75: dout_reg <= 8'h35;
+ 8'h76: dout_reg <= 8'h8A;
+ 8'h77: dout_reg <= 8'h7E;
+ 8'h78: dout_reg <= 8'h6D;
+ 8'h79: dout_reg <= 8'h54;
+ 8'h7A: dout_reg <= 8'hC6;
+ 8'h7B: dout_reg <= 8'h80;
+ 8'h7C: dout_reg <= 8'hC3;
+ 8'h7D: dout_reg <= 8'hBD;
+ 8'h7E: dout_reg <= 8'h0D;
+ 8'h7F: dout_reg <= 8'h57;
+ 8'h80: dout_reg <= 8'hDF;
+ 8'h81: dout_reg <= 8'hF5;
+ 8'h82: dout_reg <= 8'h24;
+ 8'h83: dout_reg <= 8'hA9;
+ 8'h84: dout_reg <= 8'h3E;
+ 8'h85: dout_reg <= 8'hA8;
+ 8'h86: dout_reg <= 8'h43;
+ 8'h87: dout_reg <= 8'hC9;
+ 8'h88: dout_reg <= 8'hD7;
+ 8'h89: dout_reg <= 8'h79;
+ 8'h8A: dout_reg <= 8'hD6;
+ 8'h8B: dout_reg <= 8'hF6;
+ 8'h8C: dout_reg <= 8'h7C;
+ 8'h8D: dout_reg <= 8'h22;
+ 8'h8E: dout_reg <= 8'hB9;
+ 8'h8F: dout_reg <= 8'h03;
+ 8'h90: dout_reg <= 8'hE0;
+ 8'h91: dout_reg <= 8'h0F;
+ 8'h92: dout_reg <= 8'hEC;
+ 8'h93: dout_reg <= 8'hDE;
+ 8'h94: dout_reg <= 8'h7A;
+ 8'h95: dout_reg <= 8'h94;
+ 8'h96: dout_reg <= 8'hB0;
+ 8'h97: dout_reg <= 8'hBC;
+ 8'h98: dout_reg <= 8'hDC;
+ 8'h99: dout_reg <= 8'hE8;
+ 8'h9A: dout_reg <= 8'h28;
+ 8'h9B: dout_reg <= 8'h50;
+ 8'h9C: dout_reg <= 8'h4E;
+ 8'h9D: dout_reg <= 8'h33;
+ 8'h9E: dout_reg <= 8'h0A;
+ 8'h9F: dout_reg <= 8'h4A;
+ 8'hA0: dout_reg <= 8'hA7;
+ 8'hA1: dout_reg <= 8'h97;
+ 8'hA2: dout_reg <= 8'h60;
+ 8'hA3: dout_reg <= 8'h73;
+ 8'hA4: dout_reg <= 8'h1E;
+ 8'hA5: dout_reg <= 8'h00;
+ 8'hA6: dout_reg <= 8'h62;
+ 8'hA7: dout_reg <= 8'h44;
+ 8'hA8: dout_reg <= 8'h1A;
+ 8'hA9: dout_reg <= 8'hB8;
+ 8'hAA: dout_reg <= 8'h38;
+ 8'hAB: dout_reg <= 8'h82;
+ 8'hAC: dout_reg <= 8'h64;
+ 8'hAD: dout_reg <= 8'h9F;
+ 8'hAE: dout_reg <= 8'h26;
+ 8'hAF: dout_reg <= 8'h41;
+ 8'hB0: dout_reg <= 8'hAD;
+ 8'hB1: dout_reg <= 8'h45;
+ 8'hB2: dout_reg <= 8'h46;
+ 8'hB3: dout_reg <= 8'h92;
+ 8'hB4: dout_reg <= 8'h27;
+ 8'hB5: dout_reg <= 8'h5E;
+ 8'hB6: dout_reg <= 8'h55;
+ 8'hB7: dout_reg <= 8'h2F;
+ 8'hB8: dout_reg <= 8'h8C;
+ 8'hB9: dout_reg <= 8'hA3;
+ 8'hBA: dout_reg <= 8'hA5;
+ 8'hBB: dout_reg <= 8'h7D;
+ 8'hBC: dout_reg <= 8'h69;
+ 8'hBD: dout_reg <= 8'hD5;
+ 8'hBE: dout_reg <= 8'h95;
+ 8'hBF: dout_reg <= 8'h3B;
+ 8'hC0: dout_reg <= 8'h07;
+ 8'hC1: dout_reg <= 8'h58;
+ 8'hC2: dout_reg <= 8'hB3;
+ 8'hC3: dout_reg <= 8'h40;
+ 8'hC4: dout_reg <= 8'h86;
+ 8'hC5: dout_reg <= 8'hAC;
+ 8'hC6: dout_reg <= 8'h1D;
+ 8'hC7: dout_reg <= 8'hF7;
+ 8'hC8: dout_reg <= 8'h30;
+ 8'hC9: dout_reg <= 8'h37;
+ 8'hCA: dout_reg <= 8'h6B;
+ 8'hCB: dout_reg <= 8'hE4;
+ 8'hCC: dout_reg <= 8'h88;
+ 8'hCD: dout_reg <= 8'hD9;
+ 8'hCE: dout_reg <= 8'hE7;
+ 8'hCF: dout_reg <= 8'h89;
+ 8'hD0: dout_reg <= 8'hE1;
+ 8'hD1: dout_reg <= 8'h1B;
+ 8'hD2: dout_reg <= 8'h83;
+ 8'hD3: dout_reg <= 8'h49;
+ 8'hD4: dout_reg <= 8'h4C;
+ 8'hD5: dout_reg <= 8'h3F;
+ 8'hD6: dout_reg <= 8'hF8;
+ 8'hD7: dout_reg <= 8'hFE;
+ 8'hD8: dout_reg <= 8'h8D;
+ 8'hD9: dout_reg <= 8'h53;
+ 8'hDA: dout_reg <= 8'hAA;
+ 8'hDB: dout_reg <= 8'h90;
+ 8'hDC: dout_reg <= 8'hCA;
+ 8'hDD: dout_reg <= 8'hD8;
+ 8'hDE: dout_reg <= 8'h85;
+ 8'hDF: dout_reg <= 8'h61;
+ 8'hE0: dout_reg <= 8'h20;
+ 8'hE1: dout_reg <= 8'h71;
+ 8'hE2: dout_reg <= 8'h67;
+ 8'hE3: dout_reg <= 8'hA4;
+ 8'hE4: dout_reg <= 8'h2D;
+ 8'hE5: dout_reg <= 8'h2B;
+ 8'hE6: dout_reg <= 8'h09;
+ 8'hE7: dout_reg <= 8'h5B;
+ 8'hE8: dout_reg <= 8'hCB;
+ 8'hE9: dout_reg <= 8'h9B;
+ 8'hEA: dout_reg <= 8'h25;
+ 8'hEB: dout_reg <= 8'hD0;
+ 8'hEC: dout_reg <= 8'hBE;
+ 8'hED: dout_reg <= 8'hE5;
+ 8'hEE: dout_reg <= 8'h6C;
+ 8'hEF: dout_reg <= 8'h52;
+ 8'hF0: dout_reg <= 8'h59;
+ 8'hF1: dout_reg <= 8'hA6;
+ 8'hF2: dout_reg <= 8'h74;
+ 8'hF3: dout_reg <= 8'hD2;
+ 8'hF4: dout_reg <= 8'hE6;
+ 8'hF5: dout_reg <= 8'hF4;
+ 8'hF6: dout_reg <= 8'hB4;
+ 8'hF7: dout_reg <= 8'hC0;
+ 8'hF8: dout_reg <= 8'hD1;
+ 8'hF9: dout_reg <= 8'h66;
+ 8'hFA: dout_reg <= 8'hAF;
+ 8'hFB: dout_reg <= 8'hC2;
+ 8'hFC: dout_reg <= 8'h39;
+ 8'hFD: dout_reg <= 8'h4B;
+ 8'hFE: dout_reg <= 8'h63;
+ 8'hFF: dout_reg <= 8'hB6;
+ //
+ endcase // case (din)
+ //
+ end // if (ena)
+ //
+ end // always @(posedge clk)
+
+
+endmodule
diff --git a/streebog_hash/tb/streebog_tb.v b/streebog_hash/tb/streebog_tb.v
new file mode 100644
index 0000000..291f11c
--- /dev/null
+++ b/streebog_hash/tb/streebog_tb.v
@@ -0,0 +1,198 @@
+`timescale 1ns / 1ps
+
+module streebog_tb;
+
+
+ //
+ // Self-checking testbench for streebog_hash_top.
+ //
+ // Two reference messages (one that fits a single block, one that spans two
+ // blocks) are hashed in both 512-bit and 256-bit mode; each digest is
+ // compared with the expected value and "OK" or "ERROR: ..." is printed.
+ //
+ // The clock rises at 5, 15, 25, ... ns and every delay used for stimulus is
+ // a multiple of 10 ns, so all input changes and all sampling of the core's
+ // outputs happen on falling clock edges.
+ //
+
+
+ localparam STREEBOG_MODE_SHORT = 1;
+ localparam STREEBOG_MODE_LONG = 0;
+
+ // short message that fits into one block (already padded: a 0x01 byte
+ // sits directly above the 504 message bits)
+ localparam [511:0] MSG_SINGLE = 512'h01323130393837363534333231303938373635343332313039383736353433323130393837363534333231303938373635343332313039383736353433323130;
+
+ // length of short message in bits
+ localparam [ 9:0] MSG_SINGLE_LENGTH = 10'd504;
+
+ // correct 512-bit digest of short message
+ localparam [511:0] MSG_SINGLE_DIGEST_LONG = 512'h486f64c1917879417fef082b3381a4e211c324f074654c38823a7b76f830ad00fa1fbae42b1285c0352f227524bc9ab16254288dd6863dccd5b9f54a1ad0541b;
+
+ // correct 256-bit digest of short message
+ localparam [255:0] MSG_SINGLE_DIGEST_SHORT = 256'h00557be5e584fd52a449b16b0251d05d27f94ab76cbaa6da890b59d8ef1e159d;
+
+
+ // first block of long message
+ localparam [511:0] MSG_DOUBLE_FIRST = 512'hfbeafaebef20fffbf0e1e0f0f520e0ed20e8ece0ebe5f0f2f120fff0eeec20f120faf2fee5e2202ce8f6f3ede220e8e6eee1e8f0f2d1202ce8f0f2e5e220e5d1;
+
+ // second block of long message (already padded: a 0x01 byte sits
+ // directly above the 64 message bits)
+ localparam [511:0] MSG_DOUBLE_SECOND = 512'h0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001fbe2e5f0eee3c820;
+
+ // length of first part of long message in bits
+ localparam [ 9:0] MSG_DOUBLE_FIRST_LENGTH = 10'd512;
+
+ // length of second part of long message in bits
+ localparam [ 9:0] MSG_DOUBLE_SECOND_LENGTH = 10'd64;
+
+ // correct 512-bit digest of long message
+ localparam [511:0] MSG_DOUBLE_DIGEST_LONG = 512'h28fbc9bada033b1460642bdcddb90c3fb3e56c497ccd0f62b8a2ad4935e85f037613966de4ee00531ae60f3b5a47f8dae06915d5f2f194996fcabf2622e6881e;
+
+ // correct 256-bit digest of long message
+ // (declared 256 bits wide so it matches the width of its value and of hash_short)
+ localparam [255:0] MSG_DOUBLE_DIGEST_SHORT = 256'h508f7e553c06501d749a66fc28c6cac0b005746d97537fa85d9e40904efed29d;
+
+
+ //
+ // Inputs
+ //
+ reg clock;
+ reg [511:0] block;
+ reg [ 9:0] block_length;
+ reg init = 0;
+ reg update = 0;
+ reg final = 0;
+ reg short_mode;
+
+
+ //
+ // Outputs
+ //
+ wire [511:0] digest;
+ wire digest_valid;
+ wire ready;
+
+
+ //
+ // UUT
+ //
+ streebog_hash_top uut
+ (
+     .clock        (clock),
+
+     .block        (block),
+     .block_length (block_length),
+     .init         (init),
+     .update       (update),
+     .final        (final),
+     .short_mode   (short_mode),
+     .digest       (digest),
+     .digest_valid (digest_valid),
+     .ready        (ready)
+ );
+
+ //
+ // Clock (10 ns period, first rising edge at 5 ns)
+ //
+ initial clock = 1'b0;
+ always #5 clock = ~clock;
+
+ //
+ // Digest captured by streebog_final; in short mode the 256-bit result is
+ // taken from the upper half of the 512-bit output.
+ //
+ reg [511:0] hash;
+ wire [255:0] hash_short = hash[511:256];
+
+ initial begin
+     //
+     #100;
+     //
+     $display("Checking 512-bit mode on short message...");
+     //
+     streebog_init(STREEBOG_MODE_LONG);
+     streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+     streebog_update();
+     streebog_final();
+     //
+     if (hash == MSG_SINGLE_DIGEST_LONG) $display("OK");
+     else $display("ERROR: hash == %0128h", hash);
+     //
+     #100;
+     //
+     $display("Checking 256-bit mode on short message...");
+     //
+     streebog_init(STREEBOG_MODE_SHORT);
+     streebog_set_block(MSG_SINGLE, MSG_SINGLE_LENGTH);
+     streebog_update();
+     streebog_final();
+     //
+     if (hash_short == MSG_SINGLE_DIGEST_SHORT) $display("OK");
+     else $display("ERROR: hash_short == %064h", hash_short);
+     //
+     #100;
+     //
+     $display("Checking 512-bit mode on long message...");
+     //
+     streebog_init(STREEBOG_MODE_LONG);
+     streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+     streebog_update();
+     streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+     streebog_update();
+     streebog_final();
+     //
+     if (hash == MSG_DOUBLE_DIGEST_LONG) $display("OK");
+     else $display("ERROR: hash == %0128h", hash);
+     //
+     #100;
+     //
+     $display("Checking 256-bit mode on long message...");
+     //
+     streebog_init(STREEBOG_MODE_SHORT);
+     streebog_set_block(MSG_DOUBLE_FIRST, MSG_DOUBLE_FIRST_LENGTH);
+     streebog_update();
+     streebog_set_block(MSG_DOUBLE_SECOND, MSG_DOUBLE_SECOND_LENGTH);
+     streebog_update();
+     streebog_final();
+     //
+     if (hash_short == MSG_DOUBLE_DIGEST_SHORT) $display("OK");
+     else $display("ERROR: hash_short == %064h", hash_short);
+     //
+     #100;
+     //
+     $finish;
+ end
+
+
+ //
+ // Select the digest size and pulse init for one clock period.
+ //
+ task streebog_init;
+     input use_short_mode;
+     begin
+         short_mode = use_short_mode;
+         init = 1;
+         #10;
+         init = 0;
+         #10;
+     end
+ endtask
+
+
+ //
+ // Present a new message block and its length in bits to the core.
+ // Takes no simulation time.
+ //
+ task streebog_set_block;
+     input [511:0] new_block;
+     input [ 9:0] new_block_length;
+     begin
+         block = new_block;
+         block_length = new_block_length;
+     end
+ endtask
+
+
+ //
+ // Pulse update for one clock period, then poll ready every 10 ns until the
+ // core reports that it is idle again.
+ //
+ task streebog_update;
+     begin
+         update = 1;
+         #10;
+         update = 0;
+         #10;
+         while (!ready) #10;
+         #10;
+     end
+ endtask
+
+
+ //
+ // Pulse final for one clock period, wait for digest_valid, capture the
+ // digest into hash, then wait for the core to become ready again.
+ //
+ task streebog_final;
+     begin
+         final = 1;
+         #10;
+         final = 0;
+         #10;
+         while (!digest_valid) #10;
+         hash = digest;
+         #10;
+         while (!ready) #10;
+         #10;
+     end
+ endtask
+
+endmodule
+
diff --git a/streebog_wrapper.v b/streebog_wrapper.v
new file mode 100644
index 0000000..a2ef47d
--- /dev/null
+++ b/streebog_wrapper.v
@@ -0,0 +1,241 @@
+//----------------------------------------------------------------------
+// streebog_wrapper
+//
+// 32-bit register interface around the streebog_hash_top core.
+//
+// Bus behaviour (all actions happen on the rising edge of clk):
+//   * rst is synchronous and active high; it clears the control,
+//     block-length and mode registers, the block buffer and the read
+//     latch.
+//   * cs && we  : write_data is stored in the register selected by
+//                 address; writes to other addresses are ignored.
+//   * cs && !we : the selected register is copied into the read latch,
+//                 so read_data changes one clock edge after the
+//                 request. Unmapped addresses read as zero.
+//
+// Register map (word addresses):
+//   0x00, 0x01  core name        (read only)
+//   0x02        core version     (read only)
+//   0x08        control          {final, update, init}
+//   0x09        status           {valid, ready} (read only)
+//   0x0a        block length in bits
+//   0x0b        mode             0 = long (512-bit), 1 = short (256-bit)
+//   0x10..0x1f  message block, word 0 holds bits 511:480
+//   0x20..0x2f  digest, word 0 holds bits 511:480 (read only)
+//
+// The core's init/update/final inputs are one-clock pulses produced
+// when the matching control bit goes from 0 to 1. To issue the same
+// command again the bit has to be written back to 0 first.
+//----------------------------------------------------------------------
+module streebog_wrapper
+  (
+   input  wire          clk,
+   input  wire          rst,
+
+   input  wire          cs,
+   input  wire          we,
+
+   input  wire [ 7: 0]  address,
+   input  wire [31: 0]  write_data,
+   output wire [31: 0]  read_data
+  );
+
+  //----------------------------------------------------------------
+  // Internal constant and parameter definitions.
+  //----------------------------------------------------------------
+  localparam ADDR_NAME0      = 8'h00;
+  localparam ADDR_NAME1      = 8'h01;
+  localparam ADDR_VERSION    = 8'h02;
+
+  localparam ADDR_CTRL       = 8'h08; // {final, update, init}
+  localparam ADDR_STATUS     = 8'h09; // {valid, ready}
+  localparam ADDR_BLOCK_BITS = 8'h0a; // block length in bits
+  localparam ADDR_MODE       = 8'h0b; // 0=long (512-bit) mode, 1=short (256-bit) mode
+
+  localparam ADDR_BLOCK0     = 8'h10;
+  localparam ADDR_BLOCK1     = 8'h11;
+  localparam ADDR_BLOCK2     = 8'h12;
+  localparam ADDR_BLOCK3     = 8'h13;
+  localparam ADDR_BLOCK4     = 8'h14;
+  localparam ADDR_BLOCK5     = 8'h15;
+  localparam ADDR_BLOCK6     = 8'h16;
+  localparam ADDR_BLOCK7     = 8'h17;
+  localparam ADDR_BLOCK8     = 8'h18;
+  localparam ADDR_BLOCK9     = 8'h19;
+  localparam ADDR_BLOCK10    = 8'h1a;
+  localparam ADDR_BLOCK11    = 8'h1b;
+  localparam ADDR_BLOCK12    = 8'h1c;
+  localparam ADDR_BLOCK13    = 8'h1d;
+  localparam ADDR_BLOCK14    = 8'h1e;
+  localparam ADDR_BLOCK15    = 8'h1f;
+
+  localparam ADDR_DIGEST0    = 8'h20;
+  localparam ADDR_DIGEST1    = 8'h21;
+  localparam ADDR_DIGEST2    = 8'h22;
+  localparam ADDR_DIGEST3    = 8'h23;
+  localparam ADDR_DIGEST4    = 8'h24;
+  localparam ADDR_DIGEST5    = 8'h25;
+  localparam ADDR_DIGEST6    = 8'h26;
+  localparam ADDR_DIGEST7    = 8'h27;
+  localparam ADDR_DIGEST8    = 8'h28;
+  localparam ADDR_DIGEST9    = 8'h29;
+  localparam ADDR_DIGEST10   = 8'h2a;
+  localparam ADDR_DIGEST11   = 8'h2b;
+  localparam ADDR_DIGEST12   = 8'h2c;
+  localparam ADDR_DIGEST13   = 8'h2d;
+  localparam ADDR_DIGEST14   = 8'h2e;
+  localparam ADDR_DIGEST15   = 8'h2f;
+
+
+  // Bit positions inside the control register.
+  localparam CTRL_INIT_BIT   = 0;
+  localparam CTRL_UPDATE_BIT = 1;
+  localparam CTRL_FINAL_BIT  = 2;
+
+  // Bit positions inside the status register.
+  localparam STATUS_READY_BIT = 0;
+  localparam STATUS_VALID_BIT = 1;
+
+  localparam CORE_NAME0   = 32'h73747265; // "stre"
+  localparam CORE_NAME1   = 32'h65626F67; // "ebog"
+  localparam CORE_VERSION = 32'h302E3130; // "0.10"
+
+
+  //----------------------------------------------------------------
+  // Control register
+  //----------------------------------------------------------------
+  reg [2:0] reg_ctrl;        // core input
+  reg [9:0] reg_block_bits;  // input block length in bits
+  reg       reg_mode;        // long/short mode
+
+
+  //----------------------------------------------------------------
+  // Init, Update and Final 1-Cycle Pulses
+  //
+  // reg_ctrl_dly is reg_ctrl delayed by one clock. A pulse is high
+  // only while the current bit is 1 and the delayed bit is still 0,
+  // i.e. for the single clock that follows a 0 -> 1 write.
+  //----------------------------------------------------------------
+  reg [2:0] reg_ctrl_dly;
+  always @(posedge clk) reg_ctrl_dly <= reg_ctrl;
+
+  wire core_init_pulse   = (reg_ctrl[CTRL_INIT_BIT]   == 1'b1) && (reg_ctrl_dly[CTRL_INIT_BIT]   == 1'b0);
+  wire core_update_pulse = (reg_ctrl[CTRL_UPDATE_BIT] == 1'b1) && (reg_ctrl_dly[CTRL_UPDATE_BIT] == 1'b0);
+  wire core_final_pulse  = (reg_ctrl[CTRL_FINAL_BIT]  == 1'b1) && (reg_ctrl_dly[CTRL_FINAL_BIT]  == 1'b0);
+
+
+  //----------------------------------------------------------------
+  // Status register
+  //----------------------------------------------------------------
+  wire core_ready;    // core output
+  wire digest_valid;  // core output
+
+  wire [1:0] reg_status = {digest_valid, core_ready};
+
+
+  //----------------------------------------------------------------
+  // Block and Digest
+  //----------------------------------------------------------------
+  reg  [511 : 0] core_block;   // core input
+  wire [511 : 0] core_digest;  // core output
+
+
+  //----------------------------------------------------------------
+  // core instantiation.
+  //----------------------------------------------------------------
+  streebog_hash_top streebog
+    (
+     .clock        (clk),
+
+     .block        (core_block),
+     .block_length (reg_block_bits),
+
+     .init         (core_init_pulse),
+     .update       (core_update_pulse),
+     .final        (core_final_pulse),
+
+     .short_mode   (reg_mode),
+
+     .digest       (core_digest),
+     .digest_valid (digest_valid),
+
+     .ready        (core_ready)
+    );
+
+  //----------------------------------------------------------------
+  // Read Latch
+  //----------------------------------------------------------------
+  reg [31: 0] tmp_read_data;
+
+  assign read_data = tmp_read_data;
+
+
+  //----------------------------------------------------------------
+  // Read/Write Interface
+  //----------------------------------------------------------------
+  always @(posedge clk)
+    //
+    if (rst) begin
+      //
+      reg_ctrl       <= 3'b000;
+      reg_block_bits <= 10'd0;
+      reg_mode       <= 1'b0;
+      core_block     <= {512{1'b0}};
+      tmp_read_data  <= 32'h00000000;
+      //
+    end else if (cs) begin
+      //
+      if (we) begin
+        //
+        // Write Handler
+        //
+        // Only the low bits that exist in each register are stored;
+        // addresses not listed here are ignored.
+        //
+        case (address)
+          ADDR_CTRL:       reg_ctrl            <= write_data[2:0];
+          ADDR_BLOCK_BITS: reg_block_bits      <= write_data[9:0];
+          ADDR_MODE:       reg_mode            <= write_data[0];
+          ADDR_BLOCK0:     core_block[511:480] <= write_data;
+          ADDR_BLOCK1:     core_block[479:448] <= write_data;
+          ADDR_BLOCK2:     core_block[447:416] <= write_data;
+          ADDR_BLOCK3:     core_block[415:384] <= write_data;
+          ADDR_BLOCK4:     core_block[383:352] <= write_data;
+          ADDR_BLOCK5:     core_block[351:320] <= write_data;
+          ADDR_BLOCK6:     core_block[319:288] <= write_data;
+          ADDR_BLOCK7:     core_block[287:256] <= write_data;
+          ADDR_BLOCK8:     core_block[255:224] <= write_data;
+          ADDR_BLOCK9:     core_block[223:192] <= write_data;
+          ADDR_BLOCK10:    core_block[191:160] <= write_data;
+          ADDR_BLOCK11:    core_block[159:128] <= write_data;
+          ADDR_BLOCK12:    core_block[127: 96] <= write_data;
+          ADDR_BLOCK13:    core_block[ 95: 64] <= write_data;
+          ADDR_BLOCK14:    core_block[ 63: 32] <= write_data;
+          ADDR_BLOCK15:    core_block[ 31:  0] <= write_data;
+        endcase
+        //
+      end else begin
+        //
+        // Read Handler
+        //
+        // Narrow registers are zero-padded up to the full 32 bits.
+        //
+        case (address)
+          //
+          ADDR_NAME0:      tmp_read_data <= CORE_NAME0;
+          ADDR_NAME1:      tmp_read_data <= CORE_NAME1;
+          ADDR_VERSION:    tmp_read_data <= CORE_VERSION;
+          ADDR_CTRL:       tmp_read_data <= {{29{1'b0}}, reg_ctrl};        // 29 + 3 = 32 bits
+          ADDR_STATUS:     tmp_read_data <= {{30{1'b0}}, reg_status};
+          ADDR_BLOCK_BITS: tmp_read_data <= {{22{1'b0}}, reg_block_bits};
+          ADDR_MODE:       tmp_read_data <= {{31{1'b0}}, reg_mode};
+          //
+          ADDR_BLOCK0:     tmp_read_data <= core_block[511:480];
+          ADDR_BLOCK1:     tmp_read_data <= core_block[479:448];
+          ADDR_BLOCK2:     tmp_read_data <= core_block[447:416];
+          ADDR_BLOCK3:     tmp_read_data <= core_block[415:384];
+          ADDR_BLOCK4:     tmp_read_data <= core_block[383:352];
+          ADDR_BLOCK5:     tmp_read_data <= core_block[351:320];
+          ADDR_BLOCK6:     tmp_read_data <= core_block[319:288];
+          ADDR_BLOCK7:     tmp_read_data <= core_block[287:256];
+          ADDR_BLOCK8:     tmp_read_data <= core_block[255:224];
+          ADDR_BLOCK9:     tmp_read_data <= core_block[223:192];
+          ADDR_BLOCK10:    tmp_read_data <= core_block[191:160];
+          ADDR_BLOCK11:    tmp_read_data <= core_block[159:128];
+          ADDR_BLOCK12:    tmp_read_data <= core_block[127: 96];
+          ADDR_BLOCK13:    tmp_read_data <= core_block[ 95: 64];
+          ADDR_BLOCK14:    tmp_read_data <= core_block[ 63: 32];
+          ADDR_BLOCK15:    tmp_read_data <= core_block[ 31:  0];
+          //
+          ADDR_DIGEST0:    tmp_read_data <= core_digest[511:480];
+          ADDR_DIGEST1:    tmp_read_data <= core_digest[479:448];
+          ADDR_DIGEST2:    tmp_read_data <= core_digest[447:416];
+          ADDR_DIGEST3:    tmp_read_data <= core_digest[415:384];
+          ADDR_DIGEST4:    tmp_read_data <= core_digest[383:352];
+          ADDR_DIGEST5:    tmp_read_data <= core_digest[351:320];
+          ADDR_DIGEST6:    tmp_read_data <= core_digest[319:288];
+          ADDR_DIGEST7:    tmp_read_data <= core_digest[287:256];
+          ADDR_DIGEST8:    tmp_read_data <= core_digest[255:224];
+          ADDR_DIGEST9:    tmp_read_data <= core_digest[223:192];
+          ADDR_DIGEST10:   tmp_read_data <= core_digest[191:160];
+          ADDR_DIGEST11:   tmp_read_data <= core_digest[159:128];
+          ADDR_DIGEST12:   tmp_read_data <= core_digest[127: 96];
+          ADDR_DIGEST13:   tmp_read_data <= core_digest[ 95: 64];
+          ADDR_DIGEST14:   tmp_read_data <= core_digest[ 63: 32];
+          ADDR_DIGEST15:   tmp_read_data <= core_digest[ 31:  0];
+          //
+          default:         tmp_read_data <= 32'h00000000;
+          //
+        endcase
+        //
+      end
+      //
+    end
+
+
+endmodule // streebog_wrapper
+
+
+//======================================================================
+// EOF streebog_wrapper.v
+//======================================================================