summaryrefslogtreecommitdiff
path: root/streebog_hash/streebog_hash_top.v
diff options
context:
space:
mode:
Diffstat (limited to 'streebog_hash/streebog_hash_top.v')
-rw-r--r--streebog_hash/streebog_hash_top.v421
1 files changed, 421 insertions, 0 deletions
diff --git a/streebog_hash/streebog_hash_top.v b/streebog_hash/streebog_hash_top.v
new file mode 100644
index 0000000..1cd1bbe
--- /dev/null
+++ b/streebog_hash/streebog_hash_top.v
@@ -0,0 +1,421 @@
+`timescale 1ns / 1ps
+
+module streebog_hash_top
+ (
+ clock,
+ block, block_length,
+ init, update, final,
+ short_mode,
+ digest, digest_valid,
+ ready
+ );
+
+
+ //
+ // Parameters
+ //
+ parameter PS_PIPELINE_STAGES = 2; // forwarded unchanged to both streebog_core_lps instances; listed values: 2, 4, 8
+ parameter L_PIPELINE_STAGES = 2; // forwarded unchanged to both streebog_core_lps instances; listed values: 2, 4, 8, 16, 32, 64
+
+
+ //
+ // Ports
+ //
+ input wire clock; // core clock (all registers in this module update on its rising edge)
+ input wire [511:0] block; // input message block; not latched here, it is read directly while an update is running
+ input wire [ 9:0] block_length; // length of input block in bits (0..512); zero-extended and added to N
+ input wire init; // flag to start calculation of new message hash (wins over update and final)
+ input wire update; // flag to compress next message block (wins over final)
+ input wire final; // flag to run final transformation after last message block
+ input wire short_mode; // 0 = produce 512-bit hash, 1 = produce 256-bit hash; only read together with init to pick the IV
+ output wire [511:0] digest; // message digest output
+ output wire digest_valid; // hash is ready (digest output value is valid)
+ output wire ready; // core is ready (init/update/final can be asserted)
+
+
+ //
+ // Initialization Vectors and Round Count
+ //
+ localparam STREEBOG_IV_512 = {512{1'b0}}; // h after init when short_mode = 0: all 512 bits zero
+ localparam STREEBOG_IV_256 = {64{8'h01}}; // h after init when short_mode = 1: byte 0x01 repeated 64 times
+ localparam STREEBOG_NUM_ROUNDS = 4'd12; // round_count value at which a transformation is treated as finished
+
+
+ //
+ // State Registers
+ //
+ reg [511:0] h; // | chaining value: IV on init, rewritten at the end of gN(h,m) and g0(h,N)
+ reg [511:0] Sigma; // | Internal State Registers; Sigma is reloaded from adder_sigma_sum (Sigma + block)
+ reg [511:0] N; // | N is reloaded from adder_n_sum (N + block_length)
+
+ reg [511:0] digest_reg; // drives the digest port
+ reg digest_valid_reg = 1'b0; // drives digest_valid: set when g0(h,Sigma) ends, cleared on init
+ reg [ 3:0] round_count = 4'd0; // round counter shared by all three transformations
+
+ assign digest = digest_reg;
+ assign digest_valid = digest_valid_reg;
+
+
+ //
+ // Handy Internal Flags
+ //
+ wire round_count_active; // round counter is non-zero: transformation has been started
+ wire round_count_not_done; // round counter is below STREEBOG_NUM_ROUNDS: transformation has not been finished
+
+ assign round_count_active = (round_count > 4'd0);
+ assign round_count_not_done = (round_count < STREEBOG_NUM_ROUNDS);
+
+
+ /*
+ * Compression procedure includes 13 rounds. To perform every round we need to know
+ * round key. This implementation uses two parallel LPS cores. The first LPS core (key core)
+ * is used to produce round keys, the second LPS core (data core) is used to encrypt message block.
+ *
+ * Data core is not activated during the first round, because the round key is not yet known during
+ * the first round. During the second round, the key core computes the next (second) round key, while the data core
+ * encrypts the message block using the first round key, and so on. The last compression round doesn't include an
+ * encryption step; a simple XOR operation is used instead.
+ *
+ * Compression procedure requires 13 key calculations and 12 data encryptions. LPS cores operate according to
+ * the following schedule:
+ *
+ *
+ * +----------+----------+----------+- -+----------+
+ * Round Count | 0 | 1 | 2 | ... | 12 |
+ * +----------+----------+----------+- -+----------+
+ * Key Core | KEY #0 | KEY #1 | KEY #2 | ... | KEY #12 |
+ * +----------+----------+----------+- -+----------+
+ * Data Core | Idle | DATA #0 | DATA #1 | ... | DATA #11 |
+ * +----------+----------+----------+- -+----------+
+ *
+ */
+
+
+ //
+ // LPS Core for Round Key Calculation
+ //
+ // Operand register and result bus of the key core.
+ reg [511:0] lps_key_in;
+ wire [511:0] lps_key_out;
+
+ // Handshake signals of the key core (enable in, "last" and "ready" out).
+ wire lps_key_ena, lps_key_last, lps_key_rdy;
+
+ // One enable term per *_LPS_TRIG state of the FSM.
+ wire lps_key_ena_update;
+ wire lps_key_ena_final_n;
+ wire lps_key_ena_final_sigma;
+
+ assign lps_key_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_TRIG);
+ assign lps_key_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_TRIG);
+ assign lps_key_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_TRIG);
+
+ // The key core is enabled in every round of every transformation.
+ assign lps_key_ena = lps_key_ena_update || lps_key_ena_final_n || lps_key_ena_final_sigma;
+
+ streebog_core_lps #(
+     .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES),
+     .L_PIPELINE_STAGES (L_PIPELINE_STAGES)
+ ) lps_key (
+     .clk (clock),
+     .din (lps_key_in),
+     .dout (lps_key_out),
+     .ena (lps_key_ena),
+     .last (lps_key_last),
+     .rdy (lps_key_rdy)
+ );
+
+
+ //
+ // LPS Core for Block Compression
+ //
+ // Operand register and result bus of the data core.
+ reg [511:0] lps_data_in;
+ wire [511:0] lps_data_out;
+
+ // Handshake signals of the data core (enable in, "last" and "ready" out).
+ wire lps_data_ena, lps_data_last, lps_data_rdy;
+
+ // The data core runs together with the key core, except while round_count is still zero.
+ assign lps_data_ena = lps_key_ena & round_count_active;
+
+ streebog_core_lps #(
+     .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES),
+     .L_PIPELINE_STAGES (L_PIPELINE_STAGES)
+ ) lps_data (
+     .clk (clock),
+     .din (lps_data_in),
+     .dout (lps_data_out),
+     .ena (lps_data_ena),
+     .last (lps_data_last),
+     .rdy (lps_data_rdy)
+ );
+
+
+ /*
+ * According to specification, internal state must be updated after compression, this
+ * involves addition of two pairs of 512-bit numbers. This operation is done in two
+ * parallel summation cores. The first core updates N register, the second core updates
+ * Sigma register. Summation is triggered before LPS cores are activated. Actual update
+ * of N and Sigma occurs after completion of compression procedure.
+ *
+ */
+
+
+ //
+ // Summation Trigger Flag
+ //
+ wire adder_trig; // high only while the FSM sits in UPDATE_ADDER_TRIG, which lasts one clock
+ assign adder_trig = (fsm_state == FSM_STATE_UPDATE_ADDER_TRIG);
+
+
+ //
+ // Block Length Adder (N = N + |M|)
+ //
+ wire [511:0] adder_n_sum;
+ wire adder_n_rdy;
+
+ streebog_core_adder_s6 adder_n (
+     .clk (clock),
+     .ena (adder_trig),
+     .x (N),
+     .y ({502'b0, block_length}), // block_length zero-extended to 512 bits
+     .sum (adder_n_sum),
+     .rdy (adder_n_rdy)
+ );
+
+
+ //
+ // Message Adder (Sigma = Sigma + M)
+ //
+ wire [511:0] adder_sigma_sum;
+ wire adder_sigma_rdy;
+
+ streebog_core_adder_s6 adder_sigma (
+     .clk (clock),
+     .ena (adder_trig),
+     .x (Sigma),
+     .y (block),
+     .sum (adder_sigma_sum),
+     .rdy (adder_sigma_rdy)
+ );
+
+
+ //
+ // Handy Flags
+ //
+ wire lps_last_both; // both LPS cores raise "last" (not referenced elsewhere in this module)
+ wire lps_rdy_both; // both LPS cores report ready; paces the FSM round loop
+ wire adder_rdy_both; // both adders report ready; gates the N / Sigma update
+
+ assign lps_last_both = lps_key_last & lps_data_last;
+ assign lps_rdy_both = lps_key_rdy & lps_data_rdy;
+ assign adder_rdy_both = adder_n_rdy & adder_sigma_rdy;
+
+
+ /*
+ * Operation of this core is controlled by FSM logic. Ready flag is embedded in state encoding. FSM goes out of
+ * idle state when init/update/final flags become active. Init flag has priority over update and final flags.
+ * Update flag has priority over final flag.
+ *
+ */
+
+
+ //
+ // FSM States
+ //
+ localparam FSM_STATE_IDLE = 4'b1_00_0; // core is idle (the only encoding with bit 3, the ready bit, set)
+ // encoding layout: bit 3 = ready, bits 2:1 = phase, bit 0 = 0 for *_TRIG / 1 for *_WAIT
+ localparam FSM_STATE_UPDATE_LPS_TRIG = 4'b0_00_0; // core is triggering gN(h,m) transformation
+ localparam FSM_STATE_UPDATE_LPS_WAIT = 4'b0_00_1; // core is waiting for transformation to complete
+ //
+ localparam FSM_STATE_UPDATE_ADDER_TRIG = 4'b0_11_0; // core is triggering summation
+ localparam FSM_STATE_UPDATE_ADDER_WAIT = 4'b0_11_1; // core is waiting for summation to complete
+ //
+ localparam FSM_STATE_FINAL_N_LPS_TRIG = 4'b0_01_0; // core is triggering g0(h,N) transformation
+ localparam FSM_STATE_FINAL_N_LPS_WAIT = 4'b0_01_1; // core is waiting for transformation to complete
+ //
+ localparam FSM_STATE_FINAL_SIGMA_LPS_TRIG = 4'b0_10_0; // core is triggering g0(h,Sigma) transformation
+ localparam FSM_STATE_FINAL_SIGMA_LPS_WAIT = 4'b0_10_1; // core is waiting for transformation to complete
+
+
+ //
+ // FSM State Register and Core Ready Flag
+ //
+ reg [ 3: 0] fsm_state = FSM_STATE_IDLE;
+ assign ready = fsm_state[3]; // ready is taken straight from the state encoding, no separate register
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clock) begin
+ // Next-state logic: at most one transition per rising clock edge.
+ case (fsm_state)
+ //
+ // idle: init alone keeps the FSM here, update wins over final
+ //
+ FSM_STATE_IDLE: begin
+ if (!init && update) fsm_state <= FSM_STATE_UPDATE_ADDER_TRIG;
+ if (!init && !update && final) fsm_state <= FSM_STATE_FINAL_N_LPS_TRIG;
+ end
+ //
+ // update -> gN(h,m): adders are triggered first, then TRIG/WAIT repeats once per round
+ //
+ FSM_STATE_UPDATE_ADDER_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_TRIG;
+ FSM_STATE_UPDATE_LPS_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_WAIT;
+ FSM_STATE_UPDATE_LPS_WAIT:
+ if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_UPDATE_LPS_TRIG : FSM_STATE_UPDATE_ADDER_WAIT;
+ FSM_STATE_UPDATE_ADDER_WAIT:
+ if (adder_rdy_both) fsm_state <= FSM_STATE_IDLE;
+ //
+ // final -> g0(h,N): after the last round the FSM continues with g0(h,Sigma)
+ //
+ FSM_STATE_FINAL_N_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_N_LPS_WAIT;
+ FSM_STATE_FINAL_N_LPS_WAIT:
+ if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_FINAL_N_LPS_TRIG : FSM_STATE_FINAL_SIGMA_LPS_TRIG;
+ //
+ // final -> g0(h,Sigma): after the last round the FSM returns to idle
+ //
+ FSM_STATE_FINAL_SIGMA_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_SIGMA_LPS_WAIT;
+ FSM_STATE_FINAL_SIGMA_LPS_WAIT:
+ if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_FINAL_SIGMA_LPS_TRIG : FSM_STATE_IDLE;
+ //
+ // default: any encoding not listed above falls back to idle
+ //
+ default: fsm_state <= FSM_STATE_IDLE;
+ //
+ endcase
+ //
+ end
+
+
+ /*
+ * Key calculation involves 12 round constants. These constants are stored in an array. The first key
+ * (calculated during the first round) does not require a constant. New constant is preloaded during the last
+ * cycle of LPS transformation. LPS cores have dedicated output flag indicating that operation is about to complete.
+ * This flag is used as Clock Enable. Constants are preloaded during rounds 1-12 and are used during rounds 2-13.
+ *
+ */
+
+ //
+ // Round Constants
+ //
+ wire [511:0] c_array_out; // constant read from the ROM
+
+ // One term per *_LPS_WAIT state of the FSM.
+ wire c_array_ena_update;
+ wire c_array_ena_final_n;
+ wire c_array_ena_final_sigma;
+
+ assign c_array_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_WAIT);
+ assign c_array_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT);
+ assign c_array_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT);
+
+ // The ROM is enabled when the key core flags "last", the current round is
+ // not the final one, and the FSM is in one of the wait states.
+ wire c_array_ena;
+ assign c_array_ena = lps_key_last && round_count_not_done && (c_array_ena_update || c_array_ena_final_n || c_array_ena_final_sigma);
+
+ // round_count is the ROM address input.
+ (* ROM_STYLE="BLOCK" *)
+ streebog_rom_c_array c_array (
+     .clk (clock),
+     .ena (c_array_ena),
+     .din (round_count),
+     .dout (c_array_out)
+ );
+
+ /*
+ * The following pieces of code take care of LPS and summation inputs and outputs, they also take care
+ * of output digest register and corresponding valid flag.
+ *
+ */
+
+
+ //
+ // Internal State Control Logic
+ //
+ // Chaining value h: loaded with the selected IV on init, then replaced when
+ // the last round of gN(h,m) or g0(h,N) completes.
+ always @(posedge clock) begin
+     case (fsm_state)
+         FSM_STATE_IDLE: begin
+             if (init)
+                 h <= (short_mode == 1'b1) ? STREEBOG_IV_256 : STREEBOG_IV_512;
+         end
+         FSM_STATE_UPDATE_LPS_WAIT: begin
+             if (lps_key_rdy && !round_count_not_done)
+                 h <= lps_key_out ^ lps_data_out ^ h ^ block;
+         end
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_key_rdy && !round_count_not_done)
+                 h <= lps_key_out ^ lps_data_out ^ h ^ N;
+         end
+     endcase
+ end
+
+ // Counters N and Sigma: cleared on init, reloaded from the adder outputs
+ // once both adders report ready.
+ always @(posedge clock) begin
+     case (fsm_state)
+         FSM_STATE_IDLE: begin
+             if (init) begin
+                 N <= {512{1'b0}};
+                 Sigma <= {512{1'b0}};
+             end
+         end
+         FSM_STATE_UPDATE_ADDER_WAIT: begin
+             if (adder_rdy_both) begin
+                 N <= adder_n_sum;
+                 Sigma <= adder_sigma_sum;
+             end
+         end
+     endcase
+ end
+
+
+ //
+ // Output Register Control Logic
+ //
+ // Digest register and its valid flag: invalidated on init, written when the
+ // last round of g0(h,Sigma) completes.
+ always @(posedge clock) begin
+     if (fsm_state == FSM_STATE_IDLE) begin
+         if (init) begin
+             digest_valid_reg <= 1'b0;
+             digest_reg <= {512{1'bX}};
+         end
+     end else if (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT) begin
+         if (lps_key_rdy && !round_count_not_done) begin
+             digest_valid_reg <= 1'b1;
+             digest_reg <= lps_key_out ^ lps_data_out ^ h ^ Sigma;
+         end
+     end
+ end
+
+
+ //
+ // Round Count Logic
+ //
+ // Round counter: cleared when update/final is seen in idle, advanced each
+ // time the key core reports ready in a wait state, wrapped to zero after
+ // the last round.
+ always @(posedge clock) begin
+     if (fsm_state == FSM_STATE_IDLE) begin
+         if (update || final)
+             round_count <= 4'd0;
+     end else if ((fsm_state == FSM_STATE_UPDATE_LPS_WAIT) ||
+                  (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT) ||
+                  (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT)) begin
+         if (lps_key_rdy)
+             round_count <= round_count_not_done ? round_count + 1'b1 : 4'd0;
+     end
+ end
+
+
+ //
+ // Key and Data LPS Cores Logic
+ //
+ // Key core input. Seeded in idle (h ^ N for update, h for final), then fed
+ // with the previous key XOR the round constant. After the last round of
+ // g0(h,N) it is loaded with the value that also becomes the new h.
+ always @(posedge clock) begin
+     case (fsm_state)
+         FSM_STATE_IDLE: begin
+             if (!init) begin
+                 if (update) lps_key_in <= h ^ N;
+                 if (!update && final) lps_key_in <= h;
+             end
+         end
+         FSM_STATE_UPDATE_LPS_WAIT,
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_key_rdy && round_count_not_done)
+                 lps_key_in <= lps_key_out ^ c_array_out;
+         end
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_key_rdy)
+                 lps_key_in <= lps_key_out ^ (round_count_not_done ? c_array_out : (lps_data_out ^ h ^ N));
+         end
+     endcase
+ end
+
+ // Data core input. While round_count is zero the operand is the message
+ // (block, N or Sigma); afterwards it is the previous data core output.
+ // In both cases it is XORed with the key that was just produced.
+ always @(posedge clock) begin
+     case (fsm_state)
+         FSM_STATE_UPDATE_LPS_WAIT: begin
+             if (lps_key_rdy && round_count_not_done)
+                 lps_data_in <= lps_key_out ^ (round_count_active ? lps_data_out : block);
+         end
+         FSM_STATE_FINAL_N_LPS_WAIT: begin
+             if (lps_key_rdy)
+                 lps_data_in <= round_count_not_done ? (lps_key_out ^ (round_count_active ? lps_data_out : N)) : {512{1'bX}};
+         end
+         FSM_STATE_FINAL_SIGMA_LPS_WAIT: begin
+             if (lps_key_rdy && round_count_not_done)
+                 lps_data_in <= round_count_active ? lps_key_out ^ lps_data_out : lps_key_out ^ Sigma;
+         end
+     endcase
+ end
+
+
+endmodule