`timescale 1ns / 1ps module streebog_hash_top ( clock, block, block_length, init, update, final, short_mode, digest, digest_valid, ready ); // // Parameters // parameter PS_PIPELINE_STAGES = 2; // 2, 4, 8 parameter L_PIPELINE_STAGES = 2; // 2, 4, 8, 16, 32, 64 // // Ports // input wire clock; // core clock input wire [511:0] block; // input message block input wire [ 9:0] block_length; // length of input block in bits (0..512) input wire init; // flag to start calculation of new message hash input wire update; // flag to compress next message block input wire final; // flag to run final transformation after last message block input wire short_mode; // 0 = produce 512-bit hash, 1 = produce 256-bit hash output wire [511:0] digest; // message digest output output wire digest_valid; // hash is ready (digest output value is valid) output wire ready; // core is ready (init/update/final can be asserted) // // Initialization Vectors and Round Count // localparam STREEBOG_IV_512 = {512{1'b0}}; localparam STREEBOG_IV_256 = {64{8'h01}}; localparam STREEBOG_NUM_ROUNDS = 4'd12; // // State Registers // reg [511:0] h; // | reg [511:0] Sigma; // | Internal State Registers reg [511:0] N; // | reg [511:0] digest_reg; reg digest_valid_reg = 1'b0; reg [ 3:0] round_count = 4'd0; assign digest = digest_reg; assign digest_valid = digest_valid_reg; // // Handy Internal Flags // wire round_count_active = (round_count > 4'd0) ? 1 : 0; // transformation has been started wire round_count_not_done = (round_count < STREEBOG_NUM_ROUNDS) ? 1 : 0; // transformation has not been finished /* * Compression procedure includes 13 rounds. To perform every round we need to know * round key. This implementation uses two parallel LPS cores. The first LPS core (key core) * is used to produce round keys, the second LPS core (data core) is used to encrypt message block. * * Data core is not activated during the first round, because round key is not yet known during * the first round. During the second round, key core computes next (second) round key, while data core encrypts * mesage block using first round key and so on. The last compression round doesn't include encryption step. * Instead of it simple XOR operation is used. * * Compression procedure requires 13 key calculations and 12 data encryptions. LPS cores operate according to * the following schedule: * * * +----------+----------+----------+- -+----------+ * Round Count | 0 | 1 | 2 | ... | 12 | * +----------+----------+----------+- -+----------+ * Key Core | KEY #0 | KEY #1 | KEY #2 | ... | KEY #12 | * +----------+----------+----------+- -+----------+ * Data Core | Idle | DATA #0 | DATA #1 | ... | DATA #11 | * +----------+----------+----------+- -+----------+ * */ // // LPS Core for Round Key Calculation // reg [511:0] lps_key_in; // wire [511:0] lps_key_out; // wire lps_key_ena; // wire lps_key_last; // wire lps_key_rdy; // wire lps_key_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_TRIG) ? 1 : 0; wire lps_key_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_TRIG) ? 1 : 0; wire lps_key_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_TRIG) ? 1 : 0; assign lps_key_ena = lps_key_ena_update || lps_key_ena_final_n || lps_key_ena_final_sigma; streebog_core_lps # ( .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES), .L_PIPELINE_STAGES (L_PIPELINE_STAGES) ) lps_key ( .clk (clock), .ena (lps_key_ena), .rdy (lps_key_rdy), .last (lps_key_last), .din (lps_key_in), .dout (lps_key_out) ); // // LPS Core for Block Compression // reg [511:0] lps_data_in; wire [511:0] lps_data_out; wire lps_data_ena; wire lps_data_last; wire lps_data_rdy; assign lps_data_ena = lps_key_ena & round_count_active; streebog_core_lps # ( .PS_PIPELINE_STAGES (PS_PIPELINE_STAGES), .L_PIPELINE_STAGES (L_PIPELINE_STAGES) ) lps_data ( .clk (clock), .ena (lps_data_ena), .rdy (lps_data_rdy), .last (lps_data_last), .din (lps_data_in), .dout (lps_data_out) ); /* * According to specification, internal state must be updated after compression, this * involves addition of two pairs of 512-bit numbers. This operation is done in two * parallel summation cores. The first core updates N register, the second core updates * Sigma register. Summation is triggered before LPS cores are activated. Actual update * of N and Sigma occurs after completion of compression procedure. * */ // // Summation Trigger Flag // wire adder_trig = (fsm_state == FSM_STATE_UPDATE_ADDER_TRIG) ? 1 : 0; // // Block Length Adder (N = N + |M|) // wire [511:0] adder_n_sum; wire adder_n_rdy; streebog_core_adder_s6 adder_n ( .clk (clock), .ena (adder_trig), .rdy (adder_n_rdy), .x (N), .y ({{502{1'b0}}, block_length}), .sum (adder_n_sum) ); // // Message Adder (Sigma = Sigma + M) // wire [511:0] adder_sigma_sum; wire adder_sigma_rdy; streebog_core_adder_s6 adder_sigma ( .clk (clock), .ena (adder_trig), .rdy (adder_sigma_rdy), .x (Sigma), .y (block), .sum (adder_sigma_sum) ); // // Handy Flags // wire lps_last_both = lps_key_last & lps_data_last; wire lps_rdy_both = lps_key_rdy & lps_data_rdy; wire adder_rdy_both = adder_n_rdy & adder_sigma_rdy; /* * Operation of this core is controlled by FSM logic. Ready flag is embedded in state encoding. FSM goes out of * idle state when init/update/final flags become active. Init flag has priority over update and final flags. * Update flag has priority over final flag. * */ // // FSM States // localparam FSM_STATE_IDLE = 4'b1_00_0; // core is idle // localparam FSM_STATE_UPDATE_LPS_TRIG = 4'b0_00_0; // core is triggering gN(h,m) transformation localparam FSM_STATE_UPDATE_LPS_WAIT = 4'b0_00_1; // core is waiting for transformation to complete // localparam FSM_STATE_UPDATE_ADDER_TRIG = 4'b0_11_0; // core is triggering summation localparam FSM_STATE_UPDATE_ADDER_WAIT = 4'b0_11_1; // core is waiting for summation to complete // localparam FSM_STATE_FINAL_N_LPS_TRIG = 4'b0_01_0; // core is triggering g0(h,N) transformation localparam FSM_STATE_FINAL_N_LPS_WAIT = 4'b0_01_1; // core is waiting for transformation to complete // localparam FSM_STATE_FINAL_SIGMA_LPS_TRIG = 4'b0_10_0; // core is triggering g0(h,Sigma) transformation localparam FSM_STATE_FINAL_SIGMA_LPS_WAIT = 4'b0_10_1; // core is waiting for transformation of complete // // FSM State Register and Core Ready Flag // reg [ 3: 0] fsm_state = FSM_STATE_IDLE; assign ready = fsm_state[3]; // // FSM Transition Logic // always @(posedge clock) begin // case (fsm_state) // // init // FSM_STATE_IDLE: begin if (!init && update) fsm_state <= FSM_STATE_UPDATE_ADDER_TRIG; if (!init && !update && final) fsm_state <= FSM_STATE_FINAL_N_LPS_TRIG; end // // update -> gN(h,m) // FSM_STATE_UPDATE_ADDER_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_TRIG; FSM_STATE_UPDATE_LPS_TRIG: fsm_state <= FSM_STATE_UPDATE_LPS_WAIT; FSM_STATE_UPDATE_LPS_WAIT: if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_UPDATE_LPS_TRIG : FSM_STATE_UPDATE_ADDER_WAIT; FSM_STATE_UPDATE_ADDER_WAIT: if (adder_rdy_both) fsm_state <= FSM_STATE_IDLE; // // final -> g0(h,N) // FSM_STATE_FINAL_N_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_N_LPS_WAIT; FSM_STATE_FINAL_N_LPS_WAIT: if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_FINAL_N_LPS_TRIG : FSM_STATE_FINAL_SIGMA_LPS_TRIG; // // final -> g0(h,Sigma) // FSM_STATE_FINAL_SIGMA_LPS_TRIG: fsm_state <= FSM_STATE_FINAL_SIGMA_LPS_WAIT; FSM_STATE_FINAL_SIGMA_LPS_WAIT: if (lps_rdy_both) fsm_state <= round_count_not_done ? FSM_STATE_FINAL_SIGMA_LPS_TRIG : FSM_STATE_IDLE; // // default // default: fsm_state <= FSM_STATE_IDLE; // endcase // end /* * Key calculation involves 12 round constants. These constants are stored in an array. The first key * (calculated during the first round) does not require a constant. New constant is preloaded during the last * cycle of LPS transformation. LPS cores have dedicated output flag indicating that operation is about to complete. * This flag is used as Clock Enable. Constants are preloaded during rounds 1-12 and are used during rounds 2-13. * */ // // Round Constants // wire [511:0] c_array_out; wire c_array_ena_update = (fsm_state == FSM_STATE_UPDATE_LPS_WAIT) ? 1 : 0; wire c_array_ena_final_n = (fsm_state == FSM_STATE_FINAL_N_LPS_WAIT) ? 1 : 0; wire c_array_ena_final_sigma = (fsm_state == FSM_STATE_FINAL_SIGMA_LPS_WAIT) ? 1 : 0; wire c_array_ena = lps_key_last && round_count_not_done && (c_array_ena_update || c_array_ena_final_n || c_array_ena_final_sigma); // (* ROM_STYLE="BLOCK" *) // streebog_rom_c_array c_array ( .clk (clock), .ena (c_array_ena), .din (round_count), .dout (c_array_out) ); /* * The following pieces of code take care of LPS and summation inputs and outputs, they also take care * of output digest register and corresponding valid flag. * */ // // Internal State Control Logic // always @(posedge clock) // case (fsm_state) FSM_STATE_IDLE: if (init) begin h <= (short_mode == 1'b1) ? STREEBOG_IV_256 : STREEBOG_IV_512; N <= {512{1'b0}}; Sigma <= {512{1'b0}}; end FSM_STATE_UPDATE_ADDER_WAIT: if (adder_rdy_both) begin N <= adder_n_sum; Sigma <= adder_sigma_sum; end FSM_STATE_UPDATE_LPS_WAIT: if (lps_key_rdy && !round_count_not_done) h <= lps_key_out ^ lps_data_out ^ h ^ block; FSM_STATE_FINAL_N_LPS_WAIT: if (lps_key_rdy && !round_count_not_done) h <= lps_key_out ^ lps_data_out ^ h ^ N; endcase // // Output Register Control Logic // always @(posedge clock) // case (fsm_state) FSM_STATE_IDLE: if (init) begin digest_reg <= {512{1'bX}}; digest_valid_reg <= 1'b0; end FSM_STATE_FINAL_SIGMA_LPS_WAIT: if (lps_key_rdy && !round_count_not_done) begin digest_reg <= lps_key_out ^ lps_data_out ^ h ^ Sigma; digest_valid_reg <= 1'b1; end endcase // // Round Count Logic // always @(posedge clock) // case (fsm_state) FSM_STATE_IDLE: if (update || final) round_count <= 4'd0; FSM_STATE_UPDATE_LPS_WAIT, FSM_STATE_FINAL_N_LPS_WAIT, FSM_STATE_FINAL_SIGMA_LPS_WAIT: if (lps_key_rdy) round_count <= round_count_not_done ? round_count + 1'b1 : 4'd0; endcase // // Key and Data LPS Cores Logic // always @(posedge clock) // case (fsm_state) FSM_STATE_IDLE: if (!init) begin if (update) lps_key_in <= h ^ N; if (!update && final) lps_key_in <= h; end FSM_STATE_UPDATE_LPS_WAIT: if (lps_key_rdy && round_count_not_done) begin lps_key_in <= lps_key_out ^ c_array_out; lps_data_in <= lps_key_out ^ (round_count_active ? lps_data_out : block); end FSM_STATE_FINAL_N_LPS_WAIT: if (lps_key_rdy) begin lps_key_in <= lps_key_out ^ (round_count_not_done ? c_array_out : lps_data_out ^ h ^ N); lps_data_in <= round_count_not_done ? lps_key_out ^ (round_count_active ? lps_data_out : N) : {512{1'bX}}; end FSM_STATE_FINAL_SIGMA_LPS_WAIT: if (lps_key_rdy && round_count_not_done) begin lps_key_in <= lps_key_out ^ c_array_out; lps_data_in <= round_count_active ? lps_key_out ^ lps_data_out : lps_key_out ^ Sigma; end endcase endmodule