summaryrefslogtreecommitdiff
path: root/rtl/modexpa7_top.v
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/modexpa7_top.v')
-rw-r--r--rtl/modexpa7_top.v706
1 files changed, 706 insertions, 0 deletions
diff --git a/rtl/modexpa7_top.v b/rtl/modexpa7_top.v
new file mode 100644
index 0000000..6c5a922
--- /dev/null
+++ b/rtl/modexpa7_top.v
@@ -0,0 +1,706 @@
+//======================================================================
+//
+// Copyright (c) 2016, NORDUnet A/S All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// - Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexpa7_top
+ #(parameter MAX_MODULUS_WIDTH = 1024)
+ (
+ input wire clk,
+
+ input wire init,
+ output wire ready,
+
+ input wire next,
+ output wire valid,
+
+ input wire [MODULUS_NUM_BITS-1:0] modulus_width,
+ input wire [MODULUS_NUM_BITS-1:0] exponent_width,
+
+ input wire fast_public_mode,
+
+ input wire bus_cs,
+ input wire bus_we,
+ input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr,
+ input wire [31:0] bus_data_wr,
+ output wire [31:0] bus_data_rd
+ );
+
+
+ //
+ // modexpa7_clog2()
+ //
+ function integer modexpa7_clog2;
+ input integer value;
+ integer ret;
+ begin
+ value = value - 1;
+ for (ret = 0; value > 0; ret = ret + 1)
+ value = value >> 1;
+ modexpa7_clog2 = ret;
+ end
+ endfunction
+
+
+ //
+ // Locals
+ //
+ localparam OPERAND_ADDR_WIDTH = modexpa7_clog2(MAX_MODULUS_WIDTH / 32);
+ localparam MODULUS_NUM_BITS = modexpa7_clog2(MAX_MODULUS_WIDTH + 1);
+ localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2;
+
+ localparam [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}};
+ localparam [OPERAND_ADDR_WIDTH :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}};
+
+ localparam [ MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}};
+
+
+ //
+ // User Memory
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr;
+ wire [ 31:0] ro_modulus_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr = bram_user_addr_zero;
+ wire [ 31:0] ro_message_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr = bram_user_addr_zero;
+ wire [ 31:0] ro_exponent_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr = bram_user_addr_zero;
+ wire [ 31:0] rw_result_bram_out;
+ reg rw_result_bram_wr = 1'b0;
+ wire [ 31:0] rw_result_bram_in;
+
+ modexpa7_buffer_user #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ mem_user
+ (
+ .clk (clk),
+
+ .bus_cs (bus_cs),
+ .bus_we (bus_we),
+ .bus_addr (bus_addr),
+ .bus_data_wr (bus_data_wr),
+ .bus_data_rd (bus_data_rd),
+
+ .ro_modulus_bram_addr (ro_modulus_bram_addr),
+ .ro_modulus_bram_out (ro_modulus_bram_out),
+
+ .ro_message_bram_addr (ro_message_bram_addr),
+ .ro_message_bram_out (ro_message_bram_out),
+
+ .ro_exponent_bram_addr (ro_exponent_bram_addr),
+ .ro_exponent_bram_out (ro_exponent_bram_out),
+
+ .rw_result_bram_addr (rw_result_bram_addr),
+ .rw_result_bram_wr (rw_result_bram_wr),
+ .rw_result_bram_in (rw_result_bram_in)
+ );
+
+
+ //
+ // Core (Internal) Memory
+ //
+ wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr;
+ wire rw_coeff_bram_wr;
+ wire [ 31:0] rw_coeff_bram_in;
+ wire [ 31:0] rw_coeff_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr = bram_core_addr_zero;
+ reg rw_mm_bram_wr = 1'b0;
+ reg [ 31:0] rw_mm_bram_in;
+ wire [ 31:0] rw_mm_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr;
+ wire rw_nn_bram_wr;
+ wire [ 31:0] rw_nn_bram_in;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr = bram_core_addr_zero;
+ reg rw_y_bram_wr = 1'b0;
+ reg [ 31:0] rw_y_bram_in;
+ wire [ 31:0] rw_y_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr;
+ wire rw_r_bram_wr;
+ wire [ 31:0] rw_r_bram_in;
+ wire [ 31:0] rw_r_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr = bram_core_addr_zero;
+ reg rw_t_bram_wr = 1'b0;
+ reg [ 31:0] rw_t_bram_in;
+ wire [ 31:0] rw_t_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr = bram_core_addr_zero;
+ wire [ 31:0] ro_coeff_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr;
+ wire [ 31:0] ro_mm_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr;
+ wire [ 31:0] ro_nn_bram_out;
+
+ reg [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr = bram_core_addr_zero;
+ wire [ 31:0] ro_r_bram_out;
+
+ wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr;
+ wire [ 31:0] ro_t_bram_out;
+
+ modexpa7_buffer_core #
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ mem_core
+ (
+ .clk (clk),
+
+ .rw_coeff_bram_addr (rw_coeff_bram_addr),
+ .rw_coeff_bram_wr (rw_coeff_bram_wr),
+ .rw_coeff_bram_in (rw_coeff_bram_in),
+ .rw_coeff_bram_out (rw_coeff_bram_out),
+
+ .rw_mm_bram_addr (rw_mm_bram_addr),
+ .rw_mm_bram_wr (rw_mm_bram_wr),
+ .rw_mm_bram_in (rw_mm_bram_in),
+ .rw_mm_bram_out (rw_mm_bram_out),
+
+ .rw_nn_bram_addr (rw_nn_bram_addr),
+ .rw_nn_bram_wr (rw_nn_bram_wr),
+ .rw_nn_bram_in (rw_nn_bram_in),
+
+ .rw_y_bram_addr (rw_y_bram_addr),
+ .rw_y_bram_wr (rw_y_bram_wr),
+ .rw_y_bram_in (rw_y_bram_in),
+ .rw_y_bram_out (rw_y_bram_out),
+
+ .rw_r_bram_addr (rw_r_bram_addr),
+ .rw_r_bram_wr (rw_r_bram_wr),
+ .rw_r_bram_in (rw_r_bram_in),
+ .rw_r_bram_out (rw_r_bram_out),
+
+ .rw_t_bram_addr (rw_t_bram_addr),
+ .rw_t_bram_wr (rw_t_bram_wr),
+ .rw_t_bram_in (rw_t_bram_in),
+ .rw_t_bram_out (rw_t_bram_out),
+
+ .ro_coeff_bram_addr (ro_coeff_bram_addr),
+ .ro_coeff_bram_out (ro_coeff_bram_out),
+
+ .ro_mm_bram_addr (ro_mm_bram_addr),
+ .ro_mm_bram_out (ro_mm_bram_out),
+
+ .ro_nn_bram_addr (ro_nn_bram_addr),
+ .ro_nn_bram_out (ro_nn_bram_out),
+
+ .ro_r_bram_addr (ro_r_bram_addr),
+ .ro_r_bram_out (ro_r_bram_out),
+
+ .ro_t_bram_addr (ro_t_bram_addr),
+ .ro_t_bram_out (ro_t_bram_out)
+ );
+
+
+ //
+ // Small 32-bit ModInv Core
+ //
+ wire modinv_ena;
+ wire modinv_rdy;
+
+ wire [31: 0] modinv_n0;
+ wire [31: 0] modinv_n0_negative = ~modinv_n0 + 1'b1;
+ wire [31: 0] modinv_n0_modinv;
+
+ modexpa7_modinv32 core_modinv32
+ (
+ .clk (clk),
+
+ .ena (modinv_ena),
+ .rdy (modinv_rdy),
+
+ .n0 (modinv_n0_negative),
+ .n0_modinv (modinv_n0_modinv)
+ );
+
+
+ //
+ // Montgomery Coefficient Calculator
+ //
+ modexpa7_montgomery_coeff #
+ (
+ .MODULUS_NUM_BITS (MODULUS_NUM_BITS),
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ core_montgomery_coeff
+ (
+ .clk (clk),
+
+ .ena (init),
+ .rdy (ready),
+
+ .modulus_width (modulus_width),
+
+ .coeff_bram_addr (rw_coeff_bram_addr),
+ .coeff_bram_wr (rw_coeff_bram_wr),
+ .coeff_bram_in (rw_coeff_bram_in),
+ .coeff_bram_out (rw_coeff_bram_out),
+
+ .nn_bram_addr (rw_nn_bram_addr),
+ .nn_bram_wr (rw_nn_bram_wr),
+ .nn_bram_in (rw_nn_bram_in),
+
+ .modulus_bram_addr (ro_modulus_bram_addr),
+ .modulus_bram_out (ro_modulus_bram_out),
+
+ .modinv_n0 (modinv_n0),
+ .modinv_ena (modinv_ena),
+ .modinv_rdy (modinv_rdy)
+ );
+
+
+ //
+ // Montgomery Multiplier
+ //
+ reg mul_ena = 1'b0;
+ wire mul_rdy;
+
+ modexpa7_montgomery_multiplier #
+ (
+ .OPERAND_NUM_BITS (MODULUS_NUM_BITS),
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH)
+ )
+ core_montgomery_multiplier
+ (
+ .clk (clk),
+
+ .ena (mul_ena),
+ .rdy (mul_rdy),
+
+ .operand_width (modulus_width),
+
+ .x_bram_addr (ro_t_bram_addr),
+ .x_bram_out (ro_t_bram_out),
+
+ .y_bram_addr (ro_mm_bram_addr),
+ .y_bram_out (ro_mm_bram_out),
+
+ .n_bram_addr (ro_nn_bram_addr),
+ .n_bram_out (ro_nn_bram_out),
+
+ .z_bram_addr (rw_r_bram_addr),
+ .z_bram_wr (rw_r_bram_wr),
+ .z_bram_in (rw_r_bram_in),
+ .z_bram_out (rw_r_bram_out),
+
+ .n0_modinv (modinv_n0_modinv)
+ );
+
+
+ //
+ // FSM
+ //
+ localparam FSM_STATE_IDLE = 6'd0;
+
+ localparam FSM_STATE_INIT_LOAD = 6'd11;
+ localparam FSM_STATE_INIT_WAIT = 6'd12;
+ localparam FSM_STATE_INIT_UNLOAD = 6'd13;
+
+ localparam FSM_STATE_READ_EI = 6'd20;
+
+ localparam FSM_STATE_ROUND_BEGIN = 6'd25;
+
+ localparam FSM_STATE_MULTIPLY_LOAD = 6'd31;
+ localparam FSM_STATE_MULTIPLY_WAIT = 6'd32;
+ localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33;
+
+ localparam FSM_STATE_SQUARE_LOAD = 6'd41;
+ localparam FSM_STATE_SQUARE_WAIT = 6'd42;
+ localparam FSM_STATE_SQUARE_UNLOAD = 6'd43;
+
+ localparam FSM_STATE_ROUND_END = 6'd50;
+
+ localparam FSM_STATE_FINAL = 6'd60;
+
+ reg [5: 0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Trigger
+ //
+ reg next_dly = 1'b0;
+ always @(posedge clk) next_dly <= next;
+ wire next_trig = (next == 1'b1) && (next_dly == 1'b0);
+
+
+ //
+ // Valid Register
+ //
+ reg valid_reg = 1'b0;
+ assign valid = valid_reg;
+
+
+ //
+ // Next/ Valid Logic
+ //
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_FINAL) begin
+ //
+ valid_reg <= 1'b1;
+ //
+ end else if (fsm_state == FSM_STATE_IDLE) begin
+ //
+ if (valid_reg && !next) valid_reg <= 1'b0;
+ //
+ end
+
+
+ //
+ // Exponent Bit Counter
+ //
+ reg [4: 0] ei_bit_count = 5'd0;
+ wire ei_bit = ro_exponent_bram_out[ei_bit_count];
+
+
+ //
+ // Round Counter
+ //
+ reg [MODULUS_NUM_BITS:0] round_count = round_count_zero;
+ wire [MODULUS_NUM_BITS:0] round_count_last = exponent_width - 1'b1;
+ wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero;
+
+
+ //
+ // Handy Wires
+ //
+ wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH];
+
+ wire [OPERAND_ADDR_WIDTH :0] bram_core_addr_last = {modulus_width_msb, 1'b0};
+
+ wire [OPERAND_ADDR_WIDTH :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0];
+
+
+ //
+ // Handy Functions
+ //
+ function [OPERAND_ADDR_WIDTH:0] bram_core_addr_next_or_zero;
+ input [OPERAND_ADDR_WIDTH:0] bram_core_addr;
+ begin
+ bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero;
+ end
+ endfunction
+
+ function [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_next_or_zero;
+ input [OPERAND_ADDR_WIDTH-1:0] bram_user_addr;
+ begin
+ bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? bram_user_addr + 1'b1 : bram_user_addr_zero;
+ end
+ endfunction
+
+
+ //
+ // Result BRAM Input
+ //
+ assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out;
+
+
+ //
+ // MM BRAM Input Selector
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ //
+ rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}};
+
+ FSM_STATE_INIT_UNLOAD:
+ //
+ rw_mm_bram_in = ro_r_bram_out;
+
+ FSM_STATE_SQUARE_UNLOAD:
+ //
+ rw_mm_bram_in = ro_r_bram_out;
+
+ default:
+ //
+ rw_mm_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Y BRAM Input Selector
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ //
+ rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000;
+
+ FSM_STATE_MULTIPLY_UNLOAD:
+ //
+ rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW!
+
+ default:
+ //
+ rw_y_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // T BRAM Input Selector
+ //
+ always @(*)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD:
+ //
+ rw_t_bram_in = ro_coeff_bram_out;
+
+ FSM_STATE_MULTIPLY_LOAD:
+ //
+ rw_t_bram_in = rw_y_bram_out;
+
+ FSM_STATE_SQUARE_LOAD:
+ //
+ rw_t_bram_in = rw_mm_bram_out;
+
+ default:
+ //
+ rw_t_bram_in = {32{1'bX}};
+
+ endcase
+
+
+ //
+ // Main Logic
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_INIT_LOAD: begin
+ //
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ rw_y_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ rw_t_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ rw_y_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ rw_t_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ //
+ if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+ else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr);
+ //
+ if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr);
+ else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr);
+ //
+ end
+
+ FSM_STATE_INIT_WAIT: begin
+ //
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_INIT_UNLOAD: begin
+ //
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end
+
+ FSM_STATE_MULTIPLY_LOAD: begin
+ //
+ rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+ //
+ if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr);
+ else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr);
+ //
+ end
+
+ FSM_STATE_MULTIPLY_WAIT: begin
+ //
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_MULTIPLY_UNLOAD: begin
+ //
+ rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_y_bram_addr <= rw_y_bram_wr ? bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero;
+ //
+ if (ei_bit) begin
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end else begin
+ //
+ if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr);
+ else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr);
+ //
+ end
+ //
+ if (round_count == round_count_last) begin
+ //
+ if (rw_result_bram_addr == bram_user_addr_zero) begin
+ //
+ if (rw_y_bram_wr) begin
+ //
+ rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1;
+ rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr);
+ //
+ end else begin
+ //
+ rw_result_bram_wr <= 1'b1;
+ rw_result_bram_addr <= bram_user_addr_zero;
+ //
+ end
+ //
+ end else begin
+ //
+ rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0;
+ rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr);
+ //
+ end
+ //
+ end
+ //
+ end
+
+ FSM_STATE_SQUARE_LOAD: begin
+ //
+ rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero;
+ //
+ if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr);
+ else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr);
+ //
+ end
+
+ FSM_STATE_SQUARE_WAIT: begin
+ //
+ if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1;
+ else mul_ena <= 1'b1;
+ //
+ end
+
+ FSM_STATE_SQUARE_UNLOAD: begin
+ //
+ rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0;
+ //
+ rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero;
+ //
+ if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr);
+ else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr);
+ //
+ end
+
+ FSM_STATE_ROUND_END: begin
+ //
+ round_count <= round_count_next;
+ //
+ if (round_count < round_count_last) begin
+ //
+ ei_bit_count <= ei_bit_count + 1'b1;
+ //
+ if (ei_bit_count == 5'd31)
+ //
+ ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr);
+ //
+ end else begin
+ //
+ ei_bit_count <= 5'd0;
+ //
+ ro_exponent_bram_addr <= bram_user_addr_zero;
+ //
+ end
+ //
+ end
+
+ endcase
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clk)
+ //
+ case (fsm_state)
+
+ FSM_STATE_IDLE: fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE;
+
+ FSM_STATE_INIT_LOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT;
+ FSM_STATE_INIT_WAIT: fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT;
+ FSM_STATE_INIT_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI;
+
+ FSM_STATE_READ_EI: fsm_state <= FSM_STATE_ROUND_BEGIN;
+
+ FSM_STATE_ROUND_BEGIN: fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD;
+
+ FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT;
+ FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT;
+ FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD;
+
+ FSM_STATE_SQUARE_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT;
+ FSM_STATE_SQUARE_WAIT: fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT;
+ FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END;
+
+ FSM_STATE_ROUND_END: fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL;
+
+ FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+endmodule