From c09de3ee3a303bfab596def8e0b5c8b845e5a97f Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Tue, 7 Mar 2017 19:55:09 -0500 Subject: Promote to a repository in the core tree. Change name of reset signal from rst_n to reset_n for consistency with other Cryptech cores. Code common between this core and the ecdsa256 core split out into a separate library repository. Minor cleanup (Windows-isms, indentation). --- rtl/curve/curve_dbl_add_384.v | 1748 ++++++++++---------- rtl/curve/curve_mul_384.v | 1440 ++++++++-------- rtl/curve/rom/brom_p384_delta.v | 46 +- rtl/curve/rom/brom_p384_g_x.v | 48 +- rtl/curve/rom/brom_p384_g_y.v | 50 +- rtl/curve/rom/brom_p384_h_x.v | 46 +- rtl/curve/rom/brom_p384_h_y.v | 48 +- rtl/curve/rom/brom_p384_one.v | 46 +- rtl/curve/rom/brom_p384_q.v | 50 +- rtl/curve/rom/brom_p384_zero.v | 10 +- rtl/curve/uop/uop_add_rom.v | 66 - rtl/curve/uop/uop_conv_rom.v | 38 - rtl/curve/uop/uop_dbl_rom.v | 58 - rtl/curve/uop/uop_init_rom.v | 33 - rtl/curve/uop_ecdsa.v | 50 - rtl/ecdsa384.v | 180 +- rtl/ecdsa384_wrapper.v | 46 +- rtl/lowlevel/adder32_wrapper.v | 73 - rtl/lowlevel/adder47_wrapper.v | 69 - rtl/lowlevel/artix7/adder32_artix7.v | 96 -- rtl/lowlevel/artix7/adder47_artix7.v | 91 - rtl/lowlevel/artix7/dsp48e1_wrapper.v | 159 -- rtl/lowlevel/artix7/mac16_artix7.v | 90 - rtl/lowlevel/artix7/subtractor32_artix7.v | 94 -- rtl/lowlevel/ecdsa_lowlevel_settings.v | 17 - rtl/lowlevel/generic/adder32_generic.v | 67 - rtl/lowlevel/generic/adder47_generic.v | 64 - rtl/lowlevel/generic/mac16_generic.v | 74 - rtl/lowlevel/generic/subtractor32_generic.v | 67 - rtl/lowlevel/mac16_wrapper.v | 75 - rtl/lowlevel/subtractor32_wrapper.v | 72 - rtl/modular/modular_adder.v | 298 ---- .../modular_invertor/helper/modinv_helper_copy.v | 148 -- .../modular_invertor/helper/modinv_helper_init.v | 172 -- .../helper/modinv_helper_invert_compare.v | 286 ---- .../helper/modinv_helper_invert_precalc.v | 408 ----- .../helper/modinv_helper_invert_update.v | 257 --- 
.../helper/modinv_helper_reduce_precalc.v | 328 ---- .../helper/modinv_helper_reduce_update.v | 153 -- rtl/modular/modular_invertor/modinv_clog2.v | 10 - rtl/modular/modular_invertor/modular_invertor.v | 981 ----------- rtl/modular/modular_multiplier_384.v | 804 ++++----- rtl/modular/modular_reductor_384.v | 1478 ++++++++--------- rtl/modular/modular_subtractor.v | 292 ---- rtl/multiword/mw_comparator.v | 220 --- rtl/multiword/mw_mover.v | 175 -- rtl/util/bram_1rw_1ro_readfirst.v | 101 -- 47 files changed, 3020 insertions(+), 8202 deletions(-) delete mode 100644 rtl/curve/uop/uop_add_rom.v delete mode 100644 rtl/curve/uop/uop_conv_rom.v delete mode 100644 rtl/curve/uop/uop_dbl_rom.v delete mode 100644 rtl/curve/uop/uop_init_rom.v delete mode 100644 rtl/curve/uop_ecdsa.v delete mode 100644 rtl/lowlevel/adder32_wrapper.v delete mode 100644 rtl/lowlevel/adder47_wrapper.v delete mode 100644 rtl/lowlevel/artix7/adder32_artix7.v delete mode 100644 rtl/lowlevel/artix7/adder47_artix7.v delete mode 100644 rtl/lowlevel/artix7/dsp48e1_wrapper.v delete mode 100644 rtl/lowlevel/artix7/mac16_artix7.v delete mode 100644 rtl/lowlevel/artix7/subtractor32_artix7.v delete mode 100644 rtl/lowlevel/ecdsa_lowlevel_settings.v delete mode 100644 rtl/lowlevel/generic/adder32_generic.v delete mode 100644 rtl/lowlevel/generic/adder47_generic.v delete mode 100644 rtl/lowlevel/generic/mac16_generic.v delete mode 100644 rtl/lowlevel/generic/subtractor32_generic.v delete mode 100644 rtl/lowlevel/mac16_wrapper.v delete mode 100644 rtl/lowlevel/subtractor32_wrapper.v delete mode 100644 rtl/modular/modular_adder.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_copy.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_init.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v delete mode 100644 
rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v delete mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v delete mode 100644 rtl/modular/modular_invertor/modinv_clog2.v delete mode 100644 rtl/modular/modular_invertor/modular_invertor.v delete mode 100644 rtl/modular/modular_subtractor.v delete mode 100644 rtl/multiword/mw_comparator.v delete mode 100644 rtl/multiword/mw_mover.v delete mode 100644 rtl/util/bram_1rw_1ro_readfirst.v (limited to 'rtl') diff --git a/rtl/curve/curve_dbl_add_384.v b/rtl/curve/curve_dbl_add_384.v index 70a23bf..d14bbc7 100644 --- a/rtl/curve/curve_dbl_add_384.v +++ b/rtl/curve/curve_dbl_add_384.v @@ -1,874 +1,874 @@ -//------------------------------------------------------------------------------ -// -// curve_dbl_add_384.v -// ----------------------------------------------------------------------------- -// Elliptic curve point adder and doubler. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module curve_dbl_add_384 - ( - clk, rst_n, - ena, rdy, - uop_addr, uop, - px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr, - rx_wren, ry_wren, rz_wren, - px_din, py_din, pz_din, - rx_din, ry_din, rz_din, - rx_dout, ry_dout, rz_dout, q_din, v_din - ); - - - // - // Microcode - // -`include "uop_ecdsa.v" - - - // - // Constants - // - localparam WORD_COUNTER_WIDTH = 4; // 0 .. 
11 - localparam OPERAND_NUM_WORDS = 12; // 12 * 32 = 384 - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output reg [ 6-1: 0] uop_addr; - input wire [20-1: 0] uop; - - output reg [WORD_COUNTER_WIDTH-1:0] px_addr; - output reg [WORD_COUNTER_WIDTH-1:0] py_addr; - output reg [WORD_COUNTER_WIDTH-1:0] pz_addr; - output reg [WORD_COUNTER_WIDTH-1:0] rx_addr; - output reg [WORD_COUNTER_WIDTH-1:0] ry_addr; - output reg [WORD_COUNTER_WIDTH-1:0] rz_addr; - output reg [WORD_COUNTER_WIDTH-1:0] v_addr; - output wire [WORD_COUNTER_WIDTH-1:0] q_addr; - - output wire rx_wren; - output wire ry_wren; - output wire rz_wren; - - input wire [ 32-1:0] px_din; - input wire [ 32-1:0] py_din; - input wire [ 32-1:0] pz_din; - input wire [ 32-1:0] rx_din; - input wire [ 32-1:0] ry_din; - input wire [ 32-1:0] rz_din; - output wire [ 32-1:0] rx_dout; - output wire [ 32-1:0] ry_dout; - output wire [ 32-1:0] rz_dout; - input wire [ 32-1:0] q_din; - input wire [ 32-1:0] v_din; - - - // - // Microcode - // - wire [ 4: 0] uop_opcode = uop[19:15]; - wire [ 4: 0] uop_src_a = uop[14:10]; - wire [ 4: 0] uop_src_b = uop[ 9: 5]; - wire [ 2: 0] uop_dst = uop[ 4: 2]; - wire [ 1: 0] uop_exec = uop[ 1: 0]; - - - // - // Multi-Word Comparator - // - wire mw_cmp_ena; - wire mw_cmp_rdy; - - wire mw_cmp_out_l; - wire mw_cmp_out_e; - wire mw_cmp_out_g; - - wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy; - - wire [ 32-1:0] mw_cmp_din_x; - wire [ 32-1:0] mw_cmp_din_y; - - // flags - reg flag_pz_is_zero; - reg flag_t1_is_zero; - reg flag_t2_is_zero; - - mw_comparator # - ( - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) - ) - mw_comparator_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (mw_cmp_ena), - .rdy (mw_cmp_rdy), - - .xy_addr (mw_cmp_addr_xy), - .x_din (mw_cmp_din_x), - .y_din (mw_cmp_din_y), - - .cmp_l (mw_cmp_out_l), - .cmp_e (mw_cmp_out_e), 
- .cmp_g (mw_cmp_out_g) - ); - - - // - // Modular Adder - // - wire mod_add_ena; - wire mod_add_rdy; - - wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab; - wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n; - wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s; - wire mod_add_wren_s; - - wire [ 32-1:0] mod_add_din_a; - wire [ 32-1:0] mod_add_din_b; - wire [ 32-1:0] mod_add_din_n; - wire [ 32-1:0] mod_add_dout_s; - - assign mod_add_din_n = q_din; - - modular_adder # - ( - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) - ) - modular_adder_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (mod_add_ena), - .rdy (mod_add_rdy), - - .ab_addr (mod_add_addr_ab), - .n_addr (mod_add_addr_n), - .s_addr (mod_add_addr_s), - .s_wren (mod_add_wren_s), - - .a_din (mod_add_din_a), - .b_din (mod_add_din_b), - .n_din (mod_add_din_n), - .s_dout (mod_add_dout_s) - ); - - - // - // Modular Subtractor - // - wire mod_sub_ena; - wire mod_sub_rdy; - - wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab; - wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n; - wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d; - wire mod_sub_wren_d; - - wire [ 32-1:0] mod_sub_din_a; - wire [ 32-1:0] mod_sub_din_b; - wire [ 32-1:0] mod_sub_din_n; - wire [ 32-1:0] mod_sub_dout_d; - - assign mod_sub_din_n = q_din; - - modular_subtractor # - ( - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) - ) - modular_subtractor_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (mod_sub_ena), - .rdy (mod_sub_rdy), - - .ab_addr (mod_sub_addr_ab), - .n_addr (mod_sub_addr_n), - .d_addr (mod_sub_addr_d), - .d_wren (mod_sub_wren_d), - - .a_din (mod_sub_din_a), - .b_din (mod_sub_din_b), - .n_din (mod_sub_din_n), - .d_dout (mod_sub_dout_d) - ); - - - // - // Modular Multiplier - // - wire mod_mul_ena; - wire mod_mul_rdy; - - wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a; - wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b; - wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n; - wire 
[WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p; - wire mod_mul_wren_p; - - wire [ 32-1:0] mod_mul_din_a; - wire [ 32-1:0] mod_mul_din_b; - wire [ 32-1:0] mod_mul_din_n; - wire [ 32-1:0] mod_mul_dout_p; - - assign mod_mul_din_n = q_din; - - modular_multiplier_384 modular_multiplier_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (mod_mul_ena), - .rdy (mod_mul_rdy), - - .a_addr (mod_mul_addr_a), - .b_addr (mod_mul_addr_b), - .n_addr (mod_mul_addr_n), - .p_addr (mod_mul_addr_p), - .p_wren (mod_mul_wren_p), - - .a_din (mod_mul_din_a), - .b_din (mod_mul_din_b), - .n_din (mod_mul_din_n), - .p_dout (mod_mul_dout_p) - ); - - - // - // Multi-Word Data Mover - // - wire mw_mov_ena; - wire mw_mov_rdy; - - wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x; - wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y; - wire mw_mov_wren_y; - - wire [ 32-1:0] mw_mov_din_x; - wire [ 32-1:0] mw_mov_dout_y; - - mw_mover # - ( - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) - - ) - mw_mover_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (mw_mov_ena), - .rdy (mw_mov_rdy), - - .x_addr (mw_mov_addr_x), - .y_addr (mw_mov_addr_y), - .y_wren (mw_mov_wren_y), - - .x_din (mw_mov_din_x), - .y_dout (mw_mov_dout_y) - ); - - - // - // ROMs - // - reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr; - //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr; - reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr; - reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr; - reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr; - reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr; - reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr; - - wire [ 32-1:0] brom_one_dout; - wire [ 32-1:0] brom_zero_dout; - wire [ 32-1:0] brom_delta_dout; - wire [ 32-1:0] brom_g_x_dout; - wire [ 32-1:0] brom_g_y_dout; - wire [ 32-1:0] brom_h_x_dout; - wire [ 32-1:0] brom_h_y_dout; - - (* ROM_STYLE="BLOCK" *) brom_p384_one brom_one_inst - (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout)); - - brom_p384_zero brom_zero_inst - (.b_out(brom_zero_dout)); - - (* 
ROM_STYLE="BLOCK" *) brom_p384_delta brom_delta_inst - (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout)); - - (* ROM_STYLE="BLOCK" *) brom_p384_g_x brom_g_x_inst - (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout)); - - (* ROM_STYLE="BLOCK" *) brom_p384_g_y brom_g_y_inst - (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout)); - - (* ROM_STYLE="BLOCK" *) brom_p384_h_x brom_h_x_inst - (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout)); - - (* ROM_STYLE="BLOCK" *) brom_p384_h_y brom_h_y_inst - (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout)); - - - // - // Temporary Variables - // - reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr; - - reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr; - reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr; - - wire bram_t1_wr_en; - wire bram_t2_wr_en; - wire bram_t3_wr_en; - wire bram_t4_wr_en; - - wire [ 32-1:0] bram_t1_wr_data; - wire [ 32-1:0] bram_t2_wr_data; - wire [ 32-1:0] bram_t3_wr_data; - wire [ 32-1:0] bram_t4_wr_data; - - wire [ 32-1:0] bram_t1_rd_data; - wire [ 32-1:0] bram_t2_rd_data; - wire [ 32-1:0] bram_t3_rd_data; - wire [ 32-1:0] bram_t4_rd_data; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) - ) - bram_t1 - ( .clk (clk), - .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(), - .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data) - ); - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) - ) - bram_t2 - ( .clk (clk), - .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(), - .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data) - ); - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) - ) - bram_t3 - ( .clk 
(clk), - .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(), - .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data) - ); - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) - ) - bram_t4 - ( .clk (clk), - .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(), - .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data) - ); - - - // - // uOP Trigger Logic - // - reg uop_trig; - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) uop_trig <= 1'b0; - else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0; - - - // - // FSM - // - localparam [ 1: 0] FSM_STATE_STALL = 2'b00; - localparam [ 1: 0] FSM_STATE_FETCH = 2'b01; - localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10; - - reg [ 1: 0] fsm_state = FSM_STATE_STALL; - wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH; - - - // - // FSM Transition Logic - // - reg uop_done; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL; - else case (fsm_state) - FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL; - FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE; - FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE; - default: fsm_state <= FSM_STATE_STALL; - endcase - - - // - // uOP Address Increment Logic - // - always @(posedge clk) - // - if (fsm_state == FSM_STATE_STALL) - uop_addr <= 5'd0; - else if (fsm_state == FSM_STATE_EXECUTE) - if (!uop_trig && uop_done) - uop_addr <= (uop_opcode == OPCODE_RDY) ? 
5'd0 : uop_addr + 1'b1; - - - // - // uOP Completion Logic - // - always @(*) - // - case (uop_opcode) - OPCODE_CMP: uop_done = mw_cmp_rdy; - OPCODE_MOV: uop_done = mw_mov_rdy; - OPCODE_ADD: uop_done = mod_add_rdy; - OPCODE_SUB: uop_done = mod_sub_rdy; - OPCODE_MUL: uop_done = mod_mul_rdy; - OPCODE_RDY: uop_done = 1'b1; - default: uop_done = 1'b0; - endcase - - - // - // Helper Modules Enable Logic - // - assign mw_cmp_ena = uop_opcode[0] & uop_trig; - assign mw_mov_ena = uop_opcode[1] & uop_trig; - assign mod_add_ena = uop_opcode[2] & uop_trig; - assign mod_sub_ena = uop_opcode[3] & uop_trig; - assign mod_mul_ena = uop_opcode[4] & uop_trig; - - - // - // uOP Source Value Decoding Logic - // - reg [31: 0] uop_src_a_value; - - always @(*) - // - case (uop_src_a) - UOP_SRC_PX: uop_src_a_value = px_din; - UOP_SRC_PY: uop_src_a_value = py_din; - UOP_SRC_PZ: uop_src_a_value = pz_din; - - UOP_SRC_RX: uop_src_a_value = rx_din; - UOP_SRC_RY: uop_src_a_value = ry_din; - UOP_SRC_RZ: uop_src_a_value = rz_din; - - UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data; - UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data; - UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data; - UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data; - - UOP_SRC_ONE: uop_src_a_value = brom_one_dout; - UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout; - UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout; - - UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout; - UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout; - - UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout; - UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout; - - UOP_SRC_V: uop_src_a_value = v_din; - - default: uop_src_a_value = {32{1'bX}}; - endcase - - - assign mw_cmp_din_x = uop_src_a_value; - assign mw_mov_din_x = uop_src_a_value; - assign mod_add_din_a = uop_src_a_value; - assign mod_sub_din_a = uop_src_a_value; - assign mod_mul_din_a = uop_src_a_value; - - reg [31: 0] uop_src_b_value; - - always @(*) - // - case (uop_src_b) - UOP_SRC_PX: uop_src_b_value = px_din; - UOP_SRC_PY: 
uop_src_b_value = py_din; - UOP_SRC_PZ: uop_src_b_value = pz_din; - - UOP_SRC_RX: uop_src_b_value = rx_din; - UOP_SRC_RY: uop_src_b_value = ry_din; - UOP_SRC_RZ: uop_src_b_value = rz_din; - - UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data; - UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data; - UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data; - UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data; - - UOP_SRC_ONE: uop_src_b_value = brom_one_dout; - UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout; - UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout; - - UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout; - UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout; - - UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout; - UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout; - - UOP_SRC_V: uop_src_b_value = v_din; - - default: uop_src_b_value = {32{1'bX}}; - endcase - - assign mw_cmp_din_y = uop_src_b_value; - assign mod_add_din_b = uop_src_b_value; - assign mod_sub_din_b = uop_src_b_value; - assign mod_mul_din_b = uop_src_b_value; - - - // - // uOP Source & Destination Address Decoding Logic - // - reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr; - reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr; - reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr; - reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr; - - assign q_addr = uop_q_addr; - - always @(*) - // - case (uop_opcode) - // - OPCODE_CMP: begin - uop_src_a_addr = mw_cmp_addr_xy; - uop_src_b_addr = mw_cmp_addr_xy; - uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}}; - uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - OPCODE_MOV: begin - uop_src_a_addr = mw_mov_addr_x; - uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}}; - uop_dst_addr = mw_mov_addr_y; - uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - OPCODE_ADD: begin - uop_src_a_addr = mod_add_addr_ab; - uop_src_b_addr = mod_add_addr_ab; - uop_dst_addr = mod_add_addr_s; - uop_q_addr = mod_add_addr_n; - end - // - OPCODE_SUB: begin - uop_src_a_addr = mod_sub_addr_ab; - uop_src_b_addr = mod_sub_addr_ab; - uop_dst_addr = mod_sub_addr_d; - 
uop_q_addr = mod_sub_addr_n; - end - // - OPCODE_MUL: begin - uop_src_a_addr = mod_mul_addr_a; - uop_src_b_addr = mod_mul_addr_b; - uop_dst_addr = mod_mul_addr_p; - uop_q_addr = mod_mul_addr_n; - end - // - default: begin - uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}}; - uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}}; - uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}}; - uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - endcase - - - // - // uOP Conditional Execution Logic - // - reg uop_exec_effective; - - always @(*) - // - case (uop_exec) - UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1; - UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero; - UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero; - UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero; - endcase - - - // - // uOP Destination Store Logic - // - reg uop_dst_wren; - - always @(*) - // - case (uop_opcode) - // - OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective; - OPCODE_ADD: uop_dst_wren = mod_add_wren_s; - OPCODE_SUB: uop_dst_wren = mod_sub_wren_d; - OPCODE_MUL: uop_dst_wren = mod_mul_wren_p; - default: uop_dst_wren = 1'b0; - // - endcase - - - always @(*) begin - // - // - // - if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr; - else px_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr; - else py_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr; - else pz_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr; - else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - //if (uop_src_a == UOP_SRC_ZERO) 
brom_zero_addr = uop_src_a_addr; - //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr; - //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr; - else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr; - else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr; - else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr; - else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr; - else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr; - else v_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr; - else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr; - else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr; - else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr; - else if (uop_src_b == 
UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr; - else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr; - else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr; - else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr; - else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr; - else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; - // - // - // - if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr; - else begin - if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr; - else rx_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr; - else begin - if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr; - else ry_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr; - else begin - if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr; - else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr; - else rz_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - end - - - assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX); - assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY); - assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ); - - assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1); - assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2); - assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3); - assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4); - - - - // - // Destination Value Selector - // - reg [31: 0] uop_dst_value; - - always @(*) - // - case (uop_opcode) - - OPCODE_MOV: uop_dst_value = 
mw_mov_dout_y; - OPCODE_ADD: uop_dst_value = mod_add_dout_s; - OPCODE_SUB: uop_dst_value = mod_sub_dout_d; - OPCODE_MUL: uop_dst_value = mod_mul_dout_p; - - default: uop_dst_value = {32{1'bX}}; - - endcase - - assign rx_dout = uop_dst_value; - assign ry_dout = uop_dst_value; - assign rz_dout = uop_dst_value; - - assign bram_t1_wr_data = uop_dst_value; - assign bram_t2_wr_data = uop_dst_value; - assign bram_t3_wr_data = uop_dst_value; - assign bram_t4_wr_data = uop_dst_value; - - - // - // Latch Comparison Flags - // - always @(posedge clk) - // - if ( (fsm_state == FSM_STATE_EXECUTE) && - (uop_opcode == OPCODE_CMP) && - (uop_done && !uop_trig) ) begin - - if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) ) - flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; - - if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) ) - flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; - - if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) ) - flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; - - end - - - // - // Ready Flag Logic - // - reg rdy_reg = 1'b1; - assign rdy = rdy_reg; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) rdy_reg <= 1'b1; - else begin - - /* clear flag */ - if (fsm_state == FSM_STATE_STALL) - if (ena) rdy_reg <= 1'b0; - - /* set flag */ - if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done) - if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1; - - end - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +// +// curve_dbl_add_384.v +// ----------------------------------------------------------------------------- +// Elliptic curve point adder and doubler. 
+// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+//
+//------------------------------------------------------------------------------
+
+//
+// curve_dbl_add_384
+//
+// Microcoded worker. Each 20-bit micro-operation (uop) is fetched from an
+// external ROM through uop_addr/uop and split into five fields:
+//
+//    uop[19:15]  opcode  (CMP / MOV / ADD / SUB / MUL / RDY)
+//    uop[14:10]  source operand A selector
+//    uop[ 9: 5]  source operand B selector
+//    uop[ 4: 2]  destination selector
+//    uop[ 1: 0]  conditional execution mode
+//
+// The opcode selects one of five helper modules (multi-word comparator,
+// multi-word mover, modular adder, subtractor and multiplier). Operand word
+// addresses produced by the active helper are routed to whichever memories
+// the source/destination selectors name, and the helper result is written
+// to the destination. A small FSM (STALL -> FETCH -> EXECUTE) sequences the
+// uops until an OPCODE_RDY uop is reached, at which point rdy is raised
+// and the uop address returns to zero.
+//
+// OPCODE_*, UOP_SRC_*, UOP_DST_* and UOP_EXEC_* come from the included
+// uop_ecdsa.v, which is not part of this file.
+//
+module curve_dbl_add_384
+  (
+   clk, rst_n,
+   ena, rdy,
+   uop_addr, uop,
+   px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
+   rx_wren, ry_wren, rz_wren,
+   px_din, py_din, pz_din,
+   rx_din, ry_din, rz_din,
+   rx_dout, ry_dout, rz_dout, q_din, v_din
+   );
+
+
+   //
+   // Microcode
+   //
+`include "../../../../math/ecdsalib/rtl/curve/uop_ecdsa.v"
+
+
+   //
+   // Constants
+   //
+   localparam WORD_COUNTER_WIDTH =  4;   // 0 .. 11
+   localparam OPERAND_NUM_WORDS  = 12;   // 12 * 32 = 384
+
+
+   //
+   // Ports
+   //
+   input  wire clk;     // system clock
+   input  wire rst_n;   // active-low async reset
+
+   input  wire ena;     // enable input
+   output wire rdy;     // ready output
+
+   output reg  [ 6-1: 0] uop_addr;   // address of the current uop
+   input  wire [20-1: 0] uop;        // uop word read at uop_addr
+
+   output reg  [WORD_COUNTER_WIDTH-1:0] px_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] py_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] pz_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] rx_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] ry_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] rz_addr;
+   output reg  [WORD_COUNTER_WIDTH-1:0] v_addr;
+   output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
+
+   output wire rx_wren;
+   output wire ry_wren;
+   output wire rz_wren;
+
+   input  wire [ 32-1:0] px_din;
+   input  wire [ 32-1:0] py_din;
+   input  wire [ 32-1:0] pz_din;
+   input  wire [ 32-1:0] rx_din;
+   input  wire [ 32-1:0] ry_din;
+   input  wire [ 32-1:0] rz_din;
+   output wire [ 32-1:0] rx_dout;
+   output wire [ 32-1:0] ry_dout;
+   output wire [ 32-1:0] rz_dout;
+   input  wire [ 32-1:0] q_din;
+   input  wire [ 32-1:0] v_din;
+
+
+   //
+   // Microcode (field extraction)
+   //
+   wire [ 4: 0] uop_opcode = uop[19:15];
+   wire [ 4: 0] uop_src_a  = uop[14:10];
+   wire [ 4: 0] uop_src_b  = uop[ 9: 5];
+   wire [ 2: 0] uop_dst    = uop[ 4: 2];
+   wire [ 1: 0] uop_exec   = uop[ 1: 0];
+
+
+   //
+   // Multi-Word Comparator
+   //
+   wire mw_cmp_ena;
+   wire mw_cmp_rdy;
+
+   wire mw_cmp_out_l;
+   wire mw_cmp_out_e;
+   wire mw_cmp_out_g;
+
+   wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
+
+   wire [ 32-1:0] mw_cmp_din_x;
+   wire [ 32-1:0] mw_cmp_din_y;
+
+   // flags (latched after CMP uops that compare PZ/T1/T2 against ZERO)
+   reg flag_pz_is_zero;
+   reg flag_t1_is_zero;
+   reg flag_t2_is_zero;
+
+   mw_comparator #
+     (
+      .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
+      )
+   mw_comparator_inst
+     (
+      .clk      (clk),
+      .rst_n    (rst_n),
+
+      .ena      (mw_cmp_ena),
+      .rdy      (mw_cmp_rdy),
+
+      .xy_addr  (mw_cmp_addr_xy),
+      .x_din    (mw_cmp_din_x),
+      .y_din    (mw_cmp_din_y),
+
+      .cmp_l    (mw_cmp_out_l),
+      .cmp_e    (mw_cmp_out_e),
+      .cmp_g    (mw_cmp_out_g)
+      );
+
+
+   //
+   // Modular Adder
+   //
+   wire mod_add_ena;
+   wire mod_add_rdy;
+
+   wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
+   wire                          mod_add_wren_s;
+
+   wire [ 32-1:0] mod_add_din_a;
+   wire [ 32-1:0] mod_add_din_b;
+   wire [ 32-1:0] mod_add_din_n;
+   wire [ 32-1:0] mod_add_dout_s;
+
+   // the modulus operand always comes from the q_din port
+   assign mod_add_din_n = q_din;
+
+   modular_adder #
+     (
+      .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
+      )
+   modular_adder_inst
+     (
+      .clk      (clk),
+      .rst_n    (rst_n),
+
+      .ena      (mod_add_ena),
+      .rdy      (mod_add_rdy),
+
+      .ab_addr  (mod_add_addr_ab),
+      .n_addr   (mod_add_addr_n),
+      .s_addr   (mod_add_addr_s),
+      .s_wren   (mod_add_wren_s),
+
+      .a_din    (mod_add_din_a),
+      .b_din    (mod_add_din_b),
+      .n_din    (mod_add_din_n),
+      .s_dout   (mod_add_dout_s)
+      );
+
+
+   //
+   // Modular Subtractor
+   //
+   wire mod_sub_ena;
+   wire mod_sub_rdy;
+
+   wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
+   wire                          mod_sub_wren_d;
+
+   wire [ 32-1:0] mod_sub_din_a;
+   wire [ 32-1:0] mod_sub_din_b;
+   wire [ 32-1:0] mod_sub_din_n;
+   wire [ 32-1:0] mod_sub_dout_d;
+
+   assign mod_sub_din_n = q_din;
+
+   modular_subtractor #
+     (
+      .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
+      )
+   modular_subtractor_inst
+     (
+      .clk      (clk),
+      .rst_n    (rst_n),
+
+      .ena      (mod_sub_ena),
+      .rdy      (mod_sub_rdy),
+
+      .ab_addr  (mod_sub_addr_ab),
+      .n_addr   (mod_sub_addr_n),
+      .d_addr   (mod_sub_addr_d),
+      .d_wren   (mod_sub_wren_d),
+
+      .a_din    (mod_sub_din_a),
+      .b_din    (mod_sub_din_b),
+      .n_din    (mod_sub_din_n),
+      .d_dout   (mod_sub_dout_d)
+      );
+
+
+   //
+   // Modular Multiplier
+   //
+   wire mod_mul_ena;
+   wire mod_mul_rdy;
+
+   wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
+   wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
+   wire                          mod_mul_wren_p;
+
+   wire [ 32-1:0] mod_mul_din_a;
+   wire [ 32-1:0] mod_mul_din_b;
+   wire [ 32-1:0] mod_mul_din_n;
+   wire [ 32-1:0] mod_mul_dout_p;
+
+   assign mod_mul_din_n = q_din;
+
+   modular_multiplier_384 modular_multiplier_inst
+     (
+      .clk      (clk),
+      .rst_n    (rst_n),
+
+      .ena      (mod_mul_ena),
+      .rdy      (mod_mul_rdy),
+
+      .a_addr   (mod_mul_addr_a),
+      .b_addr   (mod_mul_addr_b),
+      .n_addr   (mod_mul_addr_n),
+      .p_addr   (mod_mul_addr_p),
+      .p_wren   (mod_mul_wren_p),
+
+      .a_din    (mod_mul_din_a),
+      .b_din    (mod_mul_din_b),
+      .n_din    (mod_mul_din_n),
+      .p_dout   (mod_mul_dout_p)
+      );
+
+
+   //
+   // Multi-Word Data Mover
+   //
+   wire mw_mov_ena;
+   wire mw_mov_rdy;
+
+   wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
+   wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
+   wire                          mw_mov_wren_y;
+
+   wire [ 32-1:0] mw_mov_din_x;
+   wire [ 32-1:0] mw_mov_dout_y;
+
+   mw_mover #
+     (
+      .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+      .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
+      )
+   mw_mover_inst
+     (
+      .clk      (clk),
+      .rst_n    (rst_n),
+
+      .ena      (mw_mov_ena),
+      .rdy      (mw_mov_rdy),
+
+      .x_addr   (mw_mov_addr_x),
+      .y_addr   (mw_mov_addr_y),
+      .y_wren   (mw_mov_wren_y),
+
+      .x_din    (mw_mov_din_x),
+      .y_dout   (mw_mov_dout_y)
+      );
+
+
+   //
+   // ROMs
+   //
+   // The zero ROM is instantiated without clock or address ports, so it
+   // has no address register here.
+   //
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
+
+   wire [ 32-1:0] brom_one_dout;
+   wire [ 32-1:0] brom_zero_dout;
+   wire [ 32-1:0] brom_delta_dout;
+   wire [ 32-1:0] brom_g_x_dout;
+   wire [ 32-1:0] brom_g_y_dout;
+   wire [ 32-1:0] brom_h_x_dout;
+   wire [ 32-1:0] brom_h_y_dout;
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_one brom_one_inst
+     (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
+
+   brom_p384_zero brom_zero_inst
+     (.b_out(brom_zero_dout));
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_delta brom_delta_inst
+     (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_g_x brom_g_x_inst
+     (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_g_y brom_g_y_inst
+     (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_h_x brom_h_x_inst
+     (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
+
+   (* ROM_STYLE="BLOCK" *) brom_p384_h_y brom_h_y_inst
+     (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
+
+
+   //
+   // Temporary Variables
+   //
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
+
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
+   reg  [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
+
+   wire bram_t1_wr_en;
+   wire bram_t2_wr_en;
+   wire bram_t3_wr_en;
+   wire bram_t4_wr_en;
+
+   wire [ 32-1:0] bram_t1_wr_data;
+   wire [ 32-1:0] bram_t2_wr_data;
+   wire [ 32-1:0] bram_t3_wr_data;
+   wire [ 32-1:0] bram_t4_wr_data;
+
+   wire [ 32-1:0] bram_t1_rd_data;
+   wire [ 32-1:0] bram_t2_rd_data;
+   wire [ 32-1:0] bram_t3_rd_data;
+   wire [ 32-1:0] bram_t4_rd_data;
+
+   bram_1rw_1ro_readfirst #
+     ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+       )
+   bram_t1
+     ( .clk    (clk),
+       .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
+       .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
+       );
+
+   bram_1rw_1ro_readfirst #
+     ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+       )
+   bram_t2
+     ( .clk    (clk),
+       .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
+       .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
+       );
+
+   bram_1rw_1ro_readfirst #
+     ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+       )
+   bram_t3
+     ( .clk    (clk),
+       .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
+       .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
+       );
+
+   bram_1rw_1ro_readfirst #
+     ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+       )
+   bram_t4
+     ( .clk    (clk),
+       .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
+       .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
+       );
+
+
+   //
+   // FSM
+   //
+   // Declared ahead of the trigger logic so that fsm_state and the state
+   // constants are defined before their first use.
+   //
+   localparam [ 1: 0] FSM_STATE_STALL   = 2'b00;
+   localparam [ 1: 0] FSM_STATE_FETCH   = 2'b01;
+   localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
+
+   reg  [ 1: 0] fsm_state = FSM_STATE_STALL;
+
+   // after a finished uop: stop on OPCODE_RDY, otherwise fetch the next one
+   wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
+
+
+   //
+   // uOP Trigger Logic
+   //
+   // uop_trig is the registered "state is FETCH" condition, i.e. it is high
+   // during the first EXECUTE cycle of every uop.
+   //
+   reg uop_trig;
+   always @(posedge clk or negedge rst_n)
+     if (rst_n == 1'b0) uop_trig <= 1'b0;
+     else               uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
+
+
+   //
+   // FSM Transition Logic
+   //
+   reg uop_done;
+
+   always @(posedge clk or negedge rst_n)
+     if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
+     else case (fsm_state)
+            FSM_STATE_STALL:   fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
+            FSM_STATE_FETCH:   fsm_state <= FSM_STATE_EXECUTE;
+            FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
+            default:           fsm_state <= FSM_STATE_STALL;
+          endcase
+
+
+   //
+   // uOP Address Increment Logic
+   //
+   // uop_addr is 6 bits wide, so the clearing constants are sized to match.
+   //
+   always @(posedge clk)
+     if (fsm_state == FSM_STATE_STALL)
+       uop_addr <= 6'd0;
+     else if (fsm_state == FSM_STATE_EXECUTE)
+       if (!uop_trig && uop_done)
+         uop_addr <= (uop_opcode == OPCODE_RDY) ? 6'd0 : uop_addr + 1'b1;
+
+
+   //
+   // uOP Completion Logic
+   //
+   always @(*)
+     case (uop_opcode)
+       OPCODE_CMP: uop_done = mw_cmp_rdy;
+       OPCODE_MOV: uop_done = mw_mov_rdy;
+       OPCODE_ADD: uop_done = mod_add_rdy;
+       OPCODE_SUB: uop_done = mod_sub_rdy;
+       OPCODE_MUL: uop_done = mod_mul_rdy;
+       OPCODE_RDY: uop_done = 1'b1;
+       default:    uop_done = 1'b0;
+     endcase
+
+
+   //
+   // Helper Modules Enable Logic
+   //
+   // NOTE(review): this indexes individual opcode bits, which presumably
+   // relies on OPCODE_CMP/MOV/ADD/SUB/MUL being one-hot on bits 0..4 --
+   // confirm against uop_ecdsa.v.
+   //
+   assign mw_cmp_ena  = uop_opcode[0] & uop_trig;
+   assign mw_mov_ena  = uop_opcode[1] & uop_trig;
+   assign mod_add_ena = uop_opcode[2] & uop_trig;
+   assign mod_sub_ena = uop_opcode[3] & uop_trig;
+   assign mod_mul_ena = uop_opcode[4] & uop_trig;
+
+
+   //
+   // uOP Source Value Decoding Logic
+   //
+   reg [31: 0] uop_src_a_value;
+
+   always @(*)
+     case (uop_src_a)
+       UOP_SRC_PX:    uop_src_a_value = px_din;
+       UOP_SRC_PY:    uop_src_a_value = py_din;
+       UOP_SRC_PZ:    uop_src_a_value = pz_din;
+
+       UOP_SRC_RX:    uop_src_a_value = rx_din;
+       UOP_SRC_RY:    uop_src_a_value = ry_din;
+       UOP_SRC_RZ:    uop_src_a_value = rz_din;
+
+       UOP_SRC_T1:    uop_src_a_value = bram_t1_rd_data;
+       UOP_SRC_T2:    uop_src_a_value = bram_t2_rd_data;
+       UOP_SRC_T3:    uop_src_a_value = bram_t3_rd_data;
+       UOP_SRC_T4:    uop_src_a_value = bram_t4_rd_data;
+
+       UOP_SRC_ONE:   uop_src_a_value = brom_one_dout;
+       UOP_SRC_ZERO:  uop_src_a_value = brom_zero_dout;
+       UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
+
+       UOP_SRC_G_X:   uop_src_a_value = brom_g_x_dout;
+       UOP_SRC_G_Y:   uop_src_a_value = brom_g_y_dout;
+
+       UOP_SRC_H_X:   uop_src_a_value = brom_h_x_dout;
+       UOP_SRC_H_Y:   uop_src_a_value = brom_h_y_dout;
+
+       UOP_SRC_V:     uop_src_a_value = v_din;
+
+       default:       uop_src_a_value = {32{1'bX}};
+     endcase
+
+   // operand A is broadcast to every helper; only the enabled one uses it
+   assign mw_cmp_din_x  = uop_src_a_value;
+   assign mw_mov_din_x  = uop_src_a_value;
+   assign mod_add_din_a = uop_src_a_value;
+   assign mod_sub_din_a = uop_src_a_value;
+   assign mod_mul_din_a = uop_src_a_value;
+
+   reg [31: 0] uop_src_b_value;
+
+   always @(*)
+     case (uop_src_b)
+       UOP_SRC_PX:    uop_src_b_value = px_din;
+       UOP_SRC_PY:    uop_src_b_value = py_din;
+       UOP_SRC_PZ:    uop_src_b_value = pz_din;
+
+       UOP_SRC_RX:    uop_src_b_value = rx_din;
+       UOP_SRC_RY:    uop_src_b_value = ry_din;
+       UOP_SRC_RZ:    uop_src_b_value = rz_din;
+
+       UOP_SRC_T1:    uop_src_b_value = bram_t1_rd_data;
+       UOP_SRC_T2:    uop_src_b_value = bram_t2_rd_data;
+       UOP_SRC_T3:    uop_src_b_value = bram_t3_rd_data;
+       UOP_SRC_T4:    uop_src_b_value = bram_t4_rd_data;
+
+       UOP_SRC_ONE:   uop_src_b_value = brom_one_dout;
+       UOP_SRC_ZERO:  uop_src_b_value = brom_zero_dout;
+       UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
+
+       UOP_SRC_G_X:   uop_src_b_value = brom_g_x_dout;
+       UOP_SRC_G_Y:   uop_src_b_value = brom_g_y_dout;
+
+       UOP_SRC_H_X:   uop_src_b_value = brom_h_x_dout;
+       UOP_SRC_H_Y:   uop_src_b_value = brom_h_y_dout;
+
+       UOP_SRC_V:     uop_src_b_value = v_din;
+
+       default:       uop_src_b_value = {32{1'bX}};
+     endcase
+
+   // the mover takes a single operand, so it has no B input
+   assign mw_cmp_din_y  = uop_src_b_value;
+   assign mod_add_din_b = uop_src_b_value;
+   assign mod_sub_din_b = uop_src_b_value;
+   assign mod_mul_din_b = uop_src_b_value;
+
+
+   //
+   // uOP Source & Destination Address Decoding Logic
+   //
+   // Picks the word addresses generated by the helper that matches the
+   // current opcode; unused addresses are driven to X.
+   //
+   reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
+   reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
+   reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
+   reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
+
+   assign q_addr = uop_q_addr;
+
+   always @(*)
+     case (uop_opcode)
+       //
+       OPCODE_CMP: begin
+          uop_src_a_addr = mw_cmp_addr_xy;
+          uop_src_b_addr = mw_cmp_addr_xy;
+          uop_dst_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+          uop_q_addr     = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       //
+       OPCODE_MOV: begin
+          uop_src_a_addr = mw_mov_addr_x;
+          uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+          uop_dst_addr   = mw_mov_addr_y;
+          uop_q_addr     = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       //
+       OPCODE_ADD: begin
+          uop_src_a_addr = mod_add_addr_ab;
+          uop_src_b_addr = mod_add_addr_ab;
+          uop_dst_addr   = mod_add_addr_s;
+          uop_q_addr     = mod_add_addr_n;
+       end
+       //
+       OPCODE_SUB: begin
+          uop_src_a_addr = mod_sub_addr_ab;
+          uop_src_b_addr = mod_sub_addr_ab;
+          uop_dst_addr   = mod_sub_addr_d;
+          uop_q_addr     = mod_sub_addr_n;
+       end
+       //
+       OPCODE_MUL: begin
+          uop_src_a_addr = mod_mul_addr_a;
+          uop_src_b_addr = mod_mul_addr_b;
+          uop_dst_addr   = mod_mul_addr_p;
+          uop_q_addr     = mod_mul_addr_n;
+       end
+       //
+       default: begin
+          uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
+          uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+          uop_dst_addr   = {WORD_COUNTER_WIDTH{1'bX}};
+          uop_q_addr     = {WORD_COUNTER_WIDTH{1'bX}};
+       end
+       //
+     endcase
+
+
+   //
+   // uOP Conditional Execution Logic
+   //
+   // NOTE(review): there is no default branch; presumably the four
+   // UOP_EXEC_* codes cover every 2-bit encoding -- confirm in uop_ecdsa.v.
+   //
+   reg uop_exec_effective;
+
+   always @(*)
+     case (uop_exec)
+       UOP_EXEC_ALWAYS:     uop_exec_effective = 1'b1;
+       UOP_EXEC_PZT1T2_0XX: uop_exec_effective =  flag_pz_is_zero;
+       UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero &&  flag_t2_is_zero;
+       UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
+     endcase
+
+
+   //
+   // uOP Destination Store Logic
+   //
+   // Only MOV honours the conditional execution result; arithmetic uops
+   // always store.
+   //
+   reg uop_dst_wren;
+
+   always @(*)
+     case (uop_opcode)
+       //
+       OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
+       OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
+       OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
+       OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
+       default:    uop_dst_wren = 1'b0;
+       //
+     endcase
+
+
+   //
+   // Memory Address Routing
+   //
+   // Each memory receives the address of whichever operand slot (A first,
+   // then B) names it, or X when the current uop does not reference it.
+   //
+   always @(*) begin
+      //
+      // input point P
+      //
+      if (uop_src_a == UOP_SRC_PX)         px_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_PX)    px_addr = uop_src_b_addr;
+      else                                 px_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_PY)         py_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_PY)    py_addr = uop_src_b_addr;
+      else                                 py_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_PZ)         pz_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_PZ)    pz_addr = uop_src_b_addr;
+      else                                 pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // constant ROMs (the zero ROM takes no address)
+      //
+      if (uop_src_a == UOP_SRC_ONE)        brom_one_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_ONE)   brom_one_addr = uop_src_b_addr;
+      else                                 brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_DELTA)      brom_delta_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
+      else                                 brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // G ROMs
+      //
+      if (uop_src_a == UOP_SRC_G_X)        brom_g_x_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_G_X)   brom_g_x_addr = uop_src_b_addr;
+      else                                 brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_G_Y)        brom_g_y_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_G_Y)   brom_g_y_addr = uop_src_b_addr;
+      else                                 brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // H ROMs
+      //
+      if (uop_src_a == UOP_SRC_H_X)        brom_h_x_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_H_X)   brom_h_x_addr = uop_src_b_addr;
+      else                                 brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_H_Y)        brom_h_y_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_H_Y)   brom_h_y_addr = uop_src_b_addr;
+      else                                 brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // V operand
+      //
+      if (uop_src_a == UOP_SRC_V)          v_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_V)     v_addr = uop_src_b_addr;
+      else                                 v_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // temporaries, read side
+      //
+      if (uop_src_a == UOP_SRC_T1)         bram_t1_rd_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_T1)    bram_t1_rd_addr = uop_src_b_addr;
+      else                                 bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_T2)         bram_t2_rd_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_T2)    bram_t2_rd_addr = uop_src_b_addr;
+      else                                 bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_T3)         bram_t3_rd_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_T3)    bram_t3_rd_addr = uop_src_b_addr;
+      else                                 bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_src_a == UOP_SRC_T4)         bram_t4_rd_addr = uop_src_a_addr;
+      else if (uop_src_b == UOP_SRC_T4)    bram_t4_rd_addr = uop_src_b_addr;
+      else                                 bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // temporaries, write side
+      //
+      if (uop_dst == UOP_DST_T1)           bram_t1_wr_addr = uop_dst_addr;
+      else                                 bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_dst == UOP_DST_T2)           bram_t2_wr_addr = uop_dst_addr;
+      else                                 bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_dst == UOP_DST_T3)           bram_t3_wr_addr = uop_dst_addr;
+      else                                 bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      if (uop_dst == UOP_DST_T4)           bram_t4_wr_addr = uop_dst_addr;
+      else                                 bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      //
+      // R has a single address port per coordinate: the destination
+      // address wins while a write is in progress, otherwise the port is
+      // used for reading.
+      //
+      if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
+      else begin
+         if (uop_src_a == UOP_SRC_RX)      rx_addr = uop_src_a_addr;
+         else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
+         else                              rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      end
+      //
+      if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
+      else begin
+         if (uop_src_a == UOP_SRC_RY)      ry_addr = uop_src_a_addr;
+         else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
+         else                              ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      end
+      //
+      if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
+      else begin
+         if (uop_src_a == UOP_SRC_RZ)      rz_addr = uop_src_a_addr;
+         else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
+         else                              rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+      end
+      //
+   end
+
+
+   assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
+   assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
+   assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
+
+   assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
+   assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
+   assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
+   assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
+
+
+
+   //
+   // Destination Value Selector
+   //
+   reg [31: 0] uop_dst_value;
+
+   always @(*)
+     case (uop_opcode)
+
+       OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
+       OPCODE_ADD: uop_dst_value = mod_add_dout_s;
+       OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
+       OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
+
+       default:    uop_dst_value = {32{1'bX}};
+
+     endcase
+
+   // the same value goes to every destination; the write enables select one
+   assign rx_dout = uop_dst_value;
+   assign ry_dout = uop_dst_value;
+   assign rz_dout = uop_dst_value;
+
+   assign bram_t1_wr_data = uop_dst_value;
+   assign bram_t2_wr_data = uop_dst_value;
+   assign bram_t3_wr_data = uop_dst_value;
+   assign bram_t4_wr_data = uop_dst_value;
+
+
+   //
+   // Latch Comparison Flags
+   //
+   // A flag is updated when a CMP uop finishes and it compared PZ, T1 or T2
+   // (operand A) against ZERO (operand B); the flag is set when the
+   // comparator reports "equal" only.
+   //
+   always @(posedge clk)
+     if (    (fsm_state == FSM_STATE_EXECUTE) &&
+             (uop_opcode == OPCODE_CMP) &&
+             (uop_done && !uop_trig) ) begin
+
+        if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
+          flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+        if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
+          flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+        if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
+          flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+     end
+
+
+   //
+   // Ready Flag Logic
+   //
+   reg rdy_reg = 1'b1;
+   assign rdy = rdy_reg;
+
+   always @(posedge clk or negedge rst_n)
+     if (rst_n == 1'b0) rdy_reg <= 1'b1;
+     else begin
+
+        /* clear flag */
+        if (fsm_state == FSM_STATE_STALL)
+          if (ena) rdy_reg <= 1'b0;
+
+        /* set flag */
+        if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
+          if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
+
+     end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------ diff --git a/rtl/curve/curve_mul_384.v b/rtl/curve/curve_mul_384.v index 9fcb884..5b8faf1 100644 --- a/rtl/curve/curve_mul_384.v +++ b/rtl/curve/curve_mul_384.v @@ -1,720 +1,720 @@ -//------------------------------------------------------------------------------ -// -// curve_mul_384.v -// ----------------------------------------------------------------------------- -// Elliptic curve point scalar multiplier. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module curve_mul_384 - ( - clk, rst_n, - ena, rdy, - k_addr, rx_addr, ry_addr, - rx_wren, ry_wren, - k_din, - rx_dout, ry_dout - ); - - - // - // Constants - // - localparam WORD_COUNTER_WIDTH = 4; // 0 .. 11 - localparam OPERAND_NUM_WORDS = 12; // 12 * 32 = 384 - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [ 3: 0] k_addr; - output wire [ 3: 0] rx_addr; - output wire [ 3: 0] ry_addr; - - output wire rx_wren; - output wire ry_wren; - - input wire [31: 0] k_din; - - output wire [31: 0] rx_dout; - output wire [31: 0] ry_dout; - - - // - // Temporary Variables - // - reg [ 3: 0] bram_tx_wr_addr; - reg [ 3: 0] bram_ty_wr_addr; - reg [ 3: 0] bram_tz_wr_addr; - - reg [ 3: 0] bram_rx_wr_addr; - reg [ 3: 0] bram_ry_wr_addr; - reg [ 3: 0] bram_rz_wr_addr; - wire [ 3: 0] bram_rz1_wr_addr; - - reg [ 3: 0] bram_tx_rd_addr; - reg [ 3: 0] bram_ty_rd_addr; - reg [ 3: 0] bram_tz_rd_addr; - - reg [ 3: 0] bram_rx_rd_addr; - reg [ 3: 0] bram_ry_rd_addr; - reg [ 3: 0] bram_rz_rd_addr; - wire [ 3: 0] bram_rz1_rd_addr; - - reg bram_tx_wr_en; - reg bram_ty_wr_en; - reg bram_tz_wr_en; - - reg bram_rx_wr_en; - reg bram_ry_wr_en; - reg bram_rz_wr_en; - wire bram_rz1_wr_en; - - wire [31: 0] bram_tx_rd_data; - wire [31: 0] 
bram_ty_rd_data; - wire [31: 0] bram_tz_rd_data; - - wire [31: 0] bram_rx_rd_data; - wire [31: 0] bram_ry_rd_data; - wire [31: 0] bram_rz_rd_data; - wire [31: 0] bram_rz1_rd_data; - - reg [31: 0] bram_tx_wr_data_in; - reg [31: 0] bram_ty_wr_data_in; - reg [31: 0] bram_tz_wr_data_in; - - reg [31: 0] bram_rx_wr_data_in; - reg [31: 0] bram_ry_wr_data_in; - reg [31: 0] bram_rz_wr_data_in; - wire [31: 0] bram_rz1_wr_data_in; - - wire [31: 0] bram_tx_wr_data_out; - wire [31: 0] bram_ty_wr_data_out; - wire [31: 0] bram_tz_wr_data_out; - - wire [31: 0] bram_rx_wr_data_out; - wire [31: 0] bram_ry_wr_data_out; - wire [31: 0] bram_rz_wr_data_out; - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_tx (.clk(clk), - .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out), - .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_ty (.clk(clk), - .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out), - .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_tz (.clk(clk), - .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out), - .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_rx (.clk(clk), - .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out), - .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_ry (.clk(clk), - .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out), - .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_rz (.clk(clk), - .a_addr(bram_rz_wr_addr), 
.a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out), - .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data)); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) - bram_rz1 (.clk(clk), - .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(), - .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data)); - - - // - // FSM - // - localparam [ 3: 0] FSM_STATE_IDLE = 4'd00; - localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01; - localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02; - localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03; - localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04; - localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05; - localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06; - localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07; - localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08; - localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09; - localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10; - localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11; - localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12; - localparam [ 3: 0] FSM_STATE_DONE = 4'd13; - - reg [3:0] fsm_state = FSM_STATE_IDLE; - - - // - // Round Counter - // - reg [ 8: 0] bit_counter; - wire [ 8: 0] bit_counter_max = 9'd383; - wire [ 8: 0] bit_counter_zero = 9'd0; - wire [ 8: 0] bit_counter_next = - (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero; - - - // - // Round Completion - // - wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ? 
- FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG; - - - // - // OP Trigger Logic - // - reg op_trig; - wire op_done; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) op_trig <= 1'b0; - else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) || - (fsm_state == FSM_STATE_DOUBLE_TRIG) || - (fsm_state == FSM_STATE_ADD_TRIG) || - (fsm_state == FSM_STATE_CONVERT_TRIG); - - // - // Microprograms - // - wire [ 5: 0] op_rom_addr; - wire [19: 0] op_rom_init_data; - wire [19: 0] op_rom_dbl_data; - wire [19: 0] op_rom_add_data; - wire [19: 0] op_rom_conv_data; - reg [19: 0] op_rom_mux_data; - - (* RAM_STYLE="BLOCK" *) - uop_init_rom op_rom_init - ( - .clk (clk), - .addr (op_rom_addr), - .data (op_rom_init_data) - ); - - (* RAM_STYLE="BLOCK" *) - uop_dbl_rom op_rom_dbl - ( - .clk (clk), - .addr (op_rom_addr), - .data (op_rom_dbl_data) - ); - - (* RAM_STYLE="BLOCK" *) - uop_add_rom op_rom_add - ( - .clk (clk), - .addr (op_rom_addr), - .data (op_rom_add_data) - ); - - (* RAM_STYLE="BLOCK" *) - uop_conv_rom op_rom_conv - ( - .clk (clk), - .addr (op_rom_addr), - .data (op_rom_conv_data) - ); - - always @(*) - // - case (fsm_state) - FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data; - FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data; - FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data; - FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data; - default: op_rom_mux_data = {20{1'bX}}; - endcase - - - - // - // Modulus - // - reg [ 3: 0] rom_q_addr; - wire [31: 0] rom_q_data; - - brom_p384_q rom_q - ( - .clk (clk), - .b_addr (rom_q_addr), - .b_out (rom_q_data) - ); - - - // - // Worker - // - wire [ 3: 0] worker_addr_px; - wire [ 3: 0] worker_addr_py; - wire [ 3: 0] worker_addr_pz; - - wire [ 3: 0] worker_addr_rx; - wire [ 3: 0] worker_addr_ry; - wire [ 3: 0] worker_addr_rz; - - wire [ 3: 0] worker_addr_q; - - wire worker_wren_rx; - wire worker_wren_ry; - wire worker_wren_rz; - - reg [31: 0] worker_din_px; - reg [31: 0] worker_din_py; - reg 
[31: 0] worker_din_pz; - - reg [31: 0] worker_din_rx; - reg [31: 0] worker_din_ry; - reg [31: 0] worker_din_rz; - - wire [31: 0] worker_dout_rx; - wire [31: 0] worker_dout_ry; - wire [31: 0] worker_dout_rz; - - curve_dbl_add_384 worker - ( - .clk (clk), - .rst_n (rst_n), - - .ena (op_trig), - .rdy (op_done), - - .uop_addr (op_rom_addr), - .uop (op_rom_mux_data), - - .px_addr (worker_addr_px), - .py_addr (worker_addr_py), - .pz_addr (worker_addr_pz), - - .rx_addr (worker_addr_rx), - .ry_addr (worker_addr_ry), - .rz_addr (worker_addr_rz), - - .q_addr (worker_addr_q), - - .v_addr (bram_rz1_rd_addr), - - .rx_wren (worker_wren_rx), - .ry_wren (worker_wren_ry), - .rz_wren (worker_wren_rz), - - .px_din (worker_din_px), - .py_din (worker_din_py), - .pz_din (worker_din_pz), - - .rx_din (worker_din_rx), - .ry_din (worker_din_ry), - .rz_din (worker_din_rz), - - .rx_dout (worker_dout_rx), - .ry_dout (worker_dout_ry), - .rz_dout (worker_dout_rz), - - .q_din (rom_q_data), - - .v_din (bram_rz1_rd_data) - ); - - - // - // Mover - // - reg move_trig; - wire move_done; - - wire [ 3: 0] mover_addr_x; - wire [ 3: 0] mover_addr_y; - - wire mover_wren_y; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) move_trig <= 1'b0; - else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG); - - mw_mover # - ( - .WORD_COUNTER_WIDTH (4), - .OPERAND_NUM_WORDS (12) - ) - mover - ( - .clk (clk), - .rst_n (rst_n), - - .ena (move_trig), - .rdy (move_done), - - .x_addr (mover_addr_x), - .y_addr (mover_addr_y), - .y_wren (mover_wren_y), - - .x_din ({32{1'bX}}), - .y_dout () - ); - - - // - // Invertor - // - reg invert_trig; - wire invert_done; - - wire [ 3: 0] invertor_addr_a; - wire [ 3: 0] invertor_addr_q; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) invert_trig <= 1'b0; - else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG); - - modular_invertor # - ( - .MAX_OPERAND_WIDTH(384) - ) - invertor - ( - .clk (clk), - .rst_n (rst_n), - - .ena (invert_trig), - .rdy 
(invert_done), - - .a_addr (invertor_addr_a), - .q_addr (invertor_addr_q), - .a1_addr (bram_rz1_wr_addr), - .a1_wren (bram_rz1_wr_en), - - .a_din (bram_rz_rd_data), - .q_din (rom_q_data), - .a1_dout (bram_rz1_wr_data_in) - ); - - - // - // FSM Transition Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; - else case (fsm_state) - - FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE; - - FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT; - FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT; - - FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT; - FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT; - - FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT; - FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT; - - FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT; - FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT; - - FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT; - FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT; - - FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT; - FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? 
FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT; - - FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; - - default: fsm_state <= FSM_STATE_IDLE; - - endcase - - - // - // Bit Counter Increment - // - always @(posedge clk) begin - // - if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done) - bit_counter <= bit_counter_zero; - // - if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done) - bit_counter <= bit_counter_next; - // - end - - - // - // K Latch Logic - // - reg [ 3: 0] k_addr_reg; - reg [31: 0] k_din_reg; - - assign k_addr = k_addr_reg; - - always @(posedge clk) begin - // - if (fsm_state == FSM_STATE_DOUBLE_TRIG) - k_addr_reg <= 4'd11 - bit_counter[8:5]; - // - if (fsm_state == FSM_STATE_ADD_TRIG) - k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX}; - // - end - - - - // - // Copy Inhibit Logic - // - wire move_inhibit = k_din_reg[31]; - - wire copy_t2r_int = mover_wren_y & ~move_inhibit; - - - always @(*) begin - // - // Q - // - case (fsm_state) - FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q; - FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q; - FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q; - FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q; - default: rom_q_addr = worker_addr_q; - endcase - - // - // R(X,Y,Z) - // - case (fsm_state) - // - FSM_STATE_PREPARE_WAIT: begin - // - bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; - bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; - bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; - bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; - // - end - // - FSM_STATE_DOUBLE_WAIT: begin - // - bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; - bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; 
bram_rz_wr_addr <= {4{1'bX}}; - bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; - bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; - // - end - // - FSM_STATE_ADD_WAIT: begin - // - bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; - bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; - bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; - bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; - // - end - // - FSM_STATE_COPY_WAIT: begin - // - bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; - bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y; - bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int; - bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data; - // - end - // - FSM_STATE_INVERT_WAIT: begin - // - bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= invertor_addr_a; - bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; - bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; - bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; - // - end - // - FSM_STATE_CONVERT_WAIT: begin - // - bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; - bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; - bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; - bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; - // - end - - // - default: 
begin - // - bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; - bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; - bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; - bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; - // - end - // - endcase - // - // T(X,Y,Z) - // - case (fsm_state) - // - FSM_STATE_DOUBLE_WAIT: begin - // - bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; - bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz; - bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz; - bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz; - // - end - // - FSM_STATE_ADD_WAIT: begin - // - bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz; - bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; - bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; - bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; - // - end - // - FSM_STATE_COPY_WAIT: begin - // - bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x; - bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; - bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; - bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; - // - end - - // - default: begin - // - bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; - bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; - bram_tx_wr_en <= 1'b0; bram_ty_wr_en 
<= 1'b0; bram_tz_wr_en <= 1'b0; - bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; - // - end - // - endcase - // - // Worker - // - case (fsm_state) - // - FSM_STATE_DOUBLE_WAIT: begin - // - worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; - worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out; - // - end - // - FSM_STATE_ADD_WAIT: begin - // - worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data; - worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out; - // - end - // - FSM_STATE_CONVERT_WAIT: begin - // - worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; - worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; - // - end - // - default: begin - // - worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}}; - worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; - // - end - // - endcase - // - end - - - // - // Output Mapping - // - assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT); - assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT); - - assign rx_dout = worker_dout_rx; - assign ry_dout = worker_dout_ry; - - assign rx_addr = worker_addr_rx; - assign ry_addr = worker_addr_ry; - - - // - // Ready Flag Logic - // - reg rdy_reg = 1'b1; - assign rdy = rdy_reg; - - always @(posedge clk or negedge rst_n) - - if (rst_n == 1'b0) rdy_reg <= 1'b1; - else begin - - /* clear flag */ - if ((fsm_state == FSM_STATE_IDLE) && ena) - rdy_reg <= 1'b0; - - /* set flag */ - if (fsm_state == FSM_STATE_DONE) - rdy_reg <= 1'b1; - - end - - -endmodule - - 
-//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +// +// curve_mul_384.v +// ----------------------------------------------------------------------------- +// Elliptic curve point scalar multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. 
+// +//------------------------------------------------------------------------------ + +module curve_mul_384 + ( + clk, rst_n, + ena, rdy, + k_addr, rx_addr, ry_addr, + rx_wren, ry_wren, + k_din, + rx_dout, ry_dout + ); + + + // + // Constants + // + localparam WORD_COUNTER_WIDTH = 4; // 0 .. 11 + localparam OPERAND_NUM_WORDS = 12; // 12 * 32 = 384 + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [ 3: 0] k_addr; + output wire [ 3: 0] rx_addr; + output wire [ 3: 0] ry_addr; + + output wire rx_wren; + output wire ry_wren; + + input wire [31: 0] k_din; + + output wire [31: 0] rx_dout; + output wire [31: 0] ry_dout; + + + // + // Temporary Variables + // + reg [ 3: 0] bram_tx_wr_addr; + reg [ 3: 0] bram_ty_wr_addr; + reg [ 3: 0] bram_tz_wr_addr; + + reg [ 3: 0] bram_rx_wr_addr; + reg [ 3: 0] bram_ry_wr_addr; + reg [ 3: 0] bram_rz_wr_addr; + wire [ 3: 0] bram_rz1_wr_addr; + + reg [ 3: 0] bram_tx_rd_addr; + reg [ 3: 0] bram_ty_rd_addr; + reg [ 3: 0] bram_tz_rd_addr; + + reg [ 3: 0] bram_rx_rd_addr; + reg [ 3: 0] bram_ry_rd_addr; + reg [ 3: 0] bram_rz_rd_addr; + wire [ 3: 0] bram_rz1_rd_addr; + + reg bram_tx_wr_en; + reg bram_ty_wr_en; + reg bram_tz_wr_en; + + reg bram_rx_wr_en; + reg bram_ry_wr_en; + reg bram_rz_wr_en; + wire bram_rz1_wr_en; + + wire [31: 0] bram_tx_rd_data; + wire [31: 0] bram_ty_rd_data; + wire [31: 0] bram_tz_rd_data; + + wire [31: 0] bram_rx_rd_data; + wire [31: 0] bram_ry_rd_data; + wire [31: 0] bram_rz_rd_data; + wire [31: 0] bram_rz1_rd_data; + + reg [31: 0] bram_tx_wr_data_in; + reg [31: 0] bram_ty_wr_data_in; + reg [31: 0] bram_tz_wr_data_in; + + reg [31: 0] bram_rx_wr_data_in; + reg [31: 0] bram_ry_wr_data_in; + reg [31: 0] bram_rz_wr_data_in; + wire [31: 0] bram_rz1_wr_data_in; + + wire [31: 0] bram_tx_wr_data_out; + wire [31: 0] bram_ty_wr_data_out; + wire [31: 0] bram_tz_wr_data_out; + + wire [31: 0] 
bram_rx_wr_data_out; + wire [31: 0] bram_ry_wr_data_out; + wire [31: 0] bram_rz_wr_data_out; + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_tx (.clk(clk), + .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out), + .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_ty (.clk(clk), + .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out), + .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_tz (.clk(clk), + .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out), + .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rx (.clk(clk), + .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out), + .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_ry (.clk(clk), + .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out), + .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rz (.clk(clk), + .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out), + .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rz1 (.clk(clk), + .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(), + .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data)); + + + // + // FSM + // + localparam [ 3: 0] FSM_STATE_IDLE = 4'd00; + localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01; + localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02; + localparam [ 
3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03; + localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04; + localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05; + localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06; + localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07; + localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08; + localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09; + localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10; + localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11; + localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12; + localparam [ 3: 0] FSM_STATE_DONE = 4'd13; + + reg [3:0] fsm_state = FSM_STATE_IDLE; + + + // + // Round Counter + // + reg [ 8: 0] bit_counter; + wire [ 8: 0] bit_counter_max = 9'd383; + wire [ 8: 0] bit_counter_zero = 9'd0; + wire [ 8: 0] bit_counter_next = + (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero; + + + // + // Round Completion + // + wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ? + FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG; + + + // + // OP Trigger Logic + // + reg op_trig; + wire op_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) op_trig <= 1'b0; + else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) || + (fsm_state == FSM_STATE_DOUBLE_TRIG) || + (fsm_state == FSM_STATE_ADD_TRIG) || + (fsm_state == FSM_STATE_CONVERT_TRIG); + + // + // Microprograms + // + wire [ 5: 0] op_rom_addr; + wire [19: 0] op_rom_init_data; + wire [19: 0] op_rom_dbl_data; + wire [19: 0] op_rom_add_data; + wire [19: 0] op_rom_conv_data; + reg [19: 0] op_rom_mux_data; + + (* RAM_STYLE="BLOCK" *) + uop_init_rom op_rom_init + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_init_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_dbl_rom op_rom_dbl + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_dbl_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_add_rom op_rom_add + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_add_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_conv_rom op_rom_conv + ( + .clk (clk), + 
.addr (op_rom_addr), + .data (op_rom_conv_data) + ); + + always @(*) + // + case (fsm_state) + FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data; + FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data; + FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data; + FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data; + default: op_rom_mux_data = {20{1'bX}}; + endcase + + + + // + // Modulus + // + reg [ 3: 0] rom_q_addr; + wire [31: 0] rom_q_data; + + brom_p384_q rom_q + ( + .clk (clk), + .b_addr (rom_q_addr), + .b_out (rom_q_data) + ); + + + // + // Worker + // + wire [ 3: 0] worker_addr_px; + wire [ 3: 0] worker_addr_py; + wire [ 3: 0] worker_addr_pz; + + wire [ 3: 0] worker_addr_rx; + wire [ 3: 0] worker_addr_ry; + wire [ 3: 0] worker_addr_rz; + + wire [ 3: 0] worker_addr_q; + + wire worker_wren_rx; + wire worker_wren_ry; + wire worker_wren_rz; + + reg [31: 0] worker_din_px; + reg [31: 0] worker_din_py; + reg [31: 0] worker_din_pz; + + reg [31: 0] worker_din_rx; + reg [31: 0] worker_din_ry; + reg [31: 0] worker_din_rz; + + wire [31: 0] worker_dout_rx; + wire [31: 0] worker_dout_ry; + wire [31: 0] worker_dout_rz; + + curve_dbl_add_384 worker + ( + .clk (clk), + .rst_n (rst_n), + + .ena (op_trig), + .rdy (op_done), + + .uop_addr (op_rom_addr), + .uop (op_rom_mux_data), + + .px_addr (worker_addr_px), + .py_addr (worker_addr_py), + .pz_addr (worker_addr_pz), + + .rx_addr (worker_addr_rx), + .ry_addr (worker_addr_ry), + .rz_addr (worker_addr_rz), + + .q_addr (worker_addr_q), + + .v_addr (bram_rz1_rd_addr), + + .rx_wren (worker_wren_rx), + .ry_wren (worker_wren_ry), + .rz_wren (worker_wren_rz), + + .px_din (worker_din_px), + .py_din (worker_din_py), + .pz_din (worker_din_pz), + + .rx_din (worker_din_rx), + .ry_din (worker_din_ry), + .rz_din (worker_din_rz), + + .rx_dout (worker_dout_rx), + .ry_dout (worker_dout_ry), + .rz_dout (worker_dout_rz), + + .q_din (rom_q_data), + + .v_din (bram_rz1_rd_data) + ); + + + // + // Mover + // + reg move_trig; + wire 
move_done; + + wire [ 3: 0] mover_addr_x; + wire [ 3: 0] mover_addr_y; + + wire mover_wren_y; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) move_trig <= 1'b0; + else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG); + + mw_mover # + ( + .WORD_COUNTER_WIDTH (4), + .OPERAND_NUM_WORDS (12) + ) + mover + ( + .clk (clk), + .rst_n (rst_n), + + .ena (move_trig), + .rdy (move_done), + + .x_addr (mover_addr_x), + .y_addr (mover_addr_y), + .y_wren (mover_wren_y), + + .x_din ({32{1'bX}}), + .y_dout () + ); + + + // + // Invertor + // + reg invert_trig; + wire invert_done; + + wire [ 3: 0] invertor_addr_a; + wire [ 3: 0] invertor_addr_q; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) invert_trig <= 1'b0; + else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG); + + modular_invertor # + ( + .MAX_OPERAND_WIDTH(384) + ) + invertor + ( + .clk (clk), + .rst_n (rst_n), + + .ena (invert_trig), + .rdy (invert_done), + + .a_addr (invertor_addr_a), + .q_addr (invertor_addr_q), + .a1_addr (bram_rz1_wr_addr), + .a1_wren (bram_rz1_wr_en), + + .a_din (bram_rz_rd_data), + .q_din (rom_q_data), + .a1_dout (bram_rz1_wr_data_in) + ); + + + // + // FSM Transition Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE; + + FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT; + FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT; + + FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT; + FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT; + + FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT; + FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? 
FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT; + + FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT; + FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT; + + FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT; + FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT; + + FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT; + FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT; + + FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + + // + // Bit Counter Increment + // + always @(posedge clk) begin + // + if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done) + bit_counter <= bit_counter_zero; + // + if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done) + bit_counter <= bit_counter_next; + // + end + + + // + // K Latch Logic + // + reg [ 3: 0] k_addr_reg; + reg [31: 0] k_din_reg; + + assign k_addr = k_addr_reg; + + always @(posedge clk) begin + // + if (fsm_state == FSM_STATE_DOUBLE_TRIG) + k_addr_reg <= 4'd11 - bit_counter[8:5]; + // + if (fsm_state == FSM_STATE_ADD_TRIG) + k_din_reg <= (bit_counter[4:0] == 5'd0) ? 
k_din : {k_din_reg[30:0], 1'bX}; + // + end + + + + // + // Copy Inhibit Logic + // + wire move_inhibit = k_din_reg[31]; + + wire copy_t2r_int = mover_wren_y & ~move_inhibit; + + + always @(*) begin + // + // Q + // + case (fsm_state) + FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q; + FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q; + FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q; + FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q; + default: rom_q_addr = worker_addr_q; + endcase + + // + // R(X,Y,Z) + // + case (fsm_state) + // + FSM_STATE_PREPARE_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; + bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; + bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_DOUBLE_WAIT: begin + // + bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; + bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; + bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_COPY_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= 
{4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y; + bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int; + bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data; + // + end + // + FSM_STATE_INVERT_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= invertor_addr_a; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_CONVERT_WAIT: begin + // + bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + + // + default: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + endcase + // + // T(X,Y,Z) + // + case (fsm_state) + // + FSM_STATE_DOUBLE_WAIT: begin + // + bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; + bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz; + bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= 
worker_wren_rz; + bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_COPY_WAIT: begin + // + bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + + // + default: begin + // + bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + // + endcase + // + // Worker + // + case (fsm_state) + // + FSM_STATE_DOUBLE_WAIT: begin + // + worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; + worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data; + worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out; + // + end + // + 
FSM_STATE_CONVERT_WAIT: begin + // + worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; + worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; + // + end + // + default: begin + // + worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}}; + worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; + // + end + // + endcase + // + end + + + // + // Output Mapping + // + assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT); + assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT); + + assign rx_dout = worker_dout_rx; + assign ry_dout = worker_dout_ry; + + assign rx_addr = worker_addr_rx; + assign ry_addr = worker_addr_ry; + + + // + // Ready Flag Logic + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear flag */ + if ((fsm_state == FSM_STATE_IDLE) && ena) + rdy_reg <= 1'b0; + + /* set flag */ + if (fsm_state == FSM_STATE_DONE) + rdy_reg <= 1'b1; + + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/curve/rom/brom_p384_delta.v b/rtl/curve/rom/brom_p384_delta.v index 754af3e..165b3d9 100644 --- a/rtl/curve/rom/brom_p384_delta.v +++ b/rtl/curve/rom/brom_p384_delta.v @@ -33,40 +33,40 @@ `timescale 1ns / 1ps module brom_p384_delta - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'h80000000; - 4'b0001: 
bram_reg_b <= 32'h00000000; - 4'b0010: bram_reg_b <= 32'h80000000; - 4'b0011: bram_reg_b <= 32'h7fffffff; - 4'b0100: bram_reg_b <= 32'hffffffff; - 4'b0101: bram_reg_b <= 32'hffffffff; - 4'b0110: bram_reg_b <= 32'hffffffff; - 4'b0111: bram_reg_b <= 32'hffffffff; - 4'b1000: bram_reg_b <= 32'hffffffff; - 4'b1001: bram_reg_b <= 32'hffffffff; - 4'b1010: bram_reg_b <= 32'hffffffff; - 4'b1011: bram_reg_b <= 32'h7fffffff; - endcase + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h80000000; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h80000000; + 4'b0011: bram_reg_b <= 32'h7fffffff; + 4'b0100: bram_reg_b <= 32'hffffffff; + 4'b0101: bram_reg_b <= 32'hffffffff; + 4'b0110: bram_reg_b <= 32'hffffffff; + 4'b0111: bram_reg_b <= 32'hffffffff; + 4'b1000: bram_reg_b <= 32'hffffffff; + 4'b1001: bram_reg_b <= 32'hffffffff; + 4'b1010: bram_reg_b <= 32'hffffffff; + 4'b1011: bram_reg_b <= 32'h7fffffff; + endcase endmodule diff --git a/rtl/curve/rom/brom_p384_g_x.v b/rtl/curve/rom/brom_p384_g_x.v index 25d5103..614c7fe 100644 --- a/rtl/curve/rom/brom_p384_g_x.v +++ b/rtl/curve/rom/brom_p384_g_x.v @@ -33,40 +33,40 @@ `timescale 1ns / 1ps module brom_p384_g_x - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'h72760ab7; - 4'b0001: bram_reg_b <= 32'h3a545e38; - 4'b0010: bram_reg_b <= 32'hbf55296c; - 4'b0011: bram_reg_b <= 32'h5502f25d; - 4'b0100: bram_reg_b <= 32'h82542a38; - 4'b0101: bram_reg_b <= 32'h59f741e0; - 4'b0110: bram_reg_b <= 32'h8ba79b98; - 4'b0111: bram_reg_b <= 32'h6e1d3b62; - 4'b1000: bram_reg_b <= 32'hf320ad74; - 4'b1001: bram_reg_b <= 32'h8eb1c71e; - 4'b1010: bram_reg_b <= 32'hbe8b0537; - 4'b1011: 
bram_reg_b <= 32'haa87ca22; - endcase - + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h72760ab7; + 4'b0001: bram_reg_b <= 32'h3a545e38; + 4'b0010: bram_reg_b <= 32'hbf55296c; + 4'b0011: bram_reg_b <= 32'h5502f25d; + 4'b0100: bram_reg_b <= 32'h82542a38; + 4'b0101: bram_reg_b <= 32'h59f741e0; + 4'b0110: bram_reg_b <= 32'h8ba79b98; + 4'b0111: bram_reg_b <= 32'h6e1d3b62; + 4'b1000: bram_reg_b <= 32'hf320ad74; + 4'b1001: bram_reg_b <= 32'h8eb1c71e; + 4'b1010: bram_reg_b <= 32'hbe8b0537; + 4'b1011: bram_reg_b <= 32'haa87ca22; + endcase + endmodule diff --git a/rtl/curve/rom/brom_p384_g_y.v b/rtl/curve/rom/brom_p384_g_y.v index c2461eb..e64d9aa 100644 --- a/rtl/curve/rom/brom_p384_g_y.v +++ b/rtl/curve/rom/brom_p384_g_y.v @@ -33,40 +33,40 @@ `timescale 1ns / 1ps module brom_p384_g_y - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; - + // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'h90ea0e5f; - 3'b0001: bram_reg_b <= 32'h7a431d7c; - 4'b0010: bram_reg_b <= 32'h1d7e819d; - 4'b0011: bram_reg_b <= 32'h0a60b1ce; - 4'b0100: bram_reg_b <= 32'hb5f0b8c0; - 4'b0101: bram_reg_b <= 32'he9da3113; - 4'b0110: bram_reg_b <= 32'h289a147c; - 4'b0111: bram_reg_b <= 32'hf8f41dbd; - 4'b1000: bram_reg_b <= 32'h9292dc29; - 4'b1001: bram_reg_b <= 32'h5d9e98bf; - 4'b1010: bram_reg_b <= 32'h96262c6f; - 4'b1011: bram_reg_b <= 32'h3617de4a; - endcase - + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h90ea0e5f; + 4'b0001: bram_reg_b <= 32'h7a431d7c; + 4'b0010: bram_reg_b <= 32'h1d7e819d; + 4'b0011: bram_reg_b <= 32'h0a60b1ce; + 4'b0100: bram_reg_b <= 32'hb5f0b8c0; + 4'b0101: bram_reg_b <= 32'he9da3113; + 4'b0110: bram_reg_b <= 32'h289a147c; + 4'b0111: bram_reg_b <= 
32'hf8f41dbd; + 4'b1000: bram_reg_b <= 32'h9292dc29; + 4'b1001: bram_reg_b <= 32'h5d9e98bf; + 4'b1010: bram_reg_b <= 32'h96262c6f; + 4'b1011: bram_reg_b <= 32'h3617de4a; + endcase + endmodule diff --git a/rtl/curve/rom/brom_p384_h_x.v b/rtl/curve/rom/brom_p384_h_x.v index a6c474e..b6a0886 100644 --- a/rtl/curve/rom/brom_p384_h_x.v +++ b/rtl/curve/rom/brom_p384_h_x.v @@ -33,39 +33,39 @@ `timescale 1ns / 1ps module brom_p384_h_x - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'h1b13ea8a; - 4'b0001: bram_reg_b <= 32'h8b574391; - 4'b0010: bram_reg_b <= 32'h8155ad27; - 4'b0011: bram_reg_b <= 32'h55fa1b42; - 4'b0100: bram_reg_b <= 32'hfb57ab8d; - 4'b0101: bram_reg_b <= 32'h4c117c3e; - 4'b0110: bram_reg_b <= 32'he8b0c8cf; - 4'b0111: bram_reg_b <= 32'h23c5893a; - 4'b1000: bram_reg_b <= 32'h19bea517; - 4'b1001: bram_reg_b <= 32'he29c71c2; - 4'b1010: bram_reg_b <= 32'h82e9f590; - 4'b1011: bram_reg_b <= 32'haaf06bba; - endcase + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h1b13ea8a; + 4'b0001: bram_reg_b <= 32'h8b574391; + 4'b0010: bram_reg_b <= 32'h8155ad27; + 4'b0011: bram_reg_b <= 32'h55fa1b42; + 4'b0100: bram_reg_b <= 32'hfb57ab8d; + 4'b0101: bram_reg_b <= 32'h4c117c3e; + 4'b0110: bram_reg_b <= 32'he8b0c8cf; + 4'b0111: bram_reg_b <= 32'h23c5893a; + 4'b1000: bram_reg_b <= 32'h19bea517; + 4'b1001: bram_reg_b <= 32'he29c71c2; + 4'b1010: bram_reg_b <= 32'h82e9f590; + 4'b1011: bram_reg_b <= 32'haaf06bba; + endcase endmodule diff --git a/rtl/curve/rom/brom_p384_h_y.v b/rtl/curve/rom/brom_p384_h_y.v index 98c59ed..c390e3d 100644 --- a/rtl/curve/rom/brom_p384_h_y.v +++ b/rtl/curve/rom/brom_p384_h_y.v @@ -33,39 +33,39 @@ 
`timescale 1ns / 1ps module brom_p384_h_y - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; - + // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'h6f15f19d; - 4'b0001: bram_reg_b <= 32'h85bce284; - 4'b0010: bram_reg_b <= 32'he2817e62; - 4'b0011: bram_reg_b <= 32'hf59f4e30; - 4'b0100: bram_reg_b <= 32'h4a0f473e; - 4'b0101: bram_reg_b <= 32'h1625ceec; - 4'b0110: bram_reg_b <= 32'hd765eb83; - 4'b0111: bram_reg_b <= 32'h070be242; - 4'b1000: bram_reg_b <= 32'h6d6d23d6; - 4'b1001: bram_reg_b <= 32'ha2616740; - 4'b1010: bram_reg_b <= 32'h69d9d390; - 4'b1011: bram_reg_b <= 32'hc9e821b5; - endcase + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h6f15f19d; + 4'b0001: bram_reg_b <= 32'h85bce284; + 4'b0010: bram_reg_b <= 32'he2817e62; + 4'b0011: bram_reg_b <= 32'hf59f4e30; + 4'b0100: bram_reg_b <= 32'h4a0f473e; + 4'b0101: bram_reg_b <= 32'h1625ceec; + 4'b0110: bram_reg_b <= 32'hd765eb83; + 4'b0111: bram_reg_b <= 32'h070be242; + 4'b1000: bram_reg_b <= 32'h6d6d23d6; + 4'b1001: bram_reg_b <= 32'ha2616740; + 4'b1010: bram_reg_b <= 32'h69d9d390; + 4'b1011: bram_reg_b <= 32'hc9e821b5; + endcase endmodule diff --git a/rtl/curve/rom/brom_p384_one.v b/rtl/curve/rom/brom_p384_one.v index fa8caa0..c8ec6c3 100644 --- a/rtl/curve/rom/brom_p384_one.v +++ b/rtl/curve/rom/brom_p384_one.v @@ -33,40 +33,40 @@ `timescale 1ns / 1ps module brom_p384_one - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 
4'b0000: bram_reg_b <= 32'h00000001; - 4'b0001: bram_reg_b <= 32'h00000000; - 4'b0010: bram_reg_b <= 32'h00000000; - 4'b0011: bram_reg_b <= 32'h00000000; - 4'b0100: bram_reg_b <= 32'h00000000; - 4'b0101: bram_reg_b <= 32'h00000000; - 4'b0110: bram_reg_b <= 32'h00000000; - 4'b0111: bram_reg_b <= 32'h00000000; - 4'b1000: bram_reg_b <= 32'h00000000; - 4'b1001: bram_reg_b <= 32'h00000000; - 4'b1010: bram_reg_b <= 32'h00000000; - 4'b1011: bram_reg_b <= 32'h00000000; - endcase + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h00000001; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h00000000; + 4'b0011: bram_reg_b <= 32'h00000000; + 4'b0100: bram_reg_b <= 32'h00000000; + 4'b0101: bram_reg_b <= 32'h00000000; + 4'b0110: bram_reg_b <= 32'h00000000; + 4'b0111: bram_reg_b <= 32'h00000000; + 4'b1000: bram_reg_b <= 32'h00000000; + 4'b1001: bram_reg_b <= 32'h00000000; + 4'b1010: bram_reg_b <= 32'h00000000; + 4'b1011: bram_reg_b <= 32'h00000000; + endcase endmodule diff --git a/rtl/curve/rom/brom_p384_q.v b/rtl/curve/rom/brom_p384_q.v index 497c634..7571305 100644 --- a/rtl/curve/rom/brom_p384_q.v +++ b/rtl/curve/rom/brom_p384_q.v @@ -33,40 +33,40 @@ `timescale 1ns / 1ps module brom_p384_q - ( - input wire clk, - input wire [ 4-1:0] b_addr, - output wire [32-1:0] b_out - ); + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); // // Output Registers // - reg [31:0] bram_reg_b; + reg [31:0] bram_reg_b; assign b_out = bram_reg_b; - + // // Read-Only Port B - // - always @(posedge clk) - // - case (b_addr) - 4'b0000: bram_reg_b <= 32'hffffffff; - 4'b0001: bram_reg_b <= 32'h00000000; - 4'b0010: bram_reg_b <= 32'h00000000; - 4'b0011: bram_reg_b <= 32'hffffffff; - 4'b0100: bram_reg_b <= 32'hfffffffe; - 4'b0101: bram_reg_b <= 32'hffffffff; - 4'b0110: bram_reg_b <= 32'hffffffff; - 4'b0111: bram_reg_b <= 32'hffffffff; - 4'b1000: bram_reg_b <= 32'hffffffff; - 4'b1001: bram_reg_b <= 32'hffffffff; - 4'b1010: 
bram_reg_b <= 32'hffffffff; - 4'b1011: bram_reg_b <= 32'hffffffff; - endcase + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'hffffffff; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h00000000; + 4'b0011: bram_reg_b <= 32'hffffffff; + 4'b0100: bram_reg_b <= 32'hfffffffe; + 4'b0101: bram_reg_b <= 32'hffffffff; + 4'b0110: bram_reg_b <= 32'hffffffff; + 4'b0111: bram_reg_b <= 32'hffffffff; + 4'b1000: bram_reg_b <= 32'hffffffff; + 4'b1001: bram_reg_b <= 32'hffffffff; + 4'b1010: bram_reg_b <= 32'hffffffff; + 4'b1011: bram_reg_b <= 32'hffffffff; + endcase + - endmodule diff --git a/rtl/curve/rom/brom_p384_zero.v b/rtl/curve/rom/brom_p384_zero.v index 5166391..efac8e8 100644 --- a/rtl/curve/rom/brom_p384_zero.v +++ b/rtl/curve/rom/brom_p384_zero.v @@ -33,10 +33,10 @@ `timescale 1ns / 1ps module brom_p384_zero - ( - output wire [32-1:0] b_out - ); - - assign b_out = {32{1'b0}}; + ( + output wire [32-1:0] b_out + ); + + assign b_out = {32{1'b0}}; endmodule diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v deleted file mode 100644 index c807736..0000000 --- a/rtl/curve/uop/uop_add_rom.v +++ /dev/null @@ -1,66 +0,0 @@ -`timescale 1ns / 1ps - -module uop_add_rom - ( - input wire clk, - input wire [ 5: 0] addr, - output reg [19: 0] data - ); - - - // - // Microcode - // -`include "..\uop_ecdsa.v" - - - // - // Addition Microprogram - // - always @(posedge clk) - - case (addr) - -/* 2. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; -/* 3. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 4. */6'd03: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 5. */6'd04: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_G_X, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 6. 
*/6'd05: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_G_Y, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 7. */6'd06: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_PX, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 8. */6'd07: data <= {OPCODE_SUB, UOP_SRC_T2, UOP_SRC_PY, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 9. */6'd08: data <= {OPCODE_CMP, UOP_SRC_T1, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; - 6'd09: data <= {OPCODE_CMP, UOP_SRC_T2, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; -/* 10. */6'd10: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_RZ, UOP_EXEC_ALWAYS}; -/* 11. */6'd11: data <= {OPCODE_MOV, UOP_SRC_T1, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS}; - 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; -/* 12. */6'd13: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T4, UOP_EXEC_ALWAYS}; -/* 13. */6'd14: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; -/* 14. */6'd15: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 15. */6'd16: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS}; - 6'd17: data <= {OPCODE_MUL, UOP_SRC_RX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; -/* 16. */6'd18: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS}; -/* 17. */6'd19: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T4, UOP_DST_RX, UOP_EXEC_ALWAYS}; -/* 18. */6'd20: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T3, UOP_EXEC_ALWAYS}; -/* 19. */6'd21: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; -/* 20. */6'd22: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T4, UOP_DST_T4, UOP_EXEC_ALWAYS}; -/* 21. 
*/6'd23: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_T4, UOP_DST_RY, UOP_EXEC_ALWAYS}; - - 6'd24: data <= {OPCODE_MOV, UOP_SRC_G_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; - 6'd25: data <= {OPCODE_MOV, UOP_SRC_G_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; - 6'd26: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX}; - - 6'd27: data <= {OPCODE_MOV, UOP_SRC_H_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_100}; - 6'd28: data <= {OPCODE_MOV, UOP_SRC_H_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_100}; - 6'd29: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_100}; - - 6'd30: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_101}; - 6'd31: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_101}; - 6'd32: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_101}; - - default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; - - endcase - - -endmodule diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v deleted file mode 100644 index 3097736..0000000 --- a/rtl/curve/uop/uop_conv_rom.v +++ /dev/null @@ -1,38 +0,0 @@ -`timescale 1ns / 1ps - -module uop_conv_rom - ( - input wire clk, - input wire [ 5: 0] addr, - output reg [19: 0] data - ); - - - // - // Microcode - // -`include "..\uop_ecdsa.v" - - - // - // Doubling Microprogram - // - always @(posedge clk) - - case (addr) - - 6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; - 6'd01: data <= {OPCODE_MOV, UOP_SRC_V, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd02: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; - 6'd03: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T2, UOP_DST_T3, UOP_EXEC_ALWAYS}; - 6'd04: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; - 6'd05: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS}; - 6'd06: data 
<= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; - 6'd07: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; - - default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; - - endcase - - -endmodule diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v deleted file mode 100644 index 1939ca9..0000000 --- a/rtl/curve/uop/uop_dbl_rom.v +++ /dev/null @@ -1,58 +0,0 @@ -`timescale 1ns / 1ps - -module uop_dbl_rom - ( - input wire clk, - input wire [ 5: 0] addr, - output reg [19: 0] data - ); - - - // - // Microcode - // -`include "..\uop_ecdsa.v" - - - // - // Doubling Microprogram - // - always @(posedge clk) - - case (addr) - -/* 1. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; -/* 2. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 5'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 3. */6'd03: data <= {OPCODE_SUB, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 4. */6'd04: data <= {OPCODE_ADD, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 5. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 6. */6'd06: data <= {OPCODE_ADD, UOP_SRC_T2, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd07: data <= {OPCODE_ADD, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS}; -/* 7. */6'd08: data <= {OPCODE_ADD, UOP_SRC_PY, UOP_SRC_PY, UOP_DST_RY, UOP_EXEC_ALWAYS}; -/* 8. */6'd09: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_RY, UOP_DST_RZ, UOP_EXEC_ALWAYS}; -/* 9. */6'd10: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd11: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS}; - 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS}; -/* 10. */6'd13: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_RY, UOP_DST_T3, UOP_EXEC_ALWAYS}; -/* 11. 
*/6'd14: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd15: data <= {OPCODE_MUL, UOP_SRC_RY, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 12. */6'd16: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_DELTA, UOP_DST_RY, UOP_EXEC_ALWAYS}; -/* 13. */6'd17: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; - 6'd18: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; -/* 14. */6'd19: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 15. */6'd20: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS}; -/* 16. */6'd21: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 17. */6'd22: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS}; -/* 18. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_RY, UOP_DST_RY, UOP_EXEC_ALWAYS}; - - 6'd24: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; - 6'd25: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; - 6'd26: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX}; - - default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; - - endcase - - -endmodule diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v deleted file mode 100644 index ac44b55..0000000 --- a/rtl/curve/uop/uop_init_rom.v +++ /dev/null @@ -1,33 +0,0 @@ -`timescale 1ns / 1ps - -module uop_init_rom - ( - input wire clk, - input wire [ 5: 0] addr, - output reg [19: 0] data - ); - - - // - // Microcode - // -`include "..\uop_ecdsa.v" - - - // - // Doubling Microprogram - // - always @(posedge clk) - - case (addr) - - 6'd00: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS}; - 6'd01: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS}; - 6'd02: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, 
UOP_EXEC_ALWAYS}; - - default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; - - endcase - - -endmodule diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v deleted file mode 100644 index e64119d..0000000 --- a/rtl/curve/uop_ecdsa.v +++ /dev/null @@ -1,50 +0,0 @@ -localparam [ 4: 0] OPCODE_CMP = 5'b00001; -localparam [ 4: 0] OPCODE_MOV = 5'b00010; -localparam [ 4: 0] OPCODE_ADD = 5'b00100; -localparam [ 4: 0] OPCODE_SUB = 5'b01000; -localparam [ 4: 0] OPCODE_MUL = 5'b10000; -localparam [ 4: 0] OPCODE_RDY = 5'b00000; - -localparam [ 4: 0] UOP_SRC_PX = 5'h0_0; -localparam [ 4: 0] UOP_SRC_PY = 5'h0_1; -localparam [ 4: 0] UOP_SRC_PZ = 5'h0_2; - -localparam [ 4: 0] UOP_SRC_RX = 5'h0_3; -localparam [ 4: 0] UOP_SRC_RY = 5'h0_4; -localparam [ 4: 0] UOP_SRC_RZ = 5'h0_5; - -localparam [ 4: 0] UOP_SRC_T1 = 5'h0_6; -localparam [ 4: 0] UOP_SRC_T2 = 5'h0_7; -localparam [ 4: 0] UOP_SRC_T3 = 5'h0_8; -localparam [ 4: 0] UOP_SRC_T4 = 5'h0_9; - -localparam [ 4: 0] UOP_SRC_ONE = 5'h0_A; -localparam [ 4: 0] UOP_SRC_ZERO = 5'h0_B; -localparam [ 4: 0] UOP_SRC_DELTA = 5'h0_C; - -localparam [ 4: 0] UOP_SRC_V = 5'h0_F; - -localparam [ 4: 0] UOP_SRC_G_X = 5'h1_0; -localparam [ 4: 0] UOP_SRC_G_Y = 5'h1_1; - -localparam [ 4: 0] UOP_SRC_H_X = 5'h1_2; -localparam [ 4: 0] UOP_SRC_H_Y = 5'h1_3; - -localparam [ 4: 0] UOP_SRC_DUMMY = 5'hX_X; - -localparam [ 2: 0] UOP_DST_RX = 3'd0; -localparam [ 2: 0] UOP_DST_RY = 3'd1; -localparam [ 2: 0] UOP_DST_RZ = 3'd2; - -localparam [ 2: 0] UOP_DST_T1 = 3'd3; -localparam [ 2: 0] UOP_DST_T2 = 3'd4; -localparam [ 2: 0] UOP_DST_T3 = 3'd5; -localparam [ 2: 0] UOP_DST_T4 = 3'd6; - -localparam [ 2: 0] UOP_DST_DUMMY = 3'dX; - -localparam UOP_EXEC_ALWAYS = 2'b11; // R -localparam UOP_EXEC_PZT1T2_0XX = 2'b10; // G -localparam UOP_EXEC_PZT1T2_100 = 2'b00; // H -localparam UOP_EXEC_PZT1T2_101 = 2'b01; // O - diff --git a/rtl/ecdsa384.v b/rtl/ecdsa384.v index cefef14..7a63c9e 100644 --- a/rtl/ecdsa384.v +++ b/rtl/ecdsa384.v @@ -34,115 +34,115 @@ 
module ecdsa384 ( - input wire clk, - input wire rst_n, + input wire clk, + input wire rst_n, - input wire next, - output wire valid, + input wire next, + output wire valid, - input wire bus_cs, - input wire bus_we, - input wire [ 5:0] bus_addr, - input wire [31:0] bus_data_wr, + input wire bus_cs, + input wire bus_we, + input wire [ 5:0] bus_addr, + input wire [31:0] bus_data_wr, output wire [31:0] bus_data_rd ); - - // - // Memory Banks - // - localparam [1:0] BUS_ADDR_BANK_K = 2'b00; - localparam [1:0] BUS_ADDR_BANK_X = 2'b01; - localparam [1:0] BUS_ADDR_BANK_Y = 2'b10; - - wire [1:0] bus_addr_upper = bus_addr[5:4]; - wire [3:0] bus_addr_lower = bus_addr[3:0]; - - + + // + // Memory Banks + // + localparam [1:0] BUS_ADDR_BANK_K = 2'b00; + localparam [1:0] BUS_ADDR_BANK_X = 2'b01; + localparam [1:0] BUS_ADDR_BANK_Y = 2'b10; + + wire [1:0] bus_addr_upper = bus_addr[5:4]; + wire [3:0] bus_addr_lower = bus_addr[3:0]; + + // // Memories - // - - wire [31:0] user_rw_k_bram_out; - wire [31:0] user_ro_x_bram_out; - wire [31:0] user_ro_y_bram_out; - - wire [ 3:0] core_ro_k_bram_addr; - wire [ 3:0] core_rw_x_bram_addr; - wire [ 3:0] core_rw_y_bram_addr; - - wire core_rw_x_bram_wren; - wire core_rw_y_bram_wren; - - wire [31:0] core_ro_k_bram_dout; - wire [31:0] core_rw_x_bram_din; - wire [31:0] core_rw_y_bram_din; - - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + // + + wire [31:0] user_rw_k_bram_out; + wire [31:0] user_ro_x_bram_out; + wire [31:0] user_ro_y_bram_out; + + wire [ 3:0] core_ro_k_bram_addr; + wire [ 3:0] core_rw_x_bram_addr; + wire [ 3:0] core_rw_y_bram_addr; + + wire core_rw_x_bram_wren; + wire core_rw_y_bram_wren; + + wire [31:0] core_ro_k_bram_dout; + wire [31:0] core_rw_x_bram_din; + wire [31:0] core_rw_y_bram_din; + + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) ) - bram_k - ( .clk(clk), - .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), 
.a_in(bus_data_wr), - .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout) - ); - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + bram_k + ( .clk(clk), + .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr), + .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) ) - bram_x - ( .clk(clk), - .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din), - .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out) - ); - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + bram_x + ( .clk(clk), + .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din), + .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) ) - bram_y - ( .clk(clk), - .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din), - .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out) - ); + bram_y + ( .clk(clk), + .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din), + .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out) + ); // // Curve Base Point Multiplier - // - reg next_dly; - - always @(posedge clk) next_dly <= next; - - wire next_trig = next && !next_dly; - - curve_mul_384 base_point_multiplier_p384 - ( - .clk (clk), - .rst_n (rst_n), - - .ena (next_trig), - .rdy (valid), - - .k_addr (core_ro_k_bram_addr), - .rx_addr (core_rw_x_bram_addr), - .ry_addr (core_rw_y_bram_addr), - - .rx_wren (core_rw_x_bram_wren), - .ry_wren (core_rw_y_bram_wren), - - .k_din (core_ro_k_bram_dout), - .rx_dout (core_rw_x_bram_din), - .ry_dout (core_rw_y_bram_din) - ); + // + reg next_dly; + + always @(posedge clk) next_dly <= next; - // + wire next_trig = next && !next_dly; + + curve_mul_384 base_point_multiplier_p384 + ( + 
.clk (clk), + .rst_n (rst_n), + + .ena (next_trig), + .rdy (valid), + + .k_addr (core_ro_k_bram_addr), + .rx_addr (core_rw_x_bram_addr), + .ry_addr (core_rw_y_bram_addr), + + .rx_wren (core_rw_x_bram_wren), + .ry_wren (core_rw_y_bram_wren), + + .k_din (core_ro_k_bram_dout), + .rx_dout (core_rw_x_bram_din), + .ry_dout (core_rw_y_bram_din) + ); + + // // Output Selector // - reg [1:0] bus_addr_upper_prev; + reg [1:0] bus_addr_upper_prev; always @(posedge clk) bus_addr_upper_prev = bus_addr_upper; - reg [31: 0] bus_data_rd_mux; + reg [31: 0] bus_data_rd_mux; assign bus_data_rd = bus_data_rd_mux; always @(*) diff --git a/rtl/ecdsa384_wrapper.v b/rtl/ecdsa384_wrapper.v index feb10b4..10e7c9c 100644 --- a/rtl/ecdsa384_wrapper.v +++ b/rtl/ecdsa384_wrapper.v @@ -32,11 +32,11 @@ module ecdsa384_wrapper ( - input wire clk, - input wire rst_n, + input wire clk, + input wire reset_n, - input wire cs, - input wire we, + input wire cs, + input wire we, input wire [6: 0] address, input wire [31: 0] write_data, @@ -48,17 +48,17 @@ module ecdsa384_wrapper // Address Decoder // localparam ADDR_MSB_REGS = 1'b0; - localparam ADDR_MSB_CORE = 1'b1; - - wire [0:0] addr_msb = address[6]; - wire [5:0] addr_lsb = address[5:0]; + localparam ADDR_MSB_CORE = 1'b1; + + wire [0:0] addr_msb = address[6]; + wire [5:0] addr_lsb = address[5:0]; // // Output Mux // - wire [31: 0] read_data_regs; - wire [31: 0] read_data_core; + wire [31: 0] read_data_regs; + wire [31: 0] read_data_core; // @@ -70,12 +70,12 @@ module ecdsa384_wrapper localparam ADDR_CONTROL = 6'h08; // {next, init} localparam ADDR_STATUS = 6'h09; // {valid, ready} - localparam ADDR_DUMMY = 6'h0F; // don't care + localparam ADDR_DUMMY = 6'h0F; // don't care -// localparam CONTROL_INIT_BIT = 0; -- not used + // localparam CONTROL_INIT_BIT = 0; -- not used localparam CONTROL_NEXT_BIT = 1; -// localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1 + // localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1 localparam 
STATUS_VALID_BIT = 1; localparam CORE_NAME0 = 32'h65636473; // "ecds" @@ -86,23 +86,23 @@ module ecdsa384_wrapper // // Registers // - reg reg_control; - reg [31:0] reg_dummy; + reg reg_control; + reg [31:0] reg_dummy; // // Wires // - wire reg_status; + wire reg_status; // // ECDSA384 // ecdsa384 ecdsa384_inst - ( + ( .clk (clk), - .rst_n (rst_n), + .rst_n (reset_n), .next (reg_control), .valid (reg_status), @@ -112,13 +112,13 @@ module ecdsa384_wrapper .bus_addr (addr_lsb), .bus_data_wr (write_data), .bus_data_rd (read_data_core) - ); + ); // // Read Latch // - reg [31: 0] tmp_read_data; + reg [31: 0] tmp_read_data; // @@ -126,7 +126,7 @@ module ecdsa384_wrapper // always @(posedge clk) // - if (!rst_n) begin + if (!reset_n) begin // reg_control <= 1'b0; // @@ -139,7 +139,7 @@ module ecdsa384_wrapper case (addr_lsb) // ADDR_CONTROL: reg_control <= write_data[1]; - ADDR_DUMMY: reg_dummy <= write_data[31:0]; + ADDR_DUMMY: reg_dummy <= write_data[31:0]; // endcase // @@ -154,7 +154,7 @@ module ecdsa384_wrapper ADDR_VERSION: tmp_read_data <= CORE_VERSION; ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control, 1'b0}; ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1}; - ADDR_DUMMY: tmp_read_data <= reg_dummy; + ADDR_DUMMY: tmp_read_data <= reg_dummy; // default: tmp_read_data <= 32'h00000000; // diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v deleted file mode 100644 index ebfd8ce..0000000 --- a/rtl/lowlevel/adder32_wrapper.v +++ /dev/null @@ -1,73 +0,0 @@ -//------------------------------------------------------------------------------ -// -// adder32_wrapper.v -// ----------------------------------------------------------------------------- -// Wrapper for 32-bit adder. 
-// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module adder32_wrapper - ( - input clk, // clock - input [31: 0] a, // operand input - input [31: 0] b, // operand input - output [31: 0] s, // sum output - input c_in, // carry input - output c_out // carry output - ); - - // - // Include Primitive Selector - // -`include "ecdsa_lowlevel_settings.v" - - - // - // Instantiate Vendor/Generic Primitive - // - `ADDER32_PRIMITIVE adder32_inst - ( - .clk(clk), - .a(a), - .b(b), - .s(s), - .c_in(c_in), - .c_out(c_out) - ); - - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v deleted file mode 100644 index 1a0a18e..0000000 --- a/rtl/lowlevel/adder47_wrapper.v +++ /dev/null @@ -1,69 +0,0 @@ -//------------------------------------------------------------------------------ -// -// adder47_wrapper.v -// ----------------------------------------------------------------------------- -// Wrapper for 47-bit adder. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module adder47_wrapper - ( - input clk, // clock - input [46: 0] a, // operand input - input [46: 0] b, // operand input - output [46: 0] s // sum output - ); - - // - // Include Primitive Selector - // -`include "ecdsa_lowlevel_settings.v" - - - // - // Instantiate Vendor/Generic Primitive - // - `ADDER47_PRIMITIVE adder47_inst - ( - .clk(clk), - .a(a), - .b(b), - .s(s) - ); - - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v deleted file mode 100644 index 5f9ba79..0000000 --- a/rtl/lowlevel/artix7/adder32_artix7.v +++ /dev/null @@ -1,96 +0,0 @@ -//------------------------------------------------------------------------------ -// -// adder32_artix7.v -// ----------------------------------------------------------------------------- -// Hardware (Artix-7 DSP48E1) 32-bit adder. 
-// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module adder32_artix7 - ( - input clk, // clock - input [31: 0] a, // operand input - input [31: 0] b, // operand input - output [31: 0] s, // sum output - input c_in, // carry input - output c_out // carry output - ); - - // - // Lower and higher parts of operand - // - wire [17: 0] bl = b[17: 0]; - wire [13: 0] bh = b[31:18]; - - - // - // DSP48E1 Slice - // - - /* Operation Mode */ - wire [ 3: 0] dsp48e1_alumode = 4'b0000; - wire [ 6: 0] dsp48e1_opmode = 7'b0110011; - - /* Internal Product */ - wire [47: 0] p_int; - - dsp48e1_wrapper dsp_adder - ( - .clk (clk), - - .ce (1'b1), - - .carry (c_in), - - .alumode (dsp48e1_alumode), - .opmode (dsp48e1_opmode), - - .a ({{16{1'b0}}, bh}), - .b (bl), - .c ({{16{1'b0}}, a}), - - .p (p_int) - ); - - // - // Output Mapping - // - assign s = p_int[31: 0]; - assign c_out = p_int[32]; - - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v deleted file mode 100644 index 00566e4..0000000 --- a/rtl/lowlevel/artix7/adder47_artix7.v +++ /dev/null @@ -1,91 +0,0 @@ -//------------------------------------------------------------------------------ -// -// adder47_artix7.v -// ----------------------------------------------------------------------------- -// Hardware (Artix-7 DSP48E1) 47-bit adder. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module adder47_artix7 - ( - input clk, // clock - input [46: 0] a, // operand input - input [46: 0] b, // operand input - output [46: 0] s // sum output - ); - - // - // Lower and higher parts of operand - // - wire [17: 0] bl = b[17: 0]; - wire [28: 0] bh = b[46:18]; - - // - // DSP48E1 Slice - // - - /* Operation Mode */ - wire [ 3: 0] dsp48e1_alumode = 4'b0000; - wire [ 6: 0] dsp48e1_opmode = 7'b0110011; - - /* Internal Product */ - wire [47: 0] p_int; - - dsp48e1_wrapper dsp_adder - ( - .clk (clk), - - .ce (1'b1), - - .carry (1'b0), - - .alumode (dsp48e1_alumode), - .opmode (dsp48e1_opmode), - - .a ({1'b0, bh}), - .b (bl), - .c ({1'b0, a}), - - .p (p_int) - ); - - // - // Output Mapping - // - assign s = p_int[46: 0]; - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v deleted file mode 100644 index 11a21bc..0000000 --- a/rtl/lowlevel/artix7/dsp48e1_wrapper.v +++ /dev/null @@ -1,159 +0,0 @@ -//------------------------------------------------------------------------------ -// -// dsp48e1_wrapper.v -// ----------------------------------------------------------------------------- -// Hardware (Artix-7 DSP48E1) tile wrapper. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module dsp48e1_wrapper - ( - input clk, - - input ce, - - input [ 6: 0] opmode, - input [ 3: 0] alumode, - - input carry, - - input [29: 0] a, - input [17: 0] b, - input [47: 0] c, - - output [47: 0] p - ); - - - // - // Tile instantiation - // - DSP48E1 # - ( - .AREG (0), - .BREG (0), - .CREG (0), - .DREG (0), - .MREG (0), - .PREG (1), - .ADREG (0), - - .ACASCREG (0), - .BCASCREG (0), - .ALUMODEREG (0), - .INMODEREG (0), - .OPMODEREG (0), - .CARRYINREG (0), - .CARRYINSELREG (0), - - .A_INPUT ("DIRECT"), - .B_INPUT ("DIRECT"), - - .USE_DPORT ("FALSE"), - .USE_MULT ("DYNAMIC"), - .USE_SIMD ("ONE48"), - - .USE_PATTERN_DETECT ("NO_PATDET"), - .SEL_PATTERN ("PATTERN"), - .SEL_MASK ("MASK"), - .PATTERN (48'h000000000000), - .MASK (48'h3fffffffffff), - .AUTORESET_PATDET ("NO_RESET") - ) - DSP48E1_inst - ( - .CLK (clk), - - .RSTA (1'b0), - .RSTB (1'b0), - .RSTC (1'b0), - .RSTD (1'b0), - .RSTM (1'b0), - .RSTP (1'b0), - - .RSTCTRL (1'b0), - .RSTINMODE (1'b0), - .RSTALUMODE (1'b0), - .RSTALLCARRYIN (1'b0), - - .CEA1 (1'b0), - .CEA2 (1'b0), - .CEB1 (1'b0), - .CEB2 (1'b0), - .CEC (1'b0), - .CED (1'b0), - .CEM (1'b0), - .CEP (ce), - .CEAD (1'b0), - .CEALUMODE (1'b0), - .CEINMODE (1'b0), - - .CECTRL (1'b0), - .CECARRYIN (1'b0), - - .A (a), - .B (b), - .C (c), - .D ({25{1'b1}}), - .P (p), - - .CARRYIN (carry), - .CARRYOUT (), - .CARRYINSEL (3'b000), - - .CARRYCASCIN (1'b0), - .CARRYCASCOUT (), - - .PATTERNDETECT (), - .PATTERNBDETECT (), - - .OPMODE (opmode), - .ALUMODE (alumode), - .INMODE (5'b00000), - - .MULTSIGNIN (1'b0), - .MULTSIGNOUT (), - - .UNDERFLOW (), - .OVERFLOW (), - - .ACIN (30'd0), - .BCIN (18'd0), - .PCIN (48'd0), - - .ACOUT (), - .BCOUT (), - .PCOUT () - ); - -endmodule diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v deleted file mode 100644 index 63b74ab..0000000 --- a/rtl/lowlevel/artix7/mac16_artix7.v +++ /dev/null @@ -1,90 +0,0 @@ 
-//------------------------------------------------------------------------------ -// -// mac16_artix7.v -// ----------------------------------------------------------------------------- -// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 47-bit accumulator. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module mac16_artix7 - ( - input clk, // clock - input clr, // clear accumulator (active-high) - input ce, // enable clock (active-high) - input [15: 0] a, // operand input - input [15: 0] b, // operand input - output [46: 0] s // sum output - ); - - - // - // DSP48E1 Slice - // - - /* Operation Mode */ - wire [ 3: 0] dsp48e1_alumode = 4'b0000; - wire [ 6: 0] dsp48e1_opmode = {2'b01, clr, 4'b0101}; - - /* Internal Product */ - wire [47: 0] p_int; - - dsp48e1_wrapper dsp_adder - ( - .clk (clk), - - .ce (ce), - - .carry (1'b0), - - .alumode (dsp48e1_alumode), - .opmode (dsp48e1_opmode), - - .a ({{14{1'b0}}, a}), - .b ({{ 2{1'b0}}, b}), - .c ({48{1'b0}}), - - .p (p_int) - ); - - // - // Output Mapping - // - assign s = p_int[46:0]; - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v deleted file mode 100644 index b46ac5c..0000000 --- a/rtl/lowlevel/artix7/subtractor32_artix7.v +++ /dev/null @@ -1,94 +0,0 @@ -//------------------------------------------------------------------------------ -// -// subtractor32_artix7.v -// ----------------------------------------------------------------------------- -// Hardware (Artix-7 DSP48E1) 32-bit subtractor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module subtractor32_artix7 - ( - input clk, - input [31: 0] a, - input [31: 0] b, - output [31: 0] d, - input b_in, - output b_out - ); - - // - // Lower and higher parts of operand - // - wire [17: 0] bl = b[17: 0]; - wire [13: 0] bh = b[31:18]; - - // - // DSP48E1 Slice - // - - /* Operation Mode */ - wire [ 3: 0] dsp48e1_alumode = 4'b0011; - wire [ 6: 0] dsp48e1_opmode = 7'b0110011; - - /* Internal Product */ - wire [47: 0] p_int; - - dsp48e1_wrapper dsp_subtractor - ( - .clk (clk), - - .ce (1'b1), - - .carry (b_in), - - .alumode (dsp48e1_alumode), - .opmode (dsp48e1_opmode), - - .a ({{16{1'b0}}, bh}), - .b (bl), - .c ({{16{1'b0}}, a}), - - .p (p_int) - ); - - // - // Output Mapping - // - assign d = p_int[31: 0]; - assign b_out = p_int[32]; - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v deleted file mode 100644 index 8f95e2f..0000000 --- a/rtl/lowlevel/ecdsa_lowlevel_settings.v +++ /dev/null @@ -1,17 +0,0 @@ -`define USE_VENDOR_PRIMITIVES - -`ifdef USE_VENDOR_PRIMITIVES - -`define MAC16_PRIMITIVE mac16_artix7 -`define ADDER32_PRIMITIVE adder32_artix7 -`define ADDER47_PRIMITIVE adder47_artix7 -`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7 - -`else - -`define MAC16_PRIMITIVE mac16_generic -`define ADDER32_PRIMITIVE adder32_generic -`define ADDER47_PRIMITIVE adder47_generic -`define SUBTRACTOR32_PRIMITIVE subtractor32_generic - -`endif diff --git a/rtl/lowlevel/generic/adder32_generic.v b/rtl/lowlevel/generic/adder32_generic.v deleted file mode 100644 index b9c94aa..0000000 --- a/rtl/lowlevel/generic/adder32_generic.v +++ /dev/null @@ -1,67 +0,0 @@ -//------------------------------------------------------------------------------ -// 
-// adder32_generic.v -// ----------------------------------------------------------------------------- -// Generic 32-bit adder. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module adder32_generic - ( - input clk, // clock - input [31: 0] a, // operand input - input [31: 0] b, // operand input - output [31: 0] s, // sum output - input c_in, // carry input - output c_out // carry output - ); - - // - // Sum - // - reg [32: 0] s_int; - - always @(posedge clk) - s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in}; - - // - // Output - // - assign s = s_int[31:0]; - assign c_out = s_int[32]; - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/adder47_generic.v b/rtl/lowlevel/generic/adder47_generic.v deleted file mode 100644 index f472061..0000000 --- a/rtl/lowlevel/generic/adder47_generic.v +++ /dev/null @@ -1,64 +0,0 @@ -//------------------------------------------------------------------------------ -// -// adder47_generic.v -// ----------------------------------------------------------------------------- -// Generic 47-bit adder. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module adder47_generic - ( - input clk, // clock - input [46: 0] a, // operand input - input [46: 0] b, // operand input - output [46: 0] s // sum output - ); - - // - // Sum - // - reg [46: 0] s_int; - - always @(posedge clk) - s_int <= a + b; - - // - // Output - // - assign s = s_int; - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/mac16_generic.v b/rtl/lowlevel/generic/mac16_generic.v deleted file mode 100644 index dc95645..0000000 --- a/rtl/lowlevel/generic/mac16_generic.v +++ /dev/null @@ -1,74 +0,0 @@ -//------------------------------------------------------------------------------ -// -// mac16_generic.v -// ----------------------------------------------------------------------------- -// Generic 16-bit multiplier and 47-bit accumulator. 
-// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module mac16_generic - ( - input clk, // clock - input clr, // clear accumulator (active-high) - input ce, // enable clock (active-high) - input [15: 0] a, // operand input - input [15: 0] b, // operand input - output [46: 0] s // sum output - ); - - // - // Multiplier - // - wire [31: 0] p = {{16{1'b0}}, a} * {{16{1'b0}}, b}; - wire [46: 0] p_ext = {{15{1'b0}}, p}; - - // - // Accumulator - // - reg [46: 0] s_int; - - always @(posedge clk) - // - if (ce) s_int <= clr ? p_ext : p_ext + s_int; - - // - // Output - // - assign s = s_int; - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/subtractor32_generic.v b/rtl/lowlevel/generic/subtractor32_generic.v deleted file mode 100644 index 46aefe8..0000000 --- a/rtl/lowlevel/generic/subtractor32_generic.v +++ /dev/null @@ -1,67 +0,0 @@ -//------------------------------------------------------------------------------ -// -// subtractor32_generic.v -// ----------------------------------------------------------------------------- -// Generic 32-bit subtractor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module subtractor32_generic - ( - input clk, - input [31: 0] a, - input [31: 0] b, - output [31: 0] d, - input b_in, - output b_out - ); - - // - // Difference - // - reg [32: 0] d_int; - - always @(posedge clk) - d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in}; - - // - // Output - // - assign d = d_int[31:0]; - assign b_out = d_int[32]; - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v deleted file mode 100644 index b91e518..0000000 --- a/rtl/lowlevel/mac16_wrapper.v +++ /dev/null @@ -1,75 +0,0 @@ -//------------------------------------------------------------------------------ -// -// mac16_wrapper.v -// ----------------------------------------------------------------------------- -// Wrapper for 16-bit multiplier and 48-bit accumulator. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module mac16_wrapper - ( - input clk, // clock - input clr, // clear accumulator (active-high) - input ce, // enable clock (active-high) - input [15: 0] a, // operand input - input [15: 0] b, // operand input - output [46: 0] s // sum output - ); - - - // - // Include Primitive Selector - // -`include "ecdsa_lowlevel_settings.v" - - - // - // Instantiate Vendor/Generic Primitive - // - `MAC16_PRIMITIVE mac16_inst - ( - .clk(clk), - .clr(clr), - .ce(ce), - .a(a), - .b(b), - .s(s) - ); - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v deleted file mode 100644 index 3c7e5e9..0000000 --- a/rtl/lowlevel/subtractor32_wrapper.v +++ /dev/null @@ -1,72 +0,0 @@ -//------------------------------------------------------------------------------ -// -// subtractor32_wrapper.v -// ----------------------------------------------------------------------------- -// Wrapper for 
32-bit subtractor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module subtractor32_wrapper - ( - input clk, - input [31: 0] a, - input [31: 0] b, - output [31: 0] d, - input b_in, - output b_out - ); - - // - // Include Primitive Selector - // -`include "ecdsa_lowlevel_settings.v" - - - // - // Instantiate Vendor/Generic Primitive - // - `SUBTRACTOR32_PRIMITIVE subtractor32_inst - ( - .clk(clk), - .a(a), - .b(b), - .d(d), - .b_in(b_in), - .b_out(b_out) - ); - -endmodule - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v deleted file mode 100644 index 5641feb..0000000 --- a/rtl/modular/modular_adder.v +++ /dev/null @@ -1,298 +0,0 @@ -//------------------------------------------------------------------------------ -// -// modular_adder.v -// ----------------------------------------------------------------------------- -// Modular adder. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module modular_adder - ( - clk, rst_n, - ena, rdy, - ab_addr, n_addr, s_addr, s_wren, - a_din, b_din, n_din, s_dout - ); - - - // - // Parameters - // - parameter OPERAND_NUM_WORDS = 8; - parameter WORD_COUNTER_WIDTH = 3; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? 
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word - output wire s_wren; // store current S word now - - input wire [ 31:0] a_din; // A - input wire [ 31:0] b_din; // B - input wire [ 31:0] n_din; // N - output wire [ 31:0] s_dout; // S = (A + B) mod N - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_ab; - reg [WORD_COUNTER_WIDTH-1:0] index_n; - reg [WORD_COUNTER_WIDTH-1:0] index_s; - - /* map registers to output ports */ - assign ab_addr = index_ab; - assign n_addr = index_n; - assign s_addr = index_s; - - - // - // Adder - // - wire [31: 0] add32_s; - wire add32_c_in; - wire add32_c_out; - - adder32_wrapper adder32 - ( - .clk (clk), - .a (a_din), - .b (b_din), - .s (add32_s), - .c_in (add32_c_in), - .c_out (add32_c_out) - ); - - - // - // Subtractor - // - wire [31: 0] sub32_d; - wire sub32_b_in; - wire sub32_b_out; - - subtractor32_wrapper subtractor32 - ( - .clk (clk), - .a (add32_s), - .b (n_din), - .d (sub32_d), - .b_in (sub32_b_in), - .b_out (sub32_b_out) - ); - - - // - // FSM - // - - localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * 
OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)]; - - wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; - - wire inc_index_ab = |fsm_shreg_inc_index_ab; - wire inc_index_n = |fsm_shreg_inc_index_n; - wire store_sum_ab = |fsm_shreg_store_sum_ab; - wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n; - wire store_data_s = |fsm_shreg_store_data_s; - wire inc_index_s = |fsm_shreg_inc_index_s; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - - - - - - // - // Carry & Borrow Masking Logic - // - reg add32_c_mask; - reg sub32_b_mask; - - always @(posedge clk) begin - // - add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; - sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 
1'b1 : 1'b0; - // - end - - assign add32_c_in = add32_c_out & ~add32_c_mask; - assign sub32_b_in = sub32_b_out & ~sub32_b_mask; - - - // - // Carry & Borrow Latch Logic - // - reg add32_carry_latch; - reg sub32_borrow_latch; - - always @(posedge clk) begin - // - if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out; - if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out; - // - end - - - // - // Intermediate Results - // - reg [32*OPERAND_NUM_WORDS-1:0] s_ab; - reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n; - - always @(posedge clk) - // - if (store_data_s) begin - // - s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]}; - s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]}; - // - end else begin - // - if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]}; - if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]}; - // - end - - - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) begin - // - index_ab <= WORD_INDEX_ZERO; - index_n <= WORD_INDEX_ZERO; - index_s <= WORD_INDEX_ZERO; - // - end else begin - // - if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab); - if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n); - if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s); - // - end - - - // - // Output Sum Selector - // - wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch; - - - // - // Output Data and Write Enable Logic - // - reg s_wren_reg; - reg [31: 0] s_dout_reg; - wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0]; - - assign s_wren = s_wren_reg; - assign s_dout = s_dout_reg; - - always @(posedge clk) - // - if (rdy) begin - // - s_wren_reg <= 1'b0; - s_dout_reg <= {32{1'bX}}; - // - end else begin - // - s_wren_reg <= store_data_s; - s_dout_reg <= store_data_s ? 
s_dout_mux : {32{1'bX}}; - // - end - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v deleted file mode 100644 index 07c1b4f..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v +++ /dev/null @@ -1,148 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_copy - ( - clk, rst_n, - ena, rdy, - s_addr, s_din, - a1_addr, a1_wren, a1_dout - ); - - - // - // Parameters - // - parameter OPERAND_NUM_WORDS = 8; - parameter OPERAND_ADDR_BITS = 3; - - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - - input wire ena; - output wire rdy; - - output wire [ BUFFER_ADDR_BITS-1:0] s_addr; - output wire [OPERAND_ADDR_BITS-1:0] a1_addr; - - output wire a1_wren; - - input wire [ 31:0] s_din; - - output wire [ 31:0] a1_dout; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [OPERAND_ADDR_BITS-1:0] addr_s; - - wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1; - wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}}; - wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ? 
- addr_s + 1'b1 : addr_s_zero; - - reg [OPERAND_ADDR_BITS-1:0] addr_a1; - - wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1; - wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}}; - wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ? - addr_a1 + 1'b1 : addr_a1_zero; - - assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s}; - assign a1_addr = addr_a1; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment Logic - // - wire inc_addr_s; - wire inc_addr_a1; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1; - - assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop); - assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop); - - always @(posedge clk) begin - // - if (inc_addr_s) addr_s <= addr_s_next; - else addr_s <= addr_s_zero; - // - if (inc_addr_a1) addr_a1 <= addr_a1_next; - else addr_a1 <= addr_a1_zero; - // - end - - - // - // Write Enable Logic - // - wire wren_a1; - - wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1; - - assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop); - - assign a1_wren = wren_a1; - - - // - // Data Logic - // - assign a1_dout = s_din; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v deleted file mode 100644 index 
0468134..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_init.v +++ /dev/null @@ -1,172 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_init - ( - clk, rst_n, - ena, rdy, - a_addr, a_din, - q_addr, q_din, - r_addr, r_wren, r_dout, - s_addr, s_wren, s_dout, - u_addr, u_wren, u_dout, - v_addr, v_wren, v_dout - ); - - - // - // Parameters - // - parameter OPERAND_NUM_WORDS = 8; - parameter OPERAND_ADDR_BITS = 3; - - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - output wire [OPERAND_ADDR_BITS-1:0] a_addr; - output wire [OPERAND_ADDR_BITS-1:0] q_addr; - output wire [ BUFFER_ADDR_BITS-1:0] r_addr; - output wire [ BUFFER_ADDR_BITS-1:0] s_addr; - output wire [ BUFFER_ADDR_BITS-1:0] u_addr; - output wire [ BUFFER_ADDR_BITS-1:0] v_addr; - - output wire r_wren; - output wire s_wren; - output wire u_wren; - output wire v_wren; - - input wire [ 31:0] a_din; - input wire [ 31:0] q_din; - output wire [ 31:0] r_dout; - output wire [ 31:0] s_dout; - output wire [ 31:0] u_dout; - output wire [ 31:0] v_dout; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [OPERAND_ADDR_BITS-1:0] addr_aq; - - wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1; - wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}}; - wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ? 
- addr_aq + 1'b1 : addr_aq_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_rsuv; - - wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ? - addr_rsuv + 1'b1 : addr_rsuv_zero; - - assign a_addr = addr_aq; - assign q_addr = addr_aq; - - assign r_addr = addr_rsuv; - assign s_addr = addr_rsuv; - assign u_addr = addr_rsuv; - assign v_addr = addr_rsuv; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment Logic - // - wire inc_addr_aq; - wire inc_addr_rsuv; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1; - - assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop); - assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop); - - always @(posedge clk) begin - // - if (inc_addr_aq) addr_aq <= addr_aq_next; - else addr_aq <= addr_aq_zero; - // - if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next; - else addr_rsuv <= addr_rsuv_zero; - // - end - - - // - // Write Enable Logic - // - wire wren_rsuv; - - wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1; - - assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop); - - assign r_wren = wren_rsuv; - assign s_wren = wren_rsuv; - assign u_wren = wren_rsuv; - assign v_wren = wren_rsuv; - - - // - // Data Logic - // - assign r_dout = 32'd0; - assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0; - assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0; - assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? 
a_din : 32'd0; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v deleted file mode 100644 index 6b65eb1..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v +++ /dev/null @@ -1,286 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_invert_compare - ( - clk, rst_n, - ena, rdy, - - u_addr, u_din, - v_addr, v_din, - - u_gt_v, v_eq_1, - u_is_even, v_is_even - ); - - - // - // Parameters - // - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - output wire [BUFFER_ADDR_BITS-1:0] u_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_addr; - - input wire [ 32-1:0] u_din; - input wire [ 32-1:0] v_din; - - output wire u_gt_v; - output wire v_eq_1; - output wire u_is_even; - output wire v_is_even; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [BUFFER_ADDR_BITS-1:0] addr_in; - - wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? 
- addr_in - 1'b1 : addr_in_last; - - assign u_addr = addr_in; - assign v_addr = addr_in; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Decrement Logic - // - wire dec_addr_in; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0; - - assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); - - always @(posedge clk) - // - if (rdy) addr_in <= addr_in_last; - else if (dec_addr_in) addr_in <= addr_in_prev; - - - // - // Comparison Stage Flags - // - wire calc_leg; - wire calc_leg_final; - wire calc_parity; - - wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2; - wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1; - - assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop); - assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop); - assign calc_parity = (proc_cnt == cnt_calc_parity); - - - // - // Dummy Input - // - reg sub32_din_1_lsb; - wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb}; - - always @(posedge clk) - // - sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 
1'b1 : 1'b0; - - - // - // Subtractor (u - v) - // - wire [31: 0] sub32_u_minus_v_difference_out; - wire sub32_u_minus_v_borrow_in; - wire sub32_u_minus_v_borrow_out; - - subtractor32_wrapper sub32_u_minus_v - ( - .clk (clk), - .a (u_din), - .b (v_din), - .d (sub32_u_minus_v_difference_out), - .b_in (sub32_u_minus_v_borrow_in), - .b_out (sub32_u_minus_v_borrow_out) - ); - - - // - // Subtractor (v - 1) - // - wire [31: 0] sub32_v_minus_1_difference_out; - wire sub32_v_minus_1_borrow_in; - wire sub32_v_minus_1_borrow_out; - - subtractor32_wrapper sub32_v_minus_1 - ( - .clk (clk), - .a (v_din), - .b (sub32_din_1), - .d (sub32_v_minus_1_difference_out), - .b_in (sub32_v_minus_1_borrow_in), - .b_out (sub32_v_minus_1_borrow_out) - ); - - - - // - // Borrow Masking Logic - // - reg mask_borrow; - - always @(posedge clk) - // - mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ? - 1'b0 : 1'b1; - - assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow; - assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow; - - - // - // Comparison Logic - // - reg cmp_u_v_l; - reg cmp_u_v_e; - reg cmp_u_v_g; - - reg cmp_v_1_l; - reg cmp_v_1_e; - reg cmp_v_1_g; - - wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g); - wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g); - - wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0; - wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0; - - wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0; - wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 
1'b1 : 1'b0; - - reg u_is_even_reg; - reg v_is_even_reg; - - always @(posedge clk) - // - if (rdy) begin - // - if (ena) begin - // - cmp_u_v_l <= 1'b0; - cmp_u_v_e <= 1'b0; - cmp_u_v_g <= 1'b0; - // - cmp_v_1_l <= 1'b0; - cmp_v_1_e <= 1'b0; - cmp_v_1_g <= 1'b0; - // - u_is_even_reg <= 1'bX; - v_is_even_reg <= 1'bX; - // - end - // - end else begin - // - // parity - // - if (calc_parity) begin - u_is_even_reg <= ~u_din[0]; - v_is_even_reg <= ~v_din[0]; - end - // - // u <> v - // - if (cmp_unresolved_u_v && calc_leg) begin - // - if (cmp_u_v_borrow_is_set) - cmp_u_v_l <= 1'b1; - // - if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero) - cmp_u_v_g <= 1'b1; - // - if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final) - cmp_u_v_e <= 1'b1; - // - end - // - // v <> 1 - // - if (cmp_unresolved_v_1 && calc_leg) begin - // - if (cmp_v_1_borrow_is_set) - cmp_v_1_l <= 1'b1; - // - if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero) - cmp_v_1_g <= 1'b1; - // - if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final) - cmp_v_1_e <= 1'b1; - // - end - // - end - - - // - // Output Flags - // - assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g; - assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g; - - assign u_is_even = u_is_even_reg; - assign v_is_even = v_is_even_reg; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v deleted file mode 100644 index ab15563..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v +++ /dev/null @@ -1,408 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_invert_precalc - ( - clk, rst_n, - ena, rdy, - 
- r_addr, r_din, - s_addr, s_din, - u_addr, u_din, - v_addr, v_din, - - r_dbl_addr, r_dbl_wren, r_dbl_dout, - s_dbl_addr, s_dbl_wren, s_dbl_dout, - r_plus_s_addr, r_plus_s_wren, r_plus_s_dout, - u_half_addr, u_half_wren, u_half_dout, - v_half_addr, v_half_wren, v_half_dout, - u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din, - v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din, - u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout, - v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout - ); - - - // - // Parameters - // - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - output wire [BUFFER_ADDR_BITS-1:0] r_addr; - output wire [BUFFER_ADDR_BITS-1:0] s_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_addr; - - input wire [ 32-1:0] r_din; - input wire [ 32-1:0] s_din; - input wire [ 32-1:0] u_din; - input wire [ 32-1:0] v_din; - - output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; - output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; - output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; - - output wire [ 32-1:0] r_dbl_dout; - output wire [ 32-1:0] s_dbl_dout; - output wire [ 32-1:0] r_plus_s_dout; - output wire [ 32-1:0] u_half_dout; - output wire [ 32-1:0] v_half_dout; - output wire [ 32-1:0] u_minus_v_dout; - output wire [ 32-1:0] v_minus_u_dout; - output wire [ 32-1:0] 
u_minus_v_half_dout; - output wire [ 32-1:0] v_minus_u_half_dout; - - output wire r_dbl_wren; - output wire s_dbl_wren; - output wire r_plus_s_wren; - output wire u_half_wren; - output wire v_half_wren; - output wire u_minus_v_wren; - output wire v_minus_u_wren; - output wire u_minus_v_half_wren; - output wire v_minus_u_half_wren; - - input wire [ 32-1:0] u_minus_v_din; - input wire [ 32-1:0] v_minus_u_din; - - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [BUFFER_ADDR_BITS-1:0] addr_in; - - wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ? - addr_in + 1'b1 : addr_in_zero; - wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? - addr_in - 1'b1 : addr_in_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_out1; - - wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? - addr_out1 + 1'b1 : addr_out1_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_out2; - - wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ? - addr_out2 + 1'b1 : addr_out2_zero; - wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? 
- addr_out2 - 1'b1 : addr_out2_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_out3; - - wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? - addr_out3 - 1'b1 : addr_out3_last; - - reg [BUFFER_ADDR_BITS-1:0] addr_out4; - - wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ? - addr_out4 - 1'b1 : addr_out4_last; - - - assign r_addr = addr_in; - assign s_addr = addr_in; - assign u_addr = addr_in; - assign v_addr = addr_in; - - assign r_dbl_addr = addr_out1; - assign s_dbl_addr = addr_out1; - assign r_plus_s_addr = addr_out2; - assign u_half_addr = addr_out3; - assign v_half_addr = addr_out3; - assign u_minus_v_addr = addr_out2; - assign v_minus_u_addr = addr_out2; - assign u_minus_v_half_addr = addr_out4; - assign v_minus_u_half_addr = addr_out4; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment/Decrement Logic - // - wire inc_addr_in; - wire dec_addr_in; - wire inc_addr_out1; - wire inc_addr_out2; - wire dec_addr_out2; - wire dec_addr_out3; - wire dec_addr_out4; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1; - - wire 
[PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3; - - assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); - assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); - assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); - assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop); - assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); - assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); - assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop); - - - always @(posedge clk) begin - // - if (rdy) begin - // - addr_in <= addr_in_zero; - addr_out1 <= addr_out1_zero; - addr_out2 <= addr_out2_zero; - addr_out3 <= addr_out3_last; - addr_out4 <= addr_out4_last; - // - end else begin - // - if (inc_addr_in) addr_in <= addr_in_next; - else if (dec_addr_in) addr_in <= addr_in_prev; - // - if (inc_addr_out1) addr_out1 <= addr_out1_next; - else addr_out1 <= addr_out1_zero; - // - if (inc_addr_out2) addr_out2 <= addr_out2_next; - else if (dec_addr_out2) addr_out2 <= addr_out2_prev; - // - if (dec_addr_out3) addr_out3 <= addr_out3_prev; - else addr_out3 <= addr_out3_last; - // - if (dec_addr_out4) addr_out4 <= addr_out4_prev; - else addr_out4 <= addr_out4_last; - // - end - // - end - - - // - // Write Enable Logic - // - wire wren_out1; - wire wren_out2; - wire 
wren_out3; - wire wren_out4; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2; - wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4; - wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3; - - assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); - assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); - assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); - assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop); - - assign r_dbl_wren = wren_out1; - assign s_dbl_wren = wren_out1; - assign r_plus_s_wren = wren_out2; - assign u_half_wren = wren_out3; - assign v_half_wren = wren_out3; - assign u_minus_v_wren = wren_out2; - assign v_minus_u_wren = wren_out2; - assign u_minus_v_half_wren = wren_out4; - assign v_minus_u_half_wren = wren_out4; - - - // - // Adder (r + s) - // - wire [31: 0] add32_r_plus_s_sum_out; - wire add32_r_plus_s_carry_in; - wire add32_r_plus_s_carry_out; - - adder32_wrapper add32_r_plus_s - ( - .clk (clk), - .a (r_din), - .b (s_din), - .s (add32_r_plus_s_sum_out), - .c_in (add32_r_plus_s_carry_in), - .c_out (add32_r_plus_s_carry_out) - ); - - // - // Subtractor (u - v) - // - wire [31: 0] sub32_u_minus_v_difference_out; - wire sub32_u_minus_v_borrow_in; - wire sub32_u_minus_v_borrow_out; - - subtractor32_wrapper sub32_u_minus_v - ( - .clk (clk), - .a (u_din), - .b (v_din), - .d (sub32_u_minus_v_difference_out), - .b_in (sub32_u_minus_v_borrow_in), - .b_out 
(sub32_u_minus_v_borrow_out) - ); - - // - // Subtractor (v - u) - // - wire [31: 0] sub32_v_minus_u_difference_out; - wire sub32_v_minus_u_borrow_in; - wire sub32_v_minus_u_borrow_out; - - subtractor32_wrapper sub32_v_minus_u - ( - .clk (clk), - .a (v_din), - .b (u_din), - .d (sub32_v_minus_u_difference_out), - .b_in (sub32_v_minus_u_borrow_in), - .b_out (sub32_v_minus_u_borrow_out) - ); - - - // - // Carry & Borrow Masking Logic - // - reg mask_carry_borrow; - - always @(posedge clk) - // - mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? - 1'b0 : 1'b1; - - assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow; - assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow; - assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow; - - - // - // Carry Bits - // - reg r_dbl_carry; - reg s_dbl_carry; - reg u_half_carry; - reg v_half_carry; - reg u_minus_v_half_carry; - reg v_minus_u_half_carry; - - always @(posedge clk) begin - - r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? - r_din[31] : 1'b0; - - s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? - s_din[31] : 1'b0; - - u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? - u_din[0] : 1'b0; - - v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? - v_din[0] : 1'b0; - - u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? - u_minus_v_din[0] : 1'b0; - - v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? 
- v_minus_u_din[0] : 1'b0; - - end - - - // - // Data Mapper - // - assign r_dbl_dout = {r_din[30:0], r_dbl_carry}; - assign s_dbl_dout = {s_din[30:0], s_dbl_carry}; - assign r_plus_s_dout = add32_r_plus_s_sum_out; - assign u_half_dout = {u_half_carry, u_din[31:1]}; - assign v_half_dout = {v_half_carry, v_din[31:1]}; - assign u_minus_v_dout = sub32_u_minus_v_difference_out; - assign v_minus_u_dout = sub32_v_minus_u_difference_out; - assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]}; - assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]}; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v deleted file mode 100644 index 0cd6ac5..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v +++ /dev/null @@ -1,257 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_invert_update - ( - clk, rst_n, - ena, rdy, - - u_gt_v, v_eq_1, - u_is_even, v_is_even, - - r_addr, r_wren, r_dout, - s_addr, s_wren, s_dout, - u_addr, u_wren, u_dout, - v_addr, v_wren, v_dout, - - r_dbl_addr, r_dbl_din, - s_dbl_addr, s_dbl_din, - r_plus_s_addr, r_plus_s_din, - u_half_addr, u_half_din, - v_half_addr, v_half_din, - u_minus_v_half_addr, u_minus_v_half_din, - v_minus_u_half_addr, v_minus_u_half_din - ); - - - // - // Parameters - // - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - 
input wire u_gt_v; - input wire v_eq_1; - input wire u_is_even; - input wire v_is_even; - - output wire [BUFFER_ADDR_BITS-1:0] r_addr; - output wire [BUFFER_ADDR_BITS-1:0] s_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_addr; - - output wire r_wren; - output wire s_wren; - output wire u_wren; - output wire v_wren; - - output wire [ 32-1:0] r_dout; - output wire [ 32-1:0] s_dout; - output wire [ 32-1:0] u_dout; - output wire [ 32-1:0] v_dout; - - output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; - output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; - output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; - - input wire [ 32-1:0] r_dbl_din; - input wire [ 32-1:0] s_dbl_din; - input wire [ 32-1:0] r_plus_s_din; - input wire [ 32-1:0] u_half_din; - input wire [ 32-1:0] v_half_din; - input wire [ 32-1:0] u_minus_v_half_din; - input wire [ 32-1:0] v_minus_u_half_din; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [BUFFER_ADDR_BITS-1:0] addr_in; - - wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? - addr_in + 1'b1 : addr_in_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_out; - - wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? 
- addr_out + 1'b1 : addr_out_zero; - - assign r_addr = addr_out; - assign s_addr = addr_out; - assign u_addr = addr_out; - assign v_addr = addr_out; - - assign r_dbl_addr = addr_in; - assign s_dbl_addr = addr_in; - assign r_plus_s_addr = addr_in; - assign u_half_addr = addr_in; - assign v_half_addr = addr_in; - assign u_minus_v_half_addr = addr_in; - assign v_minus_u_half_addr = addr_in; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment Logic - // - wire inc_addr_in; - wire inc_addr_out; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; - - assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); - assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); - - always @(posedge clk) begin - // - if (inc_addr_in) addr_in <= addr_in_next; - else addr_in <= addr_in_zero; - // - if (inc_addr_out) addr_out <= addr_out_next; - else addr_out <= addr_out_zero; - // - end - - // - // Write Enable Logic - // - wire wren_out; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; - - assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop); - - reg r_wren_allow; - reg s_wren_allow; - reg u_wren_allow; - reg v_wren_allow; - - assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy; - assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy; - assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy; - assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy; - - - // - // Data Logic - // - reg [31: 0] r_dout_mux; - reg [31: 0] s_dout_mux; - reg [31: 0] u_dout_mux; - reg [31: 0] v_dout_mux; - - assign r_dout = r_dout_mux; - assign s_dout = 
s_dout_mux; - assign u_dout = u_dout_mux; - assign v_dout = v_dout_mux; - - always @(*) begin - // - // r, s, u, v - // - if (u_is_even) begin - // - u_dout_mux = u_half_din; - v_dout_mux = {32{1'bX}}; - r_dout_mux = {32{1'bX}}; - s_dout_mux = s_dbl_din; - // - u_wren_allow = 1'b1; - v_wren_allow = 1'b0; - r_wren_allow = 1'b0; - s_wren_allow = 1'b1; - // - end else begin - // - if (v_is_even) begin - // - u_dout_mux = {32{1'bX}}; - v_dout_mux = v_half_din; - r_dout_mux = r_dbl_din; - s_dout_mux = {32{1'bX}}; - // - u_wren_allow = 1'b0; - v_wren_allow = 1'b1; - r_wren_allow = 1'b1; - s_wren_allow = 1'b0; - // - end else begin - // - u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}}; - v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din; - r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din; - s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din; - // - u_wren_allow = u_gt_v; - v_wren_allow = !u_gt_v; - r_wren_allow = 1'b1; - s_wren_allow = 1'b1; - // - end - // - end - // - end - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v deleted file mode 100644 index fb858a6..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v +++ /dev/null @@ -1,328 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_reduce_precalc - ( - clk, rst_n, - ena, rdy, - - k, - - s_is_odd, k_is_nul, - - r_addr, r_din, r_wren, r_dout, - s_addr, s_din, - u_addr, u_wren, u_dout, - v_addr, v_wren, v_dout, - q_addr, q_din - ); - - - // - // Parameters - // - parameter OPERAND_NUM_WORDS = 8; - parameter OPERAND_ADDR_BITS = 3; - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - parameter K_NUM_BITS = 10; - - - // - // 
clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - input wire [ K_NUM_BITS-1:0] k; - - output wire s_is_odd; - output wire k_is_nul; - - output wire [ BUFFER_ADDR_BITS-1:0] r_addr; - output wire [ BUFFER_ADDR_BITS-1:0] s_addr; - output wire [ BUFFER_ADDR_BITS-1:0] u_addr; - output wire [ BUFFER_ADDR_BITS-1:0] v_addr; - output wire [OPERAND_ADDR_BITS-1:0] q_addr; - - input wire [ 32-1:0] r_din; - input wire [ 32-1:0] s_din; - input wire [ 32-1:0] q_din; - - output wire r_wren; - output wire u_wren; - output wire v_wren; - - output wire [ 32-1:0] r_dout; - output wire [ 32-1:0] u_dout; - output wire [ 32-1:0] v_dout; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf; - reg [OPERAND_ADDR_BITS-1:0] addr_in_op; - reg [ BUFFER_ADDR_BITS-1:0] addr_out1; - reg [ BUFFER_ADDR_BITS-1:0] addr_out2; - reg [ BUFFER_ADDR_BITS-1:0] addr_out3; - - wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1; - wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ? - addr_in_buf + 1'b1 : addr_in_buf_zero; - wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ? 
- addr_in_buf - 1'b1 : addr_in_buf_zero; - - wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1; - wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}}; - wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ? - addr_in_op + 1'b1 : addr_in_op_zero; - - wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? - addr_out1 + 1'b1 : addr_out1_zero; - wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ? - addr_out1 - 1'b1 : addr_out1_zero; - - wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? - addr_out2 - 1'b1 : addr_out2_last; - - wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? 
- addr_out3 - 1'b1 : addr_out3_last; - - - assign s_addr = addr_in_buf; - assign q_addr = addr_in_op; - assign r_addr = addr_out1; - assign u_addr = addr_out2; - assign v_addr = addr_out3; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment/Decrement Logic - // - wire inc_addr_buf_in; - wire dec_addr_buf_in; - wire inc_addr_op_in; - wire inc_addr_out1; - wire dec_addr_out1; - wire dec_addr_out2; - wire dec_addr_out3; - - wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4; - wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3; - - assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop); - assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop); - assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= 
cnt_inc_addr_op_in_stop); - assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); - assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop); - assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); - assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); - - always @(posedge clk) begin - // - if (rdy) begin - // - addr_in_buf <= addr_in_buf_zero; - addr_in_op <= addr_in_op_zero; - addr_out1 <= addr_out1_zero; - addr_out2 <= addr_out2_last; - addr_out3 <= addr_out3_last; - // - end else begin - // - if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next; - else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev; - // - if (inc_addr_op_in) addr_in_op <= addr_in_op_next; - else addr_in_op <= addr_in_op_zero; - // - if (inc_addr_out1) addr_out1 <= addr_out1_next; - else if (dec_addr_out1) addr_out1 <= addr_out1_prev; - // - if (dec_addr_out2) addr_out2 <= addr_out2_prev; - else addr_out2 <= addr_out2_last; - // - if (dec_addr_out3) addr_out3 <= addr_out3_prev; - else addr_out3 <= addr_out3_last; - // - end - // - end - - - // - // Write Enable Logic - // - wire wren_out1; - wire wren_out2; - wire wren_out3; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3; - wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1; - wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4; - wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3; - - assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); - assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); - assign wren_out3 = (proc_cnt >= 
cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); - - assign r_wren = wren_out1; - assign u_wren = wren_out2; - assign v_wren = wren_out3; - - // - // Adder (s + q) - // - wire [31: 0] q_din_masked; - wire [31: 0] add32_s_plus_q_sum_out; - wire add32_s_plus_q_carry_in; - wire add32_s_plus_q_carry_out; - - adder32_wrapper add32_r_plus_s - ( - .clk (clk), - .a (s_din), - .b (q_din_masked), - .s (add32_s_plus_q_sum_out), - .c_in (add32_s_plus_q_carry_in), - .c_out (add32_s_plus_q_carry_out) - ); - - - // - // Carry Masking Logic - // - wire mask_carry; - - assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1; - - - // - // Addend Masking Logic - // - reg q_din_mask; - - always @(posedge clk) - q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0; - - assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din; - - assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry; - - - // - // Carry Bits - // - reg s_half_carry; - reg s_plus_q_half_carry; - - always @(posedge clk) begin - // - s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ? - s_din[0] : 1'b0; - // - s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? 
- r_din[0] : 1'b0; - // - end - - // - // Data Mapper - // - assign r_dout = add32_s_plus_q_sum_out; - assign u_dout = {s_half_carry, s_din[31:1]}; - assign v_dout = {s_plus_q_half_carry, r_din[31:1]}; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - - // - // Output Flags - // - reg s_is_odd_reg; - reg k_is_nul_reg; - - assign s_is_odd = s_is_odd_reg; - assign k_is_nul = k_is_nul_reg; - - always @(posedge clk) - // - if (proc_cnt == cnt_calc_flags) begin - s_is_odd_reg <= s_din[0]; - k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v deleted file mode 100644 index ea5b854..0000000 --- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v +++ /dev/null @@ -1,153 +0,0 @@ -`timescale 1ns / 1ps - -module modinv_helper_reduce_update - ( - clk, rst_n, - ena, rdy, - - s_is_odd, k_is_nul, - - s_addr, s_wren, s_dout, - u_addr, u_din, - v_addr, v_din - ); - - - // - // Parameters - // - parameter BUFFER_NUM_WORDS = 9; - parameter BUFFER_ADDR_BITS = 4; - - - // - // clog2 - // -`include "..\modinv_clog2.v" - - - // - // Constants - // - localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; - localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); - - - // - // Ports - // - input wire clk; - input wire rst_n; - input wire ena; - output wire rdy; - - input wire s_is_odd; - input wire k_is_nul; - - output wire [BUFFER_ADDR_BITS-1:0] s_addr; - output wire [BUFFER_ADDR_BITS-1:0] u_addr; - output wire [BUFFER_ADDR_BITS-1:0] v_addr; - - output wire s_wren; - - output wire [ 32-1:0] s_dout; - - input wire [ 32-1:0] u_din; - input wire [ 32-1:0] v_din; - - - // - // Counter - // - reg [PROC_CNT_BITS-1:0] proc_cnt; - - 
wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; - wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; - wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? - proc_cnt + 1'b1 : proc_cnt_zero; - - // - // Addresses - // - reg [BUFFER_ADDR_BITS-1:0] addr_in; - - wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? - addr_in + 1'b1 : addr_in_zero; - - reg [BUFFER_ADDR_BITS-1:0] addr_out; - - wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; - wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; - wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? - addr_out + 1'b1 : addr_out_zero; - - assign s_addr = addr_out; - assign u_addr = addr_in; - assign v_addr = addr_in; - - - // - // Ready Flag - // - assign rdy = (proc_cnt == proc_cnt_zero); - - - // - // Address Increment Logic - // - wire inc_addr_in; - wire inc_addr_out; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; - - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; - - assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); - assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); - - always @(posedge clk) begin - // - if (inc_addr_in) addr_in <= addr_in_next; - else addr_in <= addr_in_zero; - // - if (inc_addr_out) addr_out <= addr_out_next; - else addr_out <= addr_out_zero; - // - end - - // - // Write Enable Logic - // - wire wren_out; - - wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; - wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; - - assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= 
cnt_wren_out_stop); - - assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy; - - - // - // Data Logic - // - assign s_dout = s_is_odd ? v_din : u_din; - - - // - // Primary Counter Logic - // - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; - else begin - if (!rdy) proc_cnt <= proc_cnt_next; - else if (ena) proc_cnt <= proc_cnt_next; - end - - -endmodule diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v deleted file mode 100644 index 2f7b64d..0000000 --- a/rtl/modular/modular_invertor/modinv_clog2.v +++ /dev/null @@ -1,10 +0,0 @@ -function integer clog2; - input integer value; - integer result; - begin - value = value - 1; - for (result = 0; value > 0; result = result + 1) - value = value >> 1; - clog2 = result; - end -endfunction diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v deleted file mode 100644 index e9f2460..0000000 --- a/rtl/modular/modular_invertor/modular_invertor.v +++ /dev/null @@ -1,981 +0,0 @@ -//------------------------------------------------------------------------------ -// -// modular_invertor.v -// ----------------------------------------------------------------------------- -// Modular invertor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module modular_invertor - ( - clk, rst_n, - ena, rdy, - a_addr, q_addr, a1_addr, a1_wren, - a_din, q_din, a1_dout - ); - - - // - // Parameters - // - parameter MAX_OPERAND_WIDTH = 256; - - - // - // clog2 - // -`include "modinv_clog2.v" - - - // - // More Parameters - // - localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32; - localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS); - - localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1; - localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS); - - localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH; - localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS); - - localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1); - - - // - // Ports - // - input wire clk; - input wire rst_n; - - input wire ena; - output wire rdy; - - output wire [OPERAND_ADDR_BITS-1:0] a_addr; - output reg [OPERAND_ADDR_BITS-1:0] q_addr; - output wire [OPERAND_ADDR_BITS-1:0] a1_addr; - output wire a1_wren; - - input wire [32-1:0] a_din; - input wire [32-1:0] q_din; - output wire [32-1:0] a1_dout; - - - // - // "Redundant" Power of 2 (K) - // - reg [K_NUM_BITS-1:0] k; - - - // - // Buffers - // - reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr; - reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr; - reg buf_r_wr_en; - reg [ 32-1:0] buf_r_wr_din; - wire [ 32-1:0] buf_r_wr_dout; - wire [ 32-1:0] buf_r_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_r - ( .clk(clk), - .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din), - .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout) - ); - - reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr; - reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr; - reg buf_s_wr_en; - reg [ 32-1:0] buf_s_wr_din; - wire [ 32-1:0] buf_s_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_s - ( .clk(clk), - .a_addr(buf_s_wr_addr), .a_out(), 
.a_wr(buf_s_wr_en), .a_in(buf_s_wr_din), - .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout) - ); - - reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr; - reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr; - reg buf_u_wr_en; - reg [ 32-1:0] buf_u_wr_din; - wire [ 32-1:0] buf_u_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_u - ( .clk(clk), - .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din), - .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout) - ); - - reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr; - reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr; - reg buf_v_wr_en; - reg [ 32-1:0] buf_v_wr_din; - wire [ 32-1:0] buf_v_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_v - ( .clk(clk), - .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din), - .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr; - wire buf_r_dbl_wr_en; - wire [ 32-1:0] buf_r_dbl_wr_din; - wire [ 32-1:0] buf_r_dbl_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_r_dbl - ( .clk(clk), - .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din), - .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr; - wire buf_s_dbl_wr_en; - wire [ 32-1:0] buf_s_dbl_wr_din; - wire [ 32-1:0] buf_s_dbl_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_s_dbl - ( .clk(clk), - .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din), - .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr; - wire buf_r_plus_s_wr_en; - wire [ 32-1:0] 
buf_r_plus_s_wr_din; - wire [ 32-1:0] buf_r_plus_s_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_r_plus_s - ( .clk(clk), - .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din), - .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr; - wire buf_u_minus_v_wr_en; - wire [ 32-1:0] buf_u_minus_v_wr_din; - wire [ 32-1:0] buf_u_minus_v_wr_dout; - - assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_u_minus_v - ( .clk(clk), - .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din), - .b_addr(buf_u_minus_v_rd_addr), .b_out() - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr; - wire buf_v_minus_u_wr_en; - wire [ 32-1:0] buf_v_minus_u_wr_din; - wire [ 32-1:0] buf_v_minus_u_wr_dout; - - assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_v_minus_u - ( .clk(clk), - .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din), - .b_addr(buf_v_minus_u_rd_addr), .b_out() - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr; - wire buf_u_half_wr_en; - wire [ 32-1:0] buf_u_half_wr_din; - wire [ 32-1:0] buf_u_half_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_u_half - ( .clk(clk), - .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din), - .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr; - wire 
[BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr; - wire buf_v_half_wr_en; - wire [ 32-1:0] buf_v_half_wr_din; - wire [ 32-1:0] buf_v_half_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_v_half - ( .clk(clk), - .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din), - .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr; - wire buf_u_minus_v_half_wr_en; - wire [ 32-1:0] buf_u_minus_v_half_wr_din; - wire [ 32-1:0] buf_u_minus_v_half_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_u_minus_v_half - ( .clk(clk), - .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din), - .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout) - ); - - wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr; - wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr; - wire buf_v_minus_u_half_wr_en; - wire [ 32-1:0] buf_v_minus_u_half_wr_din; - wire [ 32-1:0] buf_v_minus_u_half_rd_dout; - - bram_1rw_1ro_readfirst # - ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) - ) - buf_v_minus_u_half - ( .clk(clk), - .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din), - .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout) - ); - - - // - // Helper Modules - // - wire helper_init_ena; - wire helper_invert_precalc_ena; - wire helper_invert_compare_ena; - wire helper_invert_update_ena; - wire helper_reduce_precalc_ena; - wire helper_reduce_update_ena; - wire helper_copy_ena; - - wire helper_init_rdy; - wire helper_invert_precalc_rdy; - wire helper_invert_compare_rdy; - wire helper_invert_update_rdy; - wire helper_reduce_precalc_rdy; - wire helper_reduce_update_rdy; - wire helper_copy_rdy; - - 
wire helper_init_done = helper_init_rdy && !helper_init_ena; - wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena; - wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena; - wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena; - wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena; - wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena; - wire helper_copy_done = helper_copy_rdy && !helper_copy_ena; - - - // - // Helper Module - Initialization - // - wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr; - wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr; - - wire helper_init_r_wren; - wire helper_init_s_wren; - wire helper_init_u_wren; - wire helper_init_v_wren; - - wire [ 32-1:0] helper_init_r_data; - wire [ 32-1:0] helper_init_s_data; - wire [ 32-1:0] helper_init_u_data; - wire [ 32-1:0] helper_init_v_data; - - modinv_helper_init # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), - - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_init - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_init_ena), - .rdy (helper_init_rdy), - - .a_addr (a_addr), - .q_addr (helper_init_q_addr), - - .r_addr (helper_init_r_addr), - .s_addr (helper_init_s_addr), - .u_addr (helper_init_u_addr), - .v_addr (helper_init_v_addr), - - .q_din (q_din), - .a_din (a_din), - - .r_dout (helper_init_r_data), - .s_dout (helper_init_s_data), - .u_dout (helper_init_u_data), - .v_dout (helper_init_v_data), - - .r_wren (helper_init_r_wren), - .s_wren (helper_init_s_wren), - .u_wren (helper_init_u_wren), - .v_wren (helper_init_v_wren) - ); - - - // - // Helper Module - Inversion Pre-Calculation - // - 
wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr; - - modinv_helper_invert_precalc # - ( - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_invert_precalc - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_invert_precalc_ena), - .rdy (helper_invert_precalc_rdy), - - .r_addr (helper_invert_precalc_r_addr), - .s_addr (helper_invert_precalc_s_addr), - .u_addr (helper_invert_precalc_u_addr), - .v_addr (helper_invert_precalc_v_addr), - - .r_din (buf_r_rd_dout), - .s_din (buf_s_rd_dout), - .u_din (buf_u_rd_dout), - .v_din (buf_v_rd_dout), - - .r_dbl_addr (buf_r_dbl_wr_addr), - .s_dbl_addr (buf_s_dbl_wr_addr), - .r_plus_s_addr (buf_r_plus_s_wr_addr), - - .u_half_addr (buf_u_half_wr_addr), - .v_half_addr (buf_v_half_wr_addr), - .u_minus_v_addr (buf_u_minus_v_wr_addr), - .v_minus_u_addr (buf_v_minus_u_wr_addr), - .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr), - .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr), - - .r_dbl_dout (buf_r_dbl_wr_din), - .s_dbl_dout (buf_s_dbl_wr_din), - .r_plus_s_dout (buf_r_plus_s_wr_din), - - .u_half_dout (buf_u_half_wr_din), - .v_half_dout (buf_v_half_wr_din), - .u_minus_v_dout (buf_u_minus_v_wr_din), - .v_minus_u_dout (buf_v_minus_u_wr_din), - .u_minus_v_half_dout (buf_u_minus_v_half_wr_din), - .v_minus_u_half_dout (buf_v_minus_u_half_wr_din), - - .r_dbl_wren (buf_r_dbl_wr_en), - .s_dbl_wren (buf_s_dbl_wr_en), - .r_plus_s_wren (buf_r_plus_s_wr_en), - - .u_half_wren (buf_u_half_wr_en), - .v_half_wren (buf_v_half_wr_en), - .u_minus_v_wren (buf_u_minus_v_wr_en), - .v_minus_u_wren (buf_v_minus_u_wr_en), - .u_minus_v_half_wren (buf_u_minus_v_half_wr_en), - .v_minus_u_half_wren (buf_v_minus_u_half_wr_en), - - .u_minus_v_din (buf_u_minus_v_wr_dout), - .v_minus_u_din (buf_v_minus_u_wr_dout) - ); - - - // - // Helper 
Module - Inversion Comparison - // - wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr; - - wire flag_invert_u_gt_v; - wire flag_invert_v_eq_1; - wire flag_invert_u_is_even; - wire flag_invert_v_is_even; - - modinv_helper_invert_compare # - ( - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_invert_compare - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_invert_compare_ena), - .rdy (helper_invert_compare_rdy), - - .u_addr (helper_invert_compare_u_addr), - .v_addr (helper_invert_compare_v_addr), - - .u_din (buf_u_rd_dout), - .v_din (buf_v_rd_dout), - - .u_gt_v (flag_invert_u_gt_v), - .v_eq_1 (flag_invert_v_eq_1), - .u_is_even (flag_invert_u_is_even), - .v_is_even (flag_invert_v_is_even) - ); - - - // - // Helper Module - Inversion Update - // - wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr; - - wire helper_invert_update_r_wren; - wire helper_invert_update_s_wren; - wire helper_invert_update_u_wren; - wire helper_invert_update_v_wren; - - wire [ 32-1:0] helper_invert_update_r_data; - wire [ 32-1:0] helper_invert_update_s_data; - wire [ 32-1:0] helper_invert_update_u_data; - wire [ 32-1:0] helper_invert_update_v_data; - - modinv_helper_invert_update # - ( - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_invert_update - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_invert_update_ena), - .rdy (helper_invert_update_rdy), - - .u_gt_v (flag_invert_u_gt_v), - .v_eq_1 (flag_invert_v_eq_1), - .u_is_even (flag_invert_u_is_even), - .v_is_even (flag_invert_v_is_even), - - .r_addr (helper_invert_update_r_addr), - .s_addr (helper_invert_update_s_addr), - .u_addr (helper_invert_update_u_addr), - .v_addr (helper_invert_update_v_addr), - - 
.r_wren (helper_invert_update_r_wren), - .s_wren (helper_invert_update_s_wren), - .u_wren (helper_invert_update_u_wren), - .v_wren (helper_invert_update_v_wren), - - .r_dout (helper_invert_update_r_data), - .s_dout (helper_invert_update_s_data), - .u_dout (helper_invert_update_u_data), - .v_dout (helper_invert_update_v_data), - - .r_dbl_addr (buf_r_dbl_rd_addr), - .s_dbl_addr (buf_s_dbl_rd_addr), - .r_plus_s_addr (buf_r_plus_s_rd_addr), - .u_half_addr (buf_u_half_rd_addr), - .v_half_addr (buf_v_half_rd_addr), - .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr), - .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr), - - .r_dbl_din (buf_r_dbl_rd_dout), - .s_dbl_din (buf_s_dbl_rd_dout), - .r_plus_s_din (buf_r_plus_s_rd_dout), - .u_half_din (buf_u_half_rd_dout), - .v_half_din (buf_v_half_rd_dout), - .u_minus_v_half_din (buf_u_minus_v_half_rd_dout), - .v_minus_u_half_din (buf_v_minus_u_half_rd_dout) - ); - - - // - // Helper Module - Reduction Pre-Calculation - // - wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr; - wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr; - wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr; - - wire helper_reduce_precalc_r_wren; - wire helper_reduce_precalc_u_wren; - wire helper_reduce_precalc_v_wren; - - wire [ 32-1:0] helper_reduce_precalc_r_data; - wire [ 32-1:0] helper_reduce_precalc_u_data; - wire [ 32-1:0] helper_reduce_precalc_v_data; - - wire flag_reduce_s_is_odd; - wire flag_invert_k_is_nul; - - modinv_helper_reduce_precalc # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS), - .K_NUM_BITS (K_NUM_BITS) - ) - helper_reduce_precalc - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_reduce_precalc_ena), - .rdy (helper_reduce_precalc_rdy), - - .r_addr 
(helper_reduce_precalc_r_addr), - .s_addr (helper_reduce_precalc_s_addr), - .u_addr (helper_reduce_precalc_u_addr), - .v_addr (helper_reduce_precalc_v_addr), - .q_addr (helper_reduce_precalc_q_addr), - - .k (k), - - .s_is_odd (flag_reduce_s_is_odd), - .k_is_nul (flag_reduce_k_is_nul), - - .r_din (buf_r_wr_dout), - .s_din (buf_s_rd_dout), - .q_din (q_din), - - .r_wren (helper_reduce_precalc_r_wren), - .u_wren (helper_reduce_precalc_u_wren), - .v_wren (helper_reduce_precalc_v_wren), - - .r_dout (helper_reduce_precalc_r_data), - .u_dout (helper_reduce_precalc_u_data), - .v_dout (helper_reduce_precalc_v_data) - ); - - // - // Helper Module - Reduction Update - // - wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr; - wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr; - - wire helper_reduce_update_s_wren; - - wire [ 32-1:0] helper_reduce_update_s_data; - - modinv_helper_reduce_update # - ( - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_reduce_update - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_reduce_update_ena), - .rdy (helper_reduce_update_rdy), - - .s_is_odd (flag_reduce_s_is_odd), - .k_is_nul (flag_reduce_k_is_nul), - - .s_addr (helper_reduce_update_s_addr), - .u_addr (helper_reduce_update_u_addr), - .v_addr (helper_reduce_update_v_addr), - - .s_wren (helper_reduce_update_s_wren), - - .s_dout (helper_reduce_update_s_data), - - .u_din (buf_u_rd_dout), - .v_din (buf_v_rd_dout) - ); - - - // - // Helper Module - Copying - // - wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr; - - modinv_helper_copy # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), - - .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), - .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) - ) - helper_copy - ( - .clk (clk), - .rst_n (rst_n), - - .ena (helper_copy_ena), - .rdy (helper_copy_rdy), - - .s_addr (helper_copy_s_addr), - .a1_addr (a1_addr), - - .s_din 
(buf_s_rd_dout), - - .a1_dout (a1_dout), - - .a1_wren (a1_wren) - ); - - - // - // Round Counter - // - reg [ROUND_COUNTER_BITS-1:0] round_counter; - wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1; - wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}}; - wire [ROUND_COUNTER_BITS-1:0] round_counter_next = - (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero; - - - // - // FSM - // - localparam FSM_STATE_IDLE = 4'd0; - - localparam FSM_STATE_INIT = 4'd1; - - localparam FSM_STATE_INVERT_PRECALC = 4'd11; - localparam FSM_STATE_INVERT_COMPARE = 4'd12; - localparam FSM_STATE_INVERT_UPDATE = 4'd13; - - localparam FSM_STATE_REDUCE_PRECALC = 4'd14; - localparam FSM_STATE_REDUCE_UPDATE = 4'd15; - - localparam FSM_STATE_COPY = 4'd2; - - localparam FSM_STATE_DONE = 4'd3; - - reg [3:0] fsm_state = FSM_STATE_IDLE; - reg [3:0] fsm_state_dly = FSM_STATE_IDLE; - - wire fsm_state_new = (fsm_state != fsm_state_dly); - - wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ? - FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC; - - wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ? - FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; - else case (fsm_state) - FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE; - FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT; - FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC; - FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE; - FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE; - FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? 
FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC; - FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE; - FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY; - FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; - default: fsm_state <= FSM_STATE_IDLE; - endcase - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE; - else fsm_state_dly <= fsm_state; - - - assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new; - assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new; - assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new; - assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new; - assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new; - assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new; - assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new; - - - // - // Counter Increment - // - always @(posedge clk) begin - // - if ((fsm_state == FSM_STATE_INIT) && helper_init_done) - round_counter <= round_counter_zero; - // - if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done) - round_counter <= round_counter_next; - // - if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done) - round_counter <= round_counter_next; - // - end - - - // - // Q Address Selector - // - always @(*) begin - // - case (fsm_state) - FSM_STATE_INIT: q_addr = helper_init_q_addr; - FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr; - default: q_addr = {OPERAND_ADDR_BITS{1'bX}}; - endcase - // - end - - - // - // Buffer Address Selector - // - always @(*) begin - // - // Write Ports - // - case (fsm_state) - FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr; - FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = 
helper_invert_update_r_addr; - FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr; - default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr; - FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr; - FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr; - default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr; - FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr; - FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr; - default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr; - FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr; - FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr; - default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - // Read Ports - // - case (fsm_state) - FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr; - default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr; - FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr; - FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr; - default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr; - FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr; - FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr; - default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr; - FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr; - 
FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr; - default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; - endcase - // - end - - - // - // Buffer Write Enable Logic - // - always @(*) begin - // - // Write Ports - // - case (fsm_state) - FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren; - FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren; - FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren; - default: buf_r_wr_en = 1'b0; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren; - FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren; - FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren; - default: buf_s_wr_en = 1'b0; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren; - FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren; - FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren; - default: buf_u_wr_en = 1'b0; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren; - FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren; - FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren; - default: buf_v_wr_en = 1'b0; - endcase - // - end - - - // - // Buffer Write Data Selector - // - always @(*) begin - // - case (fsm_state) - FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data; - FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data; - FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data; - default: buf_r_wr_din = {32{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data; - FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data; - FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data; - default: buf_s_wr_din = {32{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data; - FSM_STATE_INVERT_UPDATE: buf_u_wr_din = 
helper_invert_update_u_data; - FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data; - default: buf_u_wr_din = {32{1'bX}}; - endcase - // - case (fsm_state) - FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data; - FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data; - FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data; - default: buf_v_wr_din = {32{1'bX}}; - endcase - // - end - - - // - // Ready Logic - // - reg rdy_reg = 1'b1; - - assign rdy = rdy_reg; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) rdy_reg <= 1'b1; - else begin - - /* clear */ - if (rdy && ena) rdy_reg <= 1'b0; - - /* set */ - if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1; - - end - - - // - // Store Redundant Power of 2 (K) - // - always @(posedge clk) - // - if (helper_init_ena) - k <= {K_NUM_BITS{1'b0}}; - else begin - - if (helper_invert_update_ena && !flag_invert_v_eq_1) - k <= k + 1'b1; - - if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}})) - k <= k - 1'b1; - - end - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_multiplier_384.v b/rtl/modular/modular_multiplier_384.v index 705cd44..b2e1251 100644 --- a/rtl/modular/modular_multiplier_384.v +++ b/rtl/modular/modular_multiplier_384.v @@ -1,402 +1,402 @@ -//------------------------------------------------------------------------------ -// -// modular_multiplier_384.v -// ----------------------------------------------------------------------------- -// Modular multiplier. 
-// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-// -//------------------------------------------------------------------------------ - -module modular_multiplier_384 - ( - clk, rst_n, - ena, rdy, - a_addr, b_addr, n_addr, p_addr, p_wren, - a_din, b_din, n_din, p_dout - ); - - - // - // Constants - // - localparam OPERAND_NUM_WORDS = 12; - localparam WORD_COUNTER_WIDTH = 4; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? - WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; - end - endfunction - - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? - WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word - output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word - output wire p_wren; // store current P word now - - input wire [ 31:0] a_din; // A - input wire [ 31:0] b_din; // B - input wire [ 31:0] n_din; // N (must be P-384!) 
- output wire [ 31:0] p_dout; // P = A * B mod N - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_a; - reg [WORD_COUNTER_WIDTH-1:0] index_b; - - /* map registers to output ports */ - assign a_addr = index_a; - assign b_addr = index_b; - - // - // FSM - // - localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; - wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; - wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; - wire 
[0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; - - wire inc_index_a = |fsm_shreg_inc_index_a; - wire store_word_a = |fsm_shreg_store_word_a; - wire inc_index_b = |fsm_shreg_inc_index_b; - wire clear_mac_ab = |fsm_shreg_inc_index_b; - wire shift_wide_a = |fsm_shreg_inc_index_b; - wire enable_mac_ab = |fsm_shreg_inc_index_b; - wire store_si_msb = |fsm_shreg_store_si_msb; - wire store_si_lsb = fsm_shreg_store_si_lsb; - wire shift_si = |fsm_shreg_shift_si; - wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; - wire store_c_word = |fsm_shreg_store_c_word; - wire reduce_start = fsm_shreg_reduce_start; - wire reduce_stop = fsm_shreg_reduce_stop; - - - // - // FSM Logic - // - wire reduce_done; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) - fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else if (!reduce_stop || reduce_done) - fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Word Index Increment Logic - // - reg index_b_ff; - - always @(posedge clk) - // - if (inc_index_b) index_b_ff <= ~index_b_ff; - else index_b_ff <= 1'b0; - - always @(posedge clk) - // - if (rdy) begin - // - index_a <= WORD_INDEX_ZERO; - index_b <= WORD_INDEX_LAST; - // - end else begin - // - if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); - if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); - // - end - - - // - // Wide Operand Buffer - // - reg [383:0] buf_a_wide; - - always @(posedge clk) - // - if (store_word_a) - buf_a_wide <= {buf_a_wide[16 +: 384 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[384 - 2 * 16 +: 16]}; - else if (shift_wide_a) - buf_a_wide <= {buf_a_wide[384-(16+1):0], buf_a_wide[384-16+:16]}; - - - // - // Multiplier Array - // - wire mac_inhibit; // control signal to pause all 
accumulators - - wire [46: 0] mac[0:23]; // outputs of all accumulators - reg [23: 0] mac_clear; // individual per-accumulator clear flag - - assign mac_inhibit = ~enable_mac_ab; - - always @(posedge clk) - // - if (!clear_mac_ab) - mac_clear <= {24{1'b1}}; - else begin - - if (mac_clear == {24{1'b1}}) - mac_clear <= {{22{1'b0}}, 1'b1, 1'b0}; - else - mac_clear <= (mac_clear[23] == 1'b0) ? {mac_clear[22:0], 1'b0} : {24{1'b1}}; - - - end - - // - // Array of parallel multipliers - // - genvar i; - generate for (i=0; i<24; i=i+1) - begin : gen_mac_array - // - mac16_wrapper mac16_inst - ( - .clk (clk), - .ce (~mac_inhibit), - - .clr (mac_clear[i]), - - .a (buf_a_wide[16*i+:16]), - .b (index_b_ff ? b_din[15:0] : b_din[31:16]), - .s (mac[i]) - ); - // - end - endgenerate - - // - // Intermediate Words - // - reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; - reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; - - - wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; - wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; - - generate for (i=0; i<24; i=i+1) - begin : gen_si_lsb_new - assign si_lsb_new[47*i+:47] = mac[23-i]; - end - endgenerate - - generate for (i=1; i<24; i=i+1) - begin : gen_si_msb_new - assign si_msb_new[47*(23-i)+:47] = mac_clear[i] ? 
mac[i] : si_msb[47*(23-i)+:47]; - end - endgenerate - - always @(posedge clk) begin - // - if (shift_si) begin - si_msb <= {{2*47{1'b0}}, si_msb[23*47-1:2*47]}; - si_lsb <= {si_msb[2*47-1:0], si_lsb[24*47-1:2*47]}; - end else begin - - if (store_si_msb) - si_msb <= si_msb_new; - - if (store_si_lsb) - si_lsb <= si_lsb_new; - end - - end - - - // - // Accumulators - // - wire [46: 0] add47_cw0_s; - wire [46: 0] add47_cw1_s; - - - // - // cw0, b, cw1, b - // - reg [30: 0] si_prev_dly; - reg [15: 0] si_next_dly; - - always @(posedge clk) - // - if (shift_si) - si_prev_dly <= si_lsb[93:63]; - else - si_prev_dly <= {31{1'b0}}; - - always @(posedge clk) - // - si_next_dly <= si_lsb[62:47]; - - wire [46: 0] add47_cw0_a = si_lsb[46:0]; - wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; - - wire [46: 0] add47_cw1_a = add47_cw0_s; - wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; - - adder47_wrapper add47_cw0_inst - ( - .clk (clk), - .a (add47_cw0_a), - .b (add47_cw0_b), - .s (add47_cw0_s) - ); - - adder47_wrapper add47_cw1_inst - ( - .clk (clk), - .a (add47_cw1_a), - .b (add47_cw1_b), - .s (add47_cw1_s) - ); - - - - // - // Full-Size Product - // - reg [WORD_COUNTER_WIDTH:0] bram_c_addr; - - wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; - wire [ 31:0] reduce_c_word; - - always @(posedge clk) - // - if (store_c_word) - bram_c_addr <= bram_c_addr + 1'b1; - else - bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; - - bram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) - ) - bram_c_inst - ( - .clk (clk), - - .a_addr (bram_c_addr), - .a_wr (store_c_word), - .a_in (add47_cw1_s[31:0]), - .a_out (), - - .b_addr (reduce_c_addr), - .b_out (reduce_c_word) - ); - - - // - // Reduction Stage - // - modular_reductor_384 reduce_384_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (reduce_start), - .rdy (reduce_done), - - .x_addr (reduce_c_addr), - .n_addr (n_addr), - .p_addr (p_addr), - 
.p_wren (p_wren), - - .x_din (reduce_c_word), - .n_din (n_din), - .p_dout (p_dout) - ); - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +// +// modular_multiplier_384.v +// ----------------------------------------------------------------------------- +// Modular multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_multiplier_384 + ( + clk, rst_n, + ena, rdy, + a_addr, b_addr, n_addr, p_addr, p_wren, + a_din, b_din, n_din, p_dout + ); + + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 12; + localparam WORD_COUNTER_WIDTH = 4; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? 
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word + output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N (must be P-384!) + output wire [ 31:0] p_dout; // P = A * B mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_a; + reg [WORD_COUNTER_WIDTH-1:0] index_b; + + /* map registers to output ports */ + assign a_addr = index_a; + assign b_addr = index_b; + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : 
FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; + + wire inc_index_a = |fsm_shreg_inc_index_a; + wire store_word_a = |fsm_shreg_store_word_a; + wire inc_index_b = |fsm_shreg_inc_index_b; + wire clear_mac_ab = |fsm_shreg_inc_index_b; + wire shift_wide_a = |fsm_shreg_inc_index_b; + wire enable_mac_ab = |fsm_shreg_inc_index_b; + wire store_si_msb = |fsm_shreg_store_si_msb; + wire store_si_lsb = fsm_shreg_store_si_lsb; + wire shift_si = |fsm_shreg_shift_si; + wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; + wire store_c_word = |fsm_shreg_store_c_word; + wire reduce_start = fsm_shreg_reduce_start; + wire reduce_stop = fsm_shreg_reduce_stop; + + + // + // FSM Logic + // + wire reduce_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done) + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + reg index_b_ff; + + always @(posedge clk) + // + if (inc_index_b) index_b_ff <= ~index_b_ff; + else index_b_ff 
<= 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + index_a <= WORD_INDEX_ZERO; + index_b <= WORD_INDEX_LAST; + // + end else begin + // + if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); + if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); + // + end + + + // + // Wide Operand Buffer + // + reg [383:0] buf_a_wide; + + always @(posedge clk) + // + if (store_word_a) + buf_a_wide <= {buf_a_wide[16 +: 384 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[384 - 2 * 16 +: 16]}; + else if (shift_wide_a) + buf_a_wide <= {buf_a_wide[384-(16+1):0], buf_a_wide[384-16+:16]}; + + + // + // Multiplier Array + // + wire mac_inhibit; // control signal to pause all accumulators + + wire [46: 0] mac[0:23]; // outputs of all accumulators + reg [23: 0] mac_clear; // individual per-accumulator clear flag + + assign mac_inhibit = ~enable_mac_ab; + + always @(posedge clk) + // + if (!clear_mac_ab) + mac_clear <= {24{1'b1}}; + else begin + + if (mac_clear == {24{1'b1}}) + mac_clear <= {{22{1'b0}}, 1'b1, 1'b0}; + else + mac_clear <= (mac_clear[23] == 1'b0) ? {mac_clear[22:0], 1'b0} : {24{1'b1}}; + + + end + + // + // Array of parallel multipliers + // + genvar i; + generate for (i=0; i<24; i=i+1) + begin : gen_mac_array + // + mac16_wrapper mac16_inst + ( + .clk (clk), + .ce (~mac_inhibit), + + .clr (mac_clear[i]), + + .a (buf_a_wide[16*i+:16]), + .b (index_b_ff ? b_din[15:0] : b_din[31:16]), + .s (mac[i]) + ); + // + end + endgenerate + + // + // Intermediate Words + // + reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; + reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; + + + wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; + wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; + + generate for (i=0; i<24; i=i+1) + begin : gen_si_lsb_new + assign si_lsb_new[47*i+:47] = mac[23-i]; + end + endgenerate + + generate for (i=1; i<24; i=i+1) + begin : gen_si_msb_new + assign si_msb_new[47*(23-i)+:47] = mac_clear[i] ? 
mac[i] : si_msb[47*(23-i)+:47]; + end + endgenerate + + always @(posedge clk) begin + // + if (shift_si) begin + si_msb <= {{2*47{1'b0}}, si_msb[23*47-1:2*47]}; + si_lsb <= {si_msb[2*47-1:0], si_lsb[24*47-1:2*47]}; + end else begin + + if (store_si_msb) + si_msb <= si_msb_new; + + if (store_si_lsb) + si_lsb <= si_lsb_new; + end + + end + + + // + // Accumulators + // + wire [46: 0] add47_cw0_s; + wire [46: 0] add47_cw1_s; + + + // + // cw0, b, cw1, b + // + reg [30: 0] si_prev_dly; + reg [15: 0] si_next_dly; + + always @(posedge clk) + // + if (shift_si) + si_prev_dly <= si_lsb[93:63]; + else + si_prev_dly <= {31{1'b0}}; + + always @(posedge clk) + // + si_next_dly <= si_lsb[62:47]; + + wire [46: 0] add47_cw0_a = si_lsb[46:0]; + wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; + + wire [46: 0] add47_cw1_a = add47_cw0_s; + wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; + + adder47_wrapper add47_cw0_inst + ( + .clk (clk), + .a (add47_cw0_a), + .b (add47_cw0_b), + .s (add47_cw0_s) + ); + + adder47_wrapper add47_cw1_inst + ( + .clk (clk), + .a (add47_cw1_a), + .b (add47_cw1_b), + .s (add47_cw1_s) + ); + + + + // + // Full-Size Product + // + reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + + wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; + wire [ 31:0] reduce_c_word; + + always @(posedge clk) + // + if (store_c_word) + bram_c_addr <= bram_c_addr + 1'b1; + else + bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) + ) + bram_c_inst + ( + .clk (clk), + + .a_addr (bram_c_addr), + .a_wr (store_c_word), + .a_in (add47_cw1_s[31:0]), + .a_out (), + + .b_addr (reduce_c_addr), + .b_out (reduce_c_word) + ); + + + // + // Reduction Stage + // + modular_reductor_384 reduce_384_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_done), + + .x_addr (reduce_c_addr), + .n_addr (n_addr), + .p_addr (p_addr), + 
.p_wren (p_wren), + + .x_din (reduce_c_word), + .n_din (n_din), + .p_dout (p_dout) + ); + + + endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_reductor_384.v b/rtl/modular/modular_reductor_384.v index 4aea5fe..866ad7d 100644 --- a/rtl/modular/modular_reductor_384.v +++ b/rtl/modular/modular_reductor_384.v @@ -1,739 +1,739 @@ -//------------------------------------------------------------------------------ -// -// modular_reductor_384.v -// ----------------------------------------------------------------------------- -// Modular reductor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module modular_reductor_384 - ( - clk, rst_n, - ena, rdy, - x_addr, n_addr, p_addr, p_wren, - x_din, n_din, p_dout - ); - - // - // Constants - // - localparam OPERAND_NUM_WORDS = 12; - localparam WORD_COUNTER_WIDTH = 4; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; - input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? - WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word - output wire p_wren; // store current P word now - - input wire [ 31:0] x_din; // X - input wire [ 31:0] n_din; // N (must be P-256!) 
- output wire [ 31:0] p_dout; // P = X mod N - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH:0] index_x; - - - /* map registers to output ports */ - assign x_addr = index_x; - - - // - // FSM - // - localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS]; - wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS]; - wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5]; - - wire [5-1:0] fsm_shreg_reduce_stage_start; - wire [5-1:0] fsm_shreg_reduce_stage_stop; - - genvar s; - generate for (s=0; s<5; s=s+1) - begin : gen_fsm_shreg_reduce_stages - assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1]; - assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2]; - end - endgenerate - - wire inc_index_x = |fsm_shreg_inc_index_x; - wire store_word_z = |fsm_shreg_store_word_z; - wire reduce_start = |fsm_shreg_reduce_stage_start; - wire reduce_stop = |fsm_shreg_reduce_stage_stop; - wire store_p = fsm_shreg_reduce_stage_stop[0]; - - - wire reduce_adder0_done; - wire reduce_adder1_done; - wire reduce_subtractor_done; - - wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) - // - fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else if (!reduce_stop || reduce_done_all) - // - fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) - // - index_x <= WORD_INDEX_LAST; - // - else if 
(inc_index_x) - // - index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x); - - - // - // Look-up Table - // - - // - // Take a look at the corresponding C model for more information - // on how exactly the math behind reduction works. The first step - // is to assemble nine 384-bit values ("z-words") from 32-bit parts - // of the full 768-bit product ("c-word"). The problem with z10 is - // that it contains c23 two times. This implementation scans from - // c23 to c0 and writes current part of c-word into corresponding - // parts of z-words. Since those 32-bit parts are stored in block - // memories, one source word can only be written to one location in - // every z-word at a time. The trick is to delay c23 and then write - // the delayed value at the corresponding location in z10 instead of - // the next c22. "z_save" flag is used to indicate that the current - // word should be delayed and written once again during the next cycle. - // - - - reg [10*WORD_COUNTER_WIDTH-1:0] z_addr; // - reg [10 -1:0] z_wren; // - reg [10 -1:0] z_mask; // mask input to store zero word - reg [10 -1:0] z_save; // save previous word once again - - always @(posedge clk) - // - if (inc_index_x) - // - case (index_x) - // - // s10 s9 s8 s7 s6 s5 s4 s3 s2 s1 - // || || || || || || || || || || - 5'd00: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd00}; - 5'd01: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd01}; - 5'd02: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd02}; - 5'd03: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd03}; - 5'd04: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd04}; - 5'd05: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd05}; - 5'd06: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd06}; - 5'd07: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 
4'dxx, 4'd07}; - 5'd08: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd08}; - 5'd09: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd09}; - 5'd10: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd10}; - 5'd11: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd11}; - - 5'd12: z_addr <= {4'd00, 4'd00, 4'd01, 4'd01, 4'd00, 4'd04, 4'd03, 4'd00, 4'd00, 4'dxx}; - 5'd13: z_addr <= {4'd01, 4'd05, 4'd02, 4'd02, 4'd01, 4'd05, 4'd04, 4'd01, 4'd01, 4'dxx}; - 5'd14: z_addr <= {4'd02, 4'd06, 4'd03, 4'd06, 4'd02, 4'd06, 4'd05, 4'd02, 4'd02, 4'dxx}; - 5'd15: z_addr <= {4'd05, 4'd07, 4'd04, 4'd07, 4'd03, 4'd07, 4'd06, 4'd03, 4'd03, 4'dxx}; - 5'd16: z_addr <= {4'd06, 4'd08, 4'd05, 4'd08, 4'd08, 4'd08, 4'd07, 4'd04, 4'd07, 4'dxx}; - 5'd17: z_addr <= {4'd07, 4'd09, 4'd06, 4'd09, 4'd09, 4'd09, 4'd08, 4'd05, 4'd08, 4'dxx}; - 5'd18: z_addr <= {4'd08, 4'd10, 4'd07, 4'd10, 4'd10, 4'd10, 4'd09, 4'd06, 4'd09, 4'dxx}; - 5'd19: z_addr <= {4'd09, 4'd11, 4'd08, 4'd11, 4'd11, 4'd11, 4'd10, 4'd07, 4'd10, 4'dxx}; - 5'd20: z_addr <= {4'd10, 4'd01, 4'd09, 4'd00, 4'd04, 4'd03, 4'd11, 4'd08, 4'd11, 4'dxx}; - 5'd21: z_addr <= {4'd11, 4'd02, 4'd10, 4'd03, 4'd05, 4'd00, 4'd00, 4'd09, 4'd04, 4'dxx}; - 5'd22: z_addr <= {4'd04, 4'd03, 4'd11, 4'd04, 4'd06, 4'd02, 4'd01, 4'd10, 4'd05, 4'dxx}; - 5'd23: z_addr <= {4'd03, 4'd04, 4'd00, 4'd05, 4'd07, 4'd01, 4'd02, 4'd11, 4'd06, 4'dxx}; - // - default: z_addr <= {10*WORD_COUNTER_WIDTH{1'bX}}; - // - endcase - - always @(posedge clk) - // - case (index_x) - // - // 10 9 8 7 6 5 4 3 2 1 - // | | | | | | | | | | - 5'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd04: z_wren <= {1'b0, 
1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd08: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd09: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd10: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - 5'd11: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; - - 5'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd16: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd17: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd18: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd19: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd20: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd21: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd22: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - 5'd23: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; - // - default: z_wren <= {10{1'b0}}; - // - endcase - - always @(posedge clk) - // - if (inc_index_x) - // - case (index_x) - // - // 10 9 8 7 6 5 4 3 2 1 - // | | | | | | | | | | - 5'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 
1'b0, 1'b0, 1'b0, 1'b0}; - 5'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd08: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd10: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd11: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - - 5'd12: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd13: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd14: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd15: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd16: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd17: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd18: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd19: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd20: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; - 5'd21: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd22: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd23: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - // - default: z_mask <= {10{1'bX}}; - // - endcase - - always @(posedge clk) - // - if (inc_index_x) - // - case (index_x) - // - // 10 9 8 7 6 5 4 3 2 1 - // | | | | | | | | | | - 5'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd01: 
z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - - 5'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd16: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd17: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd18: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd19: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd20: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd21: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd22: z_save <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - 5'd23: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; - // - default: z_save <= {10{1'bX}}; - // - endcase - - - // - // Intermediate Numbers - // - reg [WORD_COUNTER_WIDTH-1:0] 
reduce_z_addr[1:10]; - wire [ 32-1:0] reduce_z_dout[1:10]; - - reg [31: 0] x_din_dly; - always @(posedge clk) - // - x_din_dly <= x_din; - - - genvar z; - generate for (z=1; z<=10; z=z+1) - // - begin : gen_z_bram - // - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_c_inst - ( - .clk (clk), - - .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), - .a_wr (z_wren[z-1] & store_word_z), - .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), - .a_out (), - - .b_addr (reduce_z_addr[z]), - .b_out (reduce_z_dout[z]) - ); - // - end - // - endgenerate - - - - - wire [ 32-1:0] bram_sum0_wr_din; - wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr; - wire bram_sum0_wr_wren; - - wire [ 32-1:0] bram_sum1_wr_din; - wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr; - wire bram_sum1_wr_wren; - - wire [ 32-1:0] bram_diff_wr_din; - wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr; - wire bram_diff_wr_wren; - - wire [ 32-1:0] bram_sum0_rd_dout; - reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr; - - wire [ 32-1:0] bram_sum1_rd_dout; - reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr; - - wire [ 32-1:0] bram_diff_rd_dout; - reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr; - - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_sum0_inst - ( - .clk (clk), - - .a_addr (bram_sum0_wr_addr), - .a_wr (bram_sum0_wr_wren), - .a_in (bram_sum0_wr_din), - .a_out (), - - .b_addr (bram_sum0_rd_addr), - .b_out (bram_sum0_rd_dout) - ); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_sum1_inst - ( - .clk (clk), - - .a_addr (bram_sum1_wr_addr), - .a_wr (bram_sum1_wr_wren), - .a_in (bram_sum1_wr_din), - .a_out (), - - .b_addr (bram_sum1_rd_addr), - .b_out (bram_sum1_rd_dout) - ); - - bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) - bram_diff_inst - ( - .clk (clk), - - .a_addr (bram_diff_wr_addr), - .a_wr (bram_diff_wr_wren), - .a_in 
(bram_diff_wr_din), - .a_out (), - - .b_addr (bram_diff_rd_addr), - .b_out (bram_diff_rd_dout) - ); - - - wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr; - wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr; - wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr; - - reg [ 32-1:0] adder0_a_din; - reg [ 32-1:0] adder0_b_din; - - reg [ 32-1:0] adder1_a_din; - reg [ 32-1:0] adder1_b_din; - - reg [ 32-1:0] subtractor_a_din; - reg [ 32-1:0] subtractor_b_din; - - // n_addr - only 1 output, because all modules are in sync - - modular_adder # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) - ) - adder_inst0 - ( - .clk (clk), - .rst_n (rst_n), - - .ena (reduce_start), - .rdy (reduce_adder0_done), - - .ab_addr (adder0_ab_addr), - .n_addr (), - .s_addr (bram_sum0_wr_addr), - .s_wren (bram_sum0_wr_wren), - - .a_din (adder0_a_din), - .b_din (adder0_b_din), - .n_din (n_din), - .s_dout (bram_sum0_wr_din) - ); - - modular_adder # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) - ) - adder_inst1 - ( - .clk (clk), - .rst_n (rst_n), - - .ena (reduce_start), - .rdy (reduce_adder1_done), - - .ab_addr (adder1_ab_addr), - .n_addr (), - .s_addr (bram_sum1_wr_addr), - .s_wren (bram_sum1_wr_wren), - - .a_din (adder1_a_din), - .b_din (adder1_b_din), - .n_din (n_din), - .s_dout (bram_sum1_wr_din) - ); - - modular_subtractor # - ( - .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), - .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) - ) - subtractor_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (reduce_start), - .rdy (reduce_subtractor_done), - - .ab_addr (subtractor_ab_addr), - .n_addr (n_addr), - .d_addr (bram_diff_wr_addr), - .d_wren (bram_diff_wr_wren), - - .a_din (subtractor_a_din), - .b_din (subtractor_b_din), - .n_din (n_din), - .d_dout (bram_diff_wr_din) - ); - - - // - // Address (Operand) Selector - // - always @(*) - // - case (fsm_shreg_reduce_stage_stop) - // - 5'b10000: begin - reduce_z_addr[ 1] = adder0_ab_addr; - 
reduce_z_addr[ 2] = adder1_ab_addr; - reduce_z_addr[ 3] = adder0_ab_addr; - reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 8] = subtractor_ab_addr; - reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - 5'b01000: begin - // - reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 4] = adder0_ab_addr; - reduce_z_addr[ 5] = adder1_ab_addr; - reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 9] = subtractor_ab_addr; - reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum0_rd_addr = adder0_ab_addr; - bram_sum1_rd_addr = adder1_ab_addr; - bram_diff_rd_addr = subtractor_ab_addr; - end - // - 5'b00100: begin - // - reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[6] = adder0_ab_addr; - reduce_z_addr[7] = adder1_ab_addr; - reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[10] = subtractor_ab_addr; - bram_sum0_rd_addr = adder0_ab_addr; - bram_sum1_rd_addr = adder1_ab_addr; - bram_diff_rd_addr = subtractor_ab_addr; - end - // - 5'b00010: begin - // - reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 
4] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum0_rd_addr = adder0_ab_addr; - bram_sum1_rd_addr = adder0_ab_addr; - bram_diff_rd_addr = subtractor_ab_addr; - end - // - 5'b00001: begin - // - reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum0_rd_addr = adder0_ab_addr; - bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - bram_diff_rd_addr = adder0_ab_addr; - end - // - default: begin - reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; - reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; - end - // - endcase - - - // - // adder 0 - // - always @(*) begin - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: adder0_a_din = reduce_z_dout[1]; - 5'b01000: adder0_a_din = bram_sum0_rd_dout; - 
5'b00100: adder0_a_din = bram_sum0_rd_dout; - 5'b00010: adder0_a_din = bram_sum0_rd_dout; - 5'b00001: adder0_a_din = bram_sum0_rd_dout; - default: adder0_a_din = {32{1'bX}}; - endcase - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: adder0_b_din = reduce_z_dout[3]; - 5'b01000: adder0_b_din = reduce_z_dout[4]; - 5'b00100: adder0_b_din = reduce_z_dout[6]; - 5'b00010: adder0_b_din = bram_sum1_rd_dout; - 5'b00001: adder0_b_din = bram_diff_rd_dout; - default: adder0_b_din = {32{1'bX}}; - endcase - // - end - - // - // adder 1 - // - always @(*) begin - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: adder1_a_din = reduce_z_dout[2]; - 5'b01000: adder1_a_din = bram_sum1_rd_dout; - 5'b00100: adder1_a_din = bram_sum1_rd_dout; - 5'b00010: adder1_a_din = {32{1'bX}}; - 5'b00001: adder1_a_din = {32{1'bX}}; - default: adder1_a_din = {32{1'bX}}; - endcase - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: adder1_b_din = reduce_z_dout[2]; - 5'b01000: adder1_b_din = reduce_z_dout[5]; - 5'b00100: adder1_b_din = reduce_z_dout[7]; - 5'b00010: adder1_b_din = {32{1'bX}}; - 5'b00001: adder1_b_din = {32{1'bX}}; - default: adder1_b_din = {32{1'bX}}; - endcase - // - end - - - // - // subtractor - // - always @(*) begin - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: subtractor_a_din = {32{1'b0}}; - 5'b01000: subtractor_a_din = bram_diff_rd_dout; - 5'b00100: subtractor_a_din = bram_diff_rd_dout; - 5'b00010: subtractor_a_din = bram_diff_rd_dout; - 5'b00001: subtractor_a_din = {32{1'bX}}; - default: subtractor_a_din = {32{1'bX}}; - endcase - // - case (fsm_shreg_reduce_stage_stop) - 5'b10000: subtractor_b_din = reduce_z_dout[8]; - 5'b01000: subtractor_b_din = reduce_z_dout[9]; - 5'b00100: subtractor_b_din = reduce_z_dout[10]; - 5'b00010: subtractor_b_din = {32{1'b0}}; - 5'b00001: subtractor_b_din = {32{1'bX}}; - default: subtractor_b_din = {32{1'bX}}; - endcase - // - end - - - // - // Address Mapping - // - assign p_addr = bram_sum0_wr_addr; - assign p_wren = 
bram_sum0_wr_wren & store_p; - assign p_dout = bram_sum0_wr_din; - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +// +// modular_reductor_384.v +// ----------------------------------------------------------------------------- +// Modular reductor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_reductor_384 + ( + clk, rst_n, + ena, rdy, + x_addr, n_addr, p_addr, p_wren, + x_din, n_din, p_dout + ); + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 12; + localparam WORD_COUNTER_WIDTH = 4; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] x_din; // X + input wire [ 31:0] n_din; // N (must be P-384!)
+ output wire [ 31:0] p_dout; // P = X mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH:0] index_x; + + + /* map registers to output ports */ + assign x_addr = index_x; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5]; + + wire [5-1:0] fsm_shreg_reduce_stage_start; + wire [5-1:0] fsm_shreg_reduce_stage_stop; + + genvar s; + generate for (s=0; s<5; s=s+1) + begin : gen_fsm_shreg_reduce_stages + assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1]; + assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2]; + end + endgenerate + + wire inc_index_x = |fsm_shreg_inc_index_x; + wire store_word_z = |fsm_shreg_store_word_z; + wire reduce_start = |fsm_shreg_reduce_stage_start; + wire reduce_stop = |fsm_shreg_reduce_stage_stop; + wire store_p = fsm_shreg_reduce_stage_stop[0]; + + + wire reduce_adder0_done; + wire reduce_adder1_done; + wire reduce_subtractor_done; + + wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + // + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done_all) + // + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) + // + index_x <= WORD_INDEX_LAST; + // + else if 
(inc_index_x) + // + index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x); + + + // + // Look-up Table + // + + // + // Take a look at the corresponding C model for more information + // on how exactly the math behind reduction works. The first step + // is to assemble ten 384-bit values ("z-words") from 32-bit parts + // of the full 768-bit product ("c-word"). The problem with z10 is + // that it contains c23 two times. This implementation scans from + // c23 to c0 and writes current part of c-word into corresponding + // parts of z-words. Since those 32-bit parts are stored in block + // memories, one source word can only be written to one location in + // every z-word at a time. The trick is to delay c23 and then write + // the delayed value at the corresponding location in z10 instead of + // the next c22. "z_save" flag is used to indicate that the current + // word should be delayed and written once again during the next cycle. + // + + + reg [10*WORD_COUNTER_WIDTH-1:0] z_addr; // + reg [10 -1:0] z_wren; // + reg [10 -1:0] z_mask; // mask input to store zero word + reg [10 -1:0] z_save; // save previous word once again + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // s10 s9 s8 s7 s6 s5 s4 s3 s2 s1 + // || || || || || || || || || || + 5'd00: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd00}; + 5'd01: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd01}; + 5'd02: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd02}; + 5'd03: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd03}; + 5'd04: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd04}; + 5'd05: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd05}; + 5'd06: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd06}; + 5'd07: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx,
4'dxx, 4'd07}; + 5'd08: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd08}; + 5'd09: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd09}; + 5'd10: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd10}; + 5'd11: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd11}; + + 5'd12: z_addr <= {4'd00, 4'd00, 4'd01, 4'd01, 4'd00, 4'd04, 4'd03, 4'd00, 4'd00, 4'dxx}; + 5'd13: z_addr <= {4'd01, 4'd05, 4'd02, 4'd02, 4'd01, 4'd05, 4'd04, 4'd01, 4'd01, 4'dxx}; + 5'd14: z_addr <= {4'd02, 4'd06, 4'd03, 4'd06, 4'd02, 4'd06, 4'd05, 4'd02, 4'd02, 4'dxx}; + 5'd15: z_addr <= {4'd05, 4'd07, 4'd04, 4'd07, 4'd03, 4'd07, 4'd06, 4'd03, 4'd03, 4'dxx}; + 5'd16: z_addr <= {4'd06, 4'd08, 4'd05, 4'd08, 4'd08, 4'd08, 4'd07, 4'd04, 4'd07, 4'dxx}; + 5'd17: z_addr <= {4'd07, 4'd09, 4'd06, 4'd09, 4'd09, 4'd09, 4'd08, 4'd05, 4'd08, 4'dxx}; + 5'd18: z_addr <= {4'd08, 4'd10, 4'd07, 4'd10, 4'd10, 4'd10, 4'd09, 4'd06, 4'd09, 4'dxx}; + 5'd19: z_addr <= {4'd09, 4'd11, 4'd08, 4'd11, 4'd11, 4'd11, 4'd10, 4'd07, 4'd10, 4'dxx}; + 5'd20: z_addr <= {4'd10, 4'd01, 4'd09, 4'd00, 4'd04, 4'd03, 4'd11, 4'd08, 4'd11, 4'dxx}; + 5'd21: z_addr <= {4'd11, 4'd02, 4'd10, 4'd03, 4'd05, 4'd00, 4'd00, 4'd09, 4'd04, 4'dxx}; + 5'd22: z_addr <= {4'd04, 4'd03, 4'd11, 4'd04, 4'd06, 4'd02, 4'd01, 4'd10, 4'd05, 4'dxx}; + 5'd23: z_addr <= {4'd03, 4'd04, 4'd00, 4'd05, 4'd07, 4'd01, 4'd02, 4'd11, 4'd06, 4'dxx}; + // + default: z_addr <= {10*WORD_COUNTER_WIDTH{1'bX}}; + // + endcase + + always @(posedge clk) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd04: z_wren <= {1'b0, 
1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd08: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd09: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd10: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd11: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + + 5'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd16: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd17: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd18: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd19: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd20: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd21: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd22: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd23: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + // + default: z_wren <= {10{1'b0}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 
1'b0, 1'b0, 1'b0, 1'b0}; + 5'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd08: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd10: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd11: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + + 5'd12: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd13: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd14: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd15: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd16: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd17: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd18: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd19: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd20: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd21: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd22: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd23: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_mask <= {10{1'bX}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd01: 
z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + + 5'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd16: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd17: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd18: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd19: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd20: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd21: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd22: z_save <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd23: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_save <= {10{1'bX}}; + // + endcase + + + // + // Intermediate Numbers + // + reg [WORD_COUNTER_WIDTH-1:0] 
reduce_z_addr[1:10]; + wire [ 32-1:0] reduce_z_dout[1:10]; + + reg [31: 0] x_din_dly; + always @(posedge clk) + // + x_din_dly <= x_din; + + + genvar z; + generate for (z=1; z<=10; z=z+1) + // + begin : gen_z_bram + // + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_c_inst + ( + .clk (clk), + + .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), + .a_wr (z_wren[z-1] & store_word_z), + .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), + .a_out (), + + .b_addr (reduce_z_addr[z]), + .b_out (reduce_z_dout[z]) + ); + // + end + // + endgenerate + + + + + wire [ 32-1:0] bram_sum0_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr; + wire bram_sum0_wr_wren; + + wire [ 32-1:0] bram_sum1_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr; + wire bram_sum1_wr_wren; + + wire [ 32-1:0] bram_diff_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr; + wire bram_diff_wr_wren; + + wire [ 32-1:0] bram_sum0_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr; + + wire [ 32-1:0] bram_sum1_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr; + + wire [ 32-1:0] bram_diff_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr; + + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum0_inst + ( + .clk (clk), + + .a_addr (bram_sum0_wr_addr), + .a_wr (bram_sum0_wr_wren), + .a_in (bram_sum0_wr_din), + .a_out (), + + .b_addr (bram_sum0_rd_addr), + .b_out (bram_sum0_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum1_inst + ( + .clk (clk), + + .a_addr (bram_sum1_wr_addr), + .a_wr (bram_sum1_wr_wren), + .a_in (bram_sum1_wr_din), + .a_out (), + + .b_addr (bram_sum1_rd_addr), + .b_out (bram_sum1_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_diff_inst + ( + .clk (clk), + + .a_addr (bram_diff_wr_addr), + .a_wr (bram_diff_wr_wren), + .a_in 
(bram_diff_wr_din), + .a_out (), + + .b_addr (bram_diff_rd_addr), + .b_out (bram_diff_rd_dout) + ); + + + wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr; + + reg [ 32-1:0] adder0_a_din; + reg [ 32-1:0] adder0_b_din; + + reg [ 32-1:0] adder1_a_din; + reg [ 32-1:0] adder1_b_din; + + reg [ 32-1:0] subtractor_a_din; + reg [ 32-1:0] subtractor_b_din; + + // n_addr - only 1 output, because all modules are in sync + + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst0 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder0_done), + + .ab_addr (adder0_ab_addr), + .n_addr (), + .s_addr (bram_sum0_wr_addr), + .s_wren (bram_sum0_wr_wren), + + .a_din (adder0_a_din), + .b_din (adder0_b_din), + .n_din (n_din), + .s_dout (bram_sum0_wr_din) + ); + + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst1 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder1_done), + + .ab_addr (adder1_ab_addr), + .n_addr (), + .s_addr (bram_sum1_wr_addr), + .s_wren (bram_sum1_wr_wren), + + .a_din (adder1_a_din), + .b_din (adder1_b_din), + .n_din (n_din), + .s_dout (bram_sum1_wr_din) + ); + + modular_subtractor # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + subtractor_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_subtractor_done), + + .ab_addr (subtractor_ab_addr), + .n_addr (n_addr), + .d_addr (bram_diff_wr_addr), + .d_wren (bram_diff_wr_wren), + + .a_din (subtractor_a_din), + .b_din (subtractor_b_din), + .n_din (n_din), + .d_dout (bram_diff_wr_din) + ); + + + // + // Address (Operand) Selector + // + always @(*) + // + case (fsm_shreg_reduce_stage_stop) + // + 5'b10000: begin + reduce_z_addr[ 1] = adder0_ab_addr; + 
reduce_z_addr[ 2] = adder1_ab_addr; + reduce_z_addr[ 3] = adder0_ab_addr; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = subtractor_ab_addr; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + 5'b01000: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = adder0_ab_addr; + reduce_z_addr[ 5] = adder1_ab_addr; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = subtractor_ab_addr; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00100: begin + // + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = adder0_ab_addr; + reduce_z_addr[7] = adder1_ab_addr; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = subtractor_ab_addr; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00010: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 
4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder0_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00001: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = adder0_ab_addr; + end + // + default: begin + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + endcase + + + // + // adder 0 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_a_din = reduce_z_dout[1]; + 5'b01000: adder0_a_din = bram_sum0_rd_dout; + 
5'b00100: adder0_a_din = bram_sum0_rd_dout; + 5'b00010: adder0_a_din = bram_sum0_rd_dout; + 5'b00001: adder0_a_din = bram_sum0_rd_dout; + default: adder0_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_b_din = reduce_z_dout[3]; + 5'b01000: adder0_b_din = reduce_z_dout[4]; + 5'b00100: adder0_b_din = reduce_z_dout[6]; + 5'b00010: adder0_b_din = bram_sum1_rd_dout; + 5'b00001: adder0_b_din = bram_diff_rd_dout; + default: adder0_b_din = {32{1'bX}}; + endcase + // + end + + // + // adder 1 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_a_din = reduce_z_dout[2]; + 5'b01000: adder1_a_din = bram_sum1_rd_dout; + 5'b00100: adder1_a_din = bram_sum1_rd_dout; + 5'b00010: adder1_a_din = {32{1'bX}}; + 5'b00001: adder1_a_din = {32{1'bX}}; + default: adder1_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_b_din = reduce_z_dout[2]; + 5'b01000: adder1_b_din = reduce_z_dout[5]; + 5'b00100: adder1_b_din = reduce_z_dout[7]; + 5'b00010: adder1_b_din = {32{1'bX}}; + 5'b00001: adder1_b_din = {32{1'bX}}; + default: adder1_b_din = {32{1'bX}}; + endcase + // + end + + + // + // subtractor + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_a_din = {32{1'b0}}; + 5'b01000: subtractor_a_din = bram_diff_rd_dout; + 5'b00100: subtractor_a_din = bram_diff_rd_dout; + 5'b00010: subtractor_a_din = bram_diff_rd_dout; + 5'b00001: subtractor_a_din = {32{1'bX}}; + default: subtractor_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_b_din = reduce_z_dout[8]; + 5'b01000: subtractor_b_din = reduce_z_dout[9]; + 5'b00100: subtractor_b_din = reduce_z_dout[10]; + 5'b00010: subtractor_b_din = {32{1'b0}}; + 5'b00001: subtractor_b_din = {32{1'bX}}; + default: subtractor_b_din = {32{1'bX}}; + endcase + // + end + + + // + // Address Mapping + // + assign p_addr = bram_sum0_wr_addr; + assign p_wren = 
bram_sum0_wr_wren & store_p; + assign p_dout = bram_sum0_wr_din; + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v deleted file mode 100644 index 322aec4..0000000 --- a/rtl/modular/modular_subtractor.v +++ /dev/null @@ -1,292 +0,0 @@ -//------------------------------------------------------------------------------ -// -// modular_subtractor.v -// ----------------------------------------------------------------------------- -// Modular subtractor. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module modular_subtractor - ( - clk, rst_n, - ena, rdy, - ab_addr, n_addr, d_addr, d_wren, - a_din, b_din, n_din, d_dout - ); - - - // - // Parameters - // - parameter OPERAND_NUM_WORDS = 8; - parameter WORD_COUNTER_WIDTH = 3; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? 
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word - output wire d_wren; // store current D word now - - input wire [ 31:0] a_din; // A - input wire [ 31:0] b_din; // B - input wire [ 31:0] n_din; // N - output wire [ 31:0] d_dout; // D = (A - B) mod N - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_ab; - reg [WORD_COUNTER_WIDTH-1:0] index_n; - reg [WORD_COUNTER_WIDTH-1:0] index_d; - - /* map registers to output ports */ - assign ab_addr = index_ab; - assign n_addr = index_n; - assign d_addr = index_d; - - - // - // Subtractor - // - wire [31: 0] sub32_d; - wire sub32_b_in; - wire sub32_b_out; - - subtractor32_wrapper subtractor32 - ( - .clk (clk), - .a (a_din), - .b (b_din), - .d (sub32_d), - .b_in (sub32_b_in), - .b_out (sub32_b_out) - ); - - - // - // Adder - // - wire [31: 0] add32_s; - wire add32_c_in; - wire add32_c_out; - - adder32_wrapper adder32 - ( - .clk (clk), - .a (sub32_d), - .b (n_din), - .s (add32_s), - .c_in (add32_c_in), - .c_out (add32_c_out) - ); - - - // - // FSM - // - - localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * 
OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)]; - - wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - - wire inc_index_ab = |fsm_shreg_inc_index_ab; - wire inc_index_n = |fsm_shreg_inc_index_n; - wire store_dif_ab = |fsm_shreg_store_dif_ab; - wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n; - wire store_data_d = |fsm_shreg_store_data_d; - wire inc_index_d = |fsm_shreg_inc_index_d; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Borrow & Carry Masking Logic - // - reg sub32_b_mask; - reg add32_c_mask; - - - always @(posedge clk) begin - // - sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; - add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 
1'b1 : 1'b0; - // - end - - assign sub32_b_in = sub32_b_out & ~sub32_b_mask; - assign add32_c_in = add32_c_out & ~add32_c_mask; - - - - // - // Borrow & Carry Latch Logic - // - reg sub32_borrow_latch; - - always @(posedge clk) begin - // - if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out; - // - end - - - // - // Intermediate Results - // - reg [32*OPERAND_NUM_WORDS-1:0] d_ab; - reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n; - - always @(posedge clk) - // - if (store_data_d) begin - // - d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]}; - d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; - // - end else begin - // - if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]}; - if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; - // - end - - - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) begin - // - index_ab <= WORD_INDEX_ZERO; - index_n <= WORD_INDEX_ZERO; - index_d <= WORD_INDEX_ZERO; - // - end else begin - // - if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab); - if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n); - if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d); - // - end - - - // - // Output Sum Selector - // - wire mux_select_ab_n = sub32_borrow_latch; - - - // - // Output Data and Write Enable Logic - // - reg d_wren_reg; - reg [31: 0] d_dout_reg; - wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0]; - - assign d_wren = d_wren_reg; - assign d_dout = d_dout_reg; - - always @(posedge clk) - // - if (rdy) begin - // - d_wren_reg <= 1'b0; - d_dout_reg <= {32{1'bX}}; - // - end else begin - // - d_wren_reg <= store_data_d; - d_dout_reg <= store_data_d ? 
d_dout_mux : {32{1'bX}}; - // - end - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v deleted file mode 100644 index b97a6cf..0000000 --- a/rtl/multiword/mw_comparator.v +++ /dev/null @@ -1,220 +0,0 @@ -//------------------------------------------------------------------------------ -// -// mw_comparator.v -// ----------------------------------------------------------------------------- -// Multi-word comparator. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module mw_comparator - ( - clk, rst_n, - ena, rdy, - xy_addr, x_din, y_din, - cmp_l, cmp_e, cmp_g - ); - - - // - // Parameters - // - parameter WORD_COUNTER_WIDTH = 3; - parameter OPERAND_NUM_WORDS = 8; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? - WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words - input wire [ 32-1:0] x_din; // current X word - input wire [ 32-1:0] y_din; // current Y word - - output wire cmp_l; // X < Y ? - output wire cmp_e; // X = Y ? - output wire cmp_g; // X > Y ? 
- - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_xy; - - reg reg_cmp_l; - reg reg_cmp_e; - reg reg_cmp_g; - - - // - // Output Mapping - // - assign xy_addr = index_xy; - - assign cmp_l = reg_cmp_l; - assign cmp_e = reg_cmp_e; - assign cmp_g = reg_cmp_g; - - - // - // FSM - // - localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; - - wire dec_index_xy = |fsm_shreg_dec_index_xy; - wire calc_leg = |fsm_shreg_calc_leg; - wire calc_leg_last = fsm_shreg_calc_leg_last; - - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) index_xy <= WORD_INDEX_LAST; - else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy); - - - // - // 32-bit Subtractor - // - wire [31: 0] sub32_d_out; - wire sub32_b_in; - wire sub32_b_out; - - subtractor32_wrapper subtractor32_inst - ( - .clk (clk), - - .a (x_din), - .b (y_din), - - .d (sub32_d_out), - - .b_in (sub32_b_in), - .b_out (sub32_b_out) - ); - - - // - // Borrow Masking Logic - // - reg sub32_b_mask; - - always @(posedge clk) - // - sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 
1'b1 : 1'b0; - - assign sub32_b_in = sub32_b_out & ~sub32_b_mask; - - // - // Output Logic - // - wire cmp_unresolved = !(cmp_l || cmp_g); - - wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0; - wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0; - - always @(posedge clk) - // - if (rdy) begin - // - if (ena) begin - // - reg_cmp_l <= 1'b0; - reg_cmp_e <= 1'b0; - reg_cmp_g <= 1'b0; - // - end - // - end else if (cmp_unresolved && calc_leg) begin - // - if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1; - if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1; - if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1; - // - end - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v deleted file mode 100644 index 5db95a7..0000000 --- a/rtl/multiword/mw_mover.v +++ /dev/null @@ -1,175 +0,0 @@ -//------------------------------------------------------------------------------ -// -// mw_mover.v -// ----------------------------------------------------------------------------- -// Multi-word data mover. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. 
-// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module mw_mover - ( - clk, rst_n, - ena, rdy, - x_addr, y_addr, y_wren, - x_din, y_dout - ); - - - // - // Parameters - // - parameter WORD_COUNTER_WIDTH = 3; - parameter OPERAND_NUM_WORDS = 8; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? 
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word - output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word - output wire y_wren; // store current Y word - - input wire [ 32-1:0] x_din; // current X word - output wire [ 32-1:0] y_dout; // current Y word - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_x; - reg [WORD_COUNTER_WIDTH-1:0] index_y; - - - // - // Output Mapping - // - assign x_addr = index_x; - assign y_addr = index_y; - - - // - // FSM - // - localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - - wire inc_index_x = |fsm_shreg_inc_index_x; - wire inc_index_y = |fsm_shreg_inc_index_y; - wire store_word_y = |fsm_shreg_inc_index_x; - - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Word Index Increment Logic - // - always @(posedge clk) - // - if (rdy) begin - index_x <= WORD_INDEX_ZERO; - index_y <= WORD_INDEX_ZERO; - end else begin - if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x); - if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y); - end - - - // - // Write Enable Logic - // - reg 
y_wren_reg; - - assign y_wren = y_wren_reg; - - always @(posedge clk) - // - if (rdy) y_wren_reg <= 1'b0; - else y_wren_reg <= store_word_y; - - - // - // Output Logic - // - assign y_dout = x_din; - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v deleted file mode 100644 index 28782c2..0000000 --- a/rtl/util/bram_1rw_1ro_readfirst.v +++ /dev/null @@ -1,101 +0,0 @@ -//====================================================================== -// -// Copyright (c) 2015, NORDUnet A/S All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// - Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -//====================================================================== - -`timescale 1ns / 1ps - -module bram_1rw_1ro_readfirst - #(parameter MEM_WIDTH = 32, - parameter MEM_ADDR_BITS = 8) - ( - input wire clk, - - input wire [MEM_ADDR_BITS-1:0] a_addr, - input wire a_wr, - input wire [MEM_WIDTH-1:0] a_in, - output wire [MEM_WIDTH-1:0] a_out, - - input wire [MEM_ADDR_BITS-1:0] b_addr, - output wire [MEM_WIDTH-1:0] b_out - ); - - - // - // BRAM - // - (* RAM_STYLE="BLOCK" *) - reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; - - - // - // Initialization - // - /** - integer c; - initial begin - for (c=0; c<(2**MEM_ADDR_BITS); c=c+1) - bram[c] = {MEM_WIDTH{1'b0}}; - end - **/ - - - - // - // Output Registers - // - reg [MEM_WIDTH-1:0] bram_reg_a; - reg [MEM_WIDTH-1:0] bram_reg_b; - - assign a_out = bram_reg_a; - assign b_out = bram_reg_b; - - - // - // Read-Write Port A - // - always @(posedge clk) begin - // - bram_reg_a <= bram[a_addr]; - // - if (a_wr) bram[a_addr] <= a_in; - // - end - - - // - // Read-Only Port B - // - always @(posedge clk) - // - bram_reg_b <= bram[b_addr]; - - -endmodule -- cgit v1.2.3