From 747c165611c3016c997e092797bc20a4f900cdea Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Sun, 12 Feb 2017 22:10:17 +0300 Subject: Initial commit of base point multiplier core for ECDSA curve P-384. --- rtl/curve/curve_dbl_add_384.v | 874 ++++++++++++++++++ rtl/curve/curve_mul_384.v | 720 +++++++++++++++ rtl/curve/rom/brom_p384_delta.v | 72 ++ rtl/curve/rom/brom_p384_g_x.v | 72 ++ rtl/curve/rom/brom_p384_g_y.v | 72 ++ rtl/curve/rom/brom_p384_h_x.v | 71 ++ rtl/curve/rom/brom_p384_h_y.v | 71 ++ rtl/curve/rom/brom_p384_one.v | 72 ++ rtl/curve/rom/brom_p384_q.v | 72 ++ rtl/curve/rom/brom_p384_zero.v | 42 + rtl/curve/uop/uop_add_rom.v | 66 ++ rtl/curve/uop/uop_conv_rom.v | 38 + rtl/curve/uop/uop_dbl_rom.v | 58 ++ rtl/curve/uop/uop_init_rom.v | 33 + rtl/curve/uop_ecdsa.v | 50 ++ rtl/ecdsa384.v | 160 ++++ rtl/ecdsa384_wrapper.v | 177 ++++ rtl/lowlevel/adder32_wrapper.v | 73 ++ rtl/lowlevel/adder47_wrapper.v | 69 ++ rtl/lowlevel/artix7/adder32_artix7.v | 96 ++ rtl/lowlevel/artix7/adder47_artix7.v | 91 ++ rtl/lowlevel/artix7/dsp48e1_wrapper.v | 159 ++++ rtl/lowlevel/artix7/mac16_artix7.v | 90 ++ rtl/lowlevel/artix7/subtractor32_artix7.v | 94 ++ rtl/lowlevel/ecdsa_lowlevel_settings.v | 17 + rtl/lowlevel/generic/adder32_generic.v | 67 ++ rtl/lowlevel/generic/adder47_generic.v | 64 ++ rtl/lowlevel/generic/mac16_generic.v | 74 ++ rtl/lowlevel/generic/subtractor32_generic.v | 67 ++ rtl/lowlevel/mac16_wrapper.v | 75 ++ rtl/lowlevel/subtractor32_wrapper.v | 72 ++ rtl/modular/modular_adder.v | 298 +++++++ .../modular_invertor/helper/modinv_helper_copy.v | 148 ++++ .../modular_invertor/helper/modinv_helper_init.v | 172 ++++ .../helper/modinv_helper_invert_compare.v | 286 ++++++ .../helper/modinv_helper_invert_precalc.v | 408 +++++++++ .../helper/modinv_helper_invert_update.v | 257 ++++++ .../helper/modinv_helper_reduce_precalc.v | 328 +++++++ .../helper/modinv_helper_reduce_update.v | 153 ++++ rtl/modular/modular_invertor/modinv_clog2.v | 10 + rtl/modular/modular_invertor/modular_invertor.v | 981 +++++++++++++++++++++ rtl/modular/modular_multiplier_384.v | 402 +++++++++ rtl/modular/modular_reductor_384.v | 739 ++++++++++++++++ rtl/modular/modular_subtractor.v | 292 ++++++ rtl/multiword/mw_comparator.v | 220 +++++ rtl/multiword/mw_mover.v | 175 ++++ rtl/util/bram_1rw_1ro_readfirst.v | 101 +++ 47 files changed, 8798 insertions(+) create mode 100644 rtl/curve/curve_dbl_add_384.v create mode 100644 rtl/curve/curve_mul_384.v create mode 100644 rtl/curve/rom/brom_p384_delta.v create mode 100644 rtl/curve/rom/brom_p384_g_x.v create mode 100644 rtl/curve/rom/brom_p384_g_y.v create mode 100644 rtl/curve/rom/brom_p384_h_x.v create mode 100644 rtl/curve/rom/brom_p384_h_y.v create mode 100644 rtl/curve/rom/brom_p384_one.v create mode 100644 rtl/curve/rom/brom_p384_q.v create mode 100644 rtl/curve/rom/brom_p384_zero.v create mode 100644 rtl/curve/uop/uop_add_rom.v create mode 100644 rtl/curve/uop/uop_conv_rom.v create mode 100644 rtl/curve/uop/uop_dbl_rom.v create mode 100644 rtl/curve/uop/uop_init_rom.v create mode 100644 rtl/curve/uop_ecdsa.v create mode 100644 rtl/ecdsa384.v create mode 100644 rtl/ecdsa384_wrapper.v create mode 100644 rtl/lowlevel/adder32_wrapper.v create mode 100644 rtl/lowlevel/adder47_wrapper.v create mode 100644 rtl/lowlevel/artix7/adder32_artix7.v create mode 100644 rtl/lowlevel/artix7/adder47_artix7.v create mode 100644 rtl/lowlevel/artix7/dsp48e1_wrapper.v create mode 100644 rtl/lowlevel/artix7/mac16_artix7.v create mode 100644 rtl/lowlevel/artix7/subtractor32_artix7.v create mode 100644 rtl/lowlevel/ecdsa_lowlevel_settings.v create mode 100644 rtl/lowlevel/generic/adder32_generic.v create mode 100644 rtl/lowlevel/generic/adder47_generic.v create mode 100644 rtl/lowlevel/generic/mac16_generic.v create mode 100644 rtl/lowlevel/generic/subtractor32_generic.v create mode 100644 rtl/lowlevel/mac16_wrapper.v create mode 100644 rtl/lowlevel/subtractor32_wrapper.v create mode 100644 rtl/modular/modular_adder.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_copy.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_init.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v create mode 100644 rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v create mode 100644 rtl/modular/modular_invertor/modinv_clog2.v create mode 100644 rtl/modular/modular_invertor/modular_invertor.v create mode 100644 rtl/modular/modular_multiplier_384.v create mode 100644 rtl/modular/modular_reductor_384.v create mode 100644 rtl/modular/modular_subtractor.v create mode 100644 rtl/multiword/mw_comparator.v create mode 100644 rtl/multiword/mw_mover.v create mode 100644 rtl/util/bram_1rw_1ro_readfirst.v (limited to 'rtl') diff --git a/rtl/curve/curve_dbl_add_384.v b/rtl/curve/curve_dbl_add_384.v new file mode 100644 index 0000000..70a23bf --- /dev/null +++ b/rtl/curve/curve_dbl_add_384.v @@ -0,0 +1,874 @@ +//------------------------------------------------------------------------------ +// +// curve_dbl_add_384.v +// ----------------------------------------------------------------------------- +// Elliptic curve point adder and doubler. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module curve_dbl_add_384 + ( + clk, rst_n, + ena, rdy, + uop_addr, uop, + px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr, + rx_wren, ry_wren, rz_wren, + px_din, py_din, pz_din, + rx_din, ry_din, rz_din, + rx_dout, ry_dout, rz_dout, q_din, v_din + ); + + + // + // Microcode + // +`include "uop_ecdsa.v" + + + // + // Constants + // + localparam WORD_COUNTER_WIDTH = 4; // 0 .. 11 + localparam OPERAND_NUM_WORDS = 12; // 12 * 32 = 384 + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output reg [ 6-1: 0] uop_addr; + input wire [20-1: 0] uop; + + output reg [WORD_COUNTER_WIDTH-1:0] px_addr; + output reg [WORD_COUNTER_WIDTH-1:0] py_addr; + output reg [WORD_COUNTER_WIDTH-1:0] pz_addr; + output reg [WORD_COUNTER_WIDTH-1:0] rx_addr; + output reg [WORD_COUNTER_WIDTH-1:0] ry_addr; + output reg [WORD_COUNTER_WIDTH-1:0] rz_addr; + output reg [WORD_COUNTER_WIDTH-1:0] v_addr; + output wire [WORD_COUNTER_WIDTH-1:0] q_addr; + + output wire rx_wren; + output wire ry_wren; + output wire rz_wren; + + input wire [ 32-1:0] px_din; + input wire [ 32-1:0] py_din; + input wire [ 32-1:0] pz_din; + input wire [ 32-1:0] rx_din; + input wire [ 32-1:0] ry_din; + input wire [ 32-1:0] rz_din; + output wire [ 32-1:0] rx_dout; + output wire [ 32-1:0] ry_dout; + output wire [ 32-1:0] rz_dout; + input wire [ 32-1:0] q_din; + input wire [ 32-1:0] v_din; + + + // + // Microcode + // + wire [ 4: 0] uop_opcode = uop[19:15]; + wire [ 4: 0] uop_src_a = uop[14:10]; + wire [ 4: 0] uop_src_b = uop[ 9: 5]; + wire [ 2: 0] uop_dst = uop[ 4: 2]; + wire [ 1: 0] uop_exec = uop[ 1: 0]; + + + // + // Multi-Word Comparator + // + wire mw_cmp_ena; + wire mw_cmp_rdy; + + wire mw_cmp_out_l; + wire mw_cmp_out_e; + wire mw_cmp_out_g; + + wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy; + + wire [ 32-1:0] mw_cmp_din_x; + wire [ 32-1:0] mw_cmp_din_y; + + // flags + reg flag_pz_is_zero; + reg flag_t1_is_zero; + reg flag_t2_is_zero; + + mw_comparator # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + ) + mw_comparator_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mw_cmp_ena), + .rdy (mw_cmp_rdy), + + .xy_addr (mw_cmp_addr_xy), + .x_din (mw_cmp_din_x), + .y_din (mw_cmp_din_y), + + .cmp_l (mw_cmp_out_l), + .cmp_e (mw_cmp_out_e), + .cmp_g (mw_cmp_out_g) + ); + + + // + // Modular Adder + // + wire mod_add_ena; + wire mod_add_rdy; + + wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab; + wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n; + wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s; + wire mod_add_wren_s; + + wire [ 32-1:0] mod_add_din_a; + wire [ 32-1:0] mod_add_din_b; + wire [ 32-1:0] mod_add_din_n; + wire [ 32-1:0] mod_add_dout_s; + + assign mod_add_din_n = q_din; + + modular_adder # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + ) + modular_adder_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mod_add_ena), + .rdy (mod_add_rdy), + + .ab_addr (mod_add_addr_ab), + .n_addr (mod_add_addr_n), + .s_addr (mod_add_addr_s), + .s_wren (mod_add_wren_s), + + .a_din (mod_add_din_a), + .b_din (mod_add_din_b), + .n_din (mod_add_din_n), + .s_dout (mod_add_dout_s) + ); + + + // + // Modular Subtractor + // + wire mod_sub_ena; + wire mod_sub_rdy; + + wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab; + wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n; + wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d; + wire mod_sub_wren_d; + + wire [ 32-1:0] mod_sub_din_a; + wire [ 32-1:0] mod_sub_din_b; + wire [ 32-1:0] mod_sub_din_n; + wire [ 32-1:0] mod_sub_dout_d; + + assign mod_sub_din_n = q_din; + + modular_subtractor # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + ) + modular_subtractor_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mod_sub_ena), + .rdy (mod_sub_rdy), + + .ab_addr (mod_sub_addr_ab), + .n_addr (mod_sub_addr_n), + .d_addr (mod_sub_addr_d), + .d_wren (mod_sub_wren_d), + + .a_din (mod_sub_din_a), + .b_din (mod_sub_din_b), + .n_din (mod_sub_din_n), + .d_dout (mod_sub_dout_d) + ); + + + // + // Modular Multiplier + // + wire mod_mul_ena; + wire mod_mul_rdy; + + wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a; + wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b; + wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n; + wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p; + wire mod_mul_wren_p; + + wire [ 32-1:0] mod_mul_din_a; + wire [ 32-1:0] mod_mul_din_b; + wire [ 32-1:0] mod_mul_din_n; + wire [ 32-1:0] mod_mul_dout_p; + + assign mod_mul_din_n = q_din; + + modular_multiplier_384 modular_multiplier_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mod_mul_ena), + .rdy (mod_mul_rdy), + + .a_addr (mod_mul_addr_a), + .b_addr (mod_mul_addr_b), + .n_addr (mod_mul_addr_n), + .p_addr (mod_mul_addr_p), + .p_wren (mod_mul_wren_p), + + .a_din (mod_mul_din_a), + .b_din (mod_mul_din_b), + .n_din (mod_mul_din_n), + .p_dout (mod_mul_dout_p) + ); + + + // + // Multi-Word Data Mover + // + wire mw_mov_ena; + wire mw_mov_rdy; + + wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x; + wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y; + wire mw_mov_wren_y; + + wire [ 32-1:0] mw_mov_din_x; + wire [ 32-1:0] mw_mov_dout_y; + + mw_mover # + ( + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH), + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS) + + ) + mw_mover_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (mw_mov_ena), + .rdy (mw_mov_rdy), + + .x_addr (mw_mov_addr_x), + .y_addr (mw_mov_addr_y), + .y_wren (mw_mov_wren_y), + + .x_din (mw_mov_din_x), + .y_dout (mw_mov_dout_y) + ); + + + // + // ROMs + // + reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr; + //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr; + reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr; + reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr; + reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr; + reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr; + reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr; + + wire [ 32-1:0] brom_one_dout; + wire [ 32-1:0] brom_zero_dout; + wire [ 32-1:0] brom_delta_dout; + wire [ 32-1:0] brom_g_x_dout; + wire [ 32-1:0] brom_g_y_dout; + wire [ 32-1:0] brom_h_x_dout; + wire [ 32-1:0] brom_h_y_dout; + + (* ROM_STYLE="BLOCK" *) brom_p384_one brom_one_inst + (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout)); + + brom_p384_zero brom_zero_inst + (.b_out(brom_zero_dout)); + + (* ROM_STYLE="BLOCK" *) brom_p384_delta brom_delta_inst + (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout)); + + (* ROM_STYLE="BLOCK" *) brom_p384_g_x brom_g_x_inst + (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout)); + + (* ROM_STYLE="BLOCK" *) brom_p384_g_y brom_g_y_inst + (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout)); + + (* ROM_STYLE="BLOCK" *) brom_p384_h_x brom_h_x_inst + (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout)); + + (* ROM_STYLE="BLOCK" *) brom_p384_h_y brom_h_y_inst + (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout)); + + + // + // Temporary Variables + // + reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr; + + reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr; + reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr; + + wire bram_t1_wr_en; + wire bram_t2_wr_en; + wire bram_t3_wr_en; + wire bram_t4_wr_en; + + wire [ 32-1:0] bram_t1_wr_data; + wire [ 32-1:0] bram_t2_wr_data; + wire [ 32-1:0] bram_t3_wr_data; + wire [ 32-1:0] bram_t4_wr_data; + + wire [ 32-1:0] bram_t1_rd_data; + wire [ 32-1:0] bram_t2_rd_data; + wire [ 32-1:0] bram_t3_rd_data; + wire [ 32-1:0] bram_t4_rd_data; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) + ) + bram_t1 + ( .clk (clk), + .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(), + .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) + ) + bram_t2 + ( .clk (clk), + .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(), + .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) + ) + bram_t3 + ( .clk (clk), + .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(), + .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH) + ) + bram_t4 + ( .clk (clk), + .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(), + .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data) + ); + + + // + // uOP Trigger Logic + // + reg uop_trig; + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) uop_trig <= 1'b0; + else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0; + + + // + // FSM + // + localparam [ 1: 0] FSM_STATE_STALL = 2'b00; + localparam [ 1: 0] FSM_STATE_FETCH = 2'b01; + localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10; + + reg [ 1: 0] fsm_state = FSM_STATE_STALL; + wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH; + + + // + // FSM Transition Logic + // + reg uop_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL; + else case (fsm_state) + FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL; + FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE; + FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE; + default: fsm_state <= FSM_STATE_STALL; + endcase + + + // + // uOP Address Increment Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_STALL) + uop_addr <= 5'd0; + else if (fsm_state == FSM_STATE_EXECUTE) + if (!uop_trig && uop_done) + uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1; + + + // + // uOP Completion Logic + // + always @(*) + // + case (uop_opcode) + OPCODE_CMP: uop_done = mw_cmp_rdy; + OPCODE_MOV: uop_done = mw_mov_rdy; + OPCODE_ADD: uop_done = mod_add_rdy; + OPCODE_SUB: uop_done = mod_sub_rdy; + OPCODE_MUL: uop_done = mod_mul_rdy; + OPCODE_RDY: uop_done = 1'b1; + default: uop_done = 1'b0; + endcase + + + // + // Helper Modules Enable Logic + // + assign mw_cmp_ena = uop_opcode[0] & uop_trig; + assign mw_mov_ena = uop_opcode[1] & uop_trig; + assign mod_add_ena = uop_opcode[2] & uop_trig; + assign mod_sub_ena = uop_opcode[3] & uop_trig; + assign mod_mul_ena = uop_opcode[4] & uop_trig; + + + // + // uOP Source Value Decoding Logic + // + reg [31: 0] uop_src_a_value; + + always @(*) + // + case (uop_src_a) + UOP_SRC_PX: uop_src_a_value = px_din; + UOP_SRC_PY: uop_src_a_value = py_din; + UOP_SRC_PZ: uop_src_a_value = pz_din; + + UOP_SRC_RX: uop_src_a_value = rx_din; + UOP_SRC_RY: uop_src_a_value = ry_din; + UOP_SRC_RZ: uop_src_a_value = rz_din; + + UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data; + UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data; + UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data; + UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data; + + UOP_SRC_ONE: uop_src_a_value = brom_one_dout; + UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout; + UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout; + + UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout; + UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout; + + UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout; + UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout; + + UOP_SRC_V: uop_src_a_value = v_din; + + default: uop_src_a_value = {32{1'bX}}; + endcase + + + assign mw_cmp_din_x = uop_src_a_value; + assign mw_mov_din_x = uop_src_a_value; + assign mod_add_din_a = uop_src_a_value; + assign mod_sub_din_a = uop_src_a_value; + assign mod_mul_din_a = uop_src_a_value; + + reg [31: 0] uop_src_b_value; + + always @(*) + // + case (uop_src_b) + UOP_SRC_PX: uop_src_b_value = px_din; + UOP_SRC_PY: uop_src_b_value = py_din; + UOP_SRC_PZ: uop_src_b_value = pz_din; + + UOP_SRC_RX: uop_src_b_value = rx_din; + UOP_SRC_RY: uop_src_b_value = ry_din; + UOP_SRC_RZ: uop_src_b_value = rz_din; + + UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data; + UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data; + UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data; + UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data; + + UOP_SRC_ONE: uop_src_b_value = brom_one_dout; + UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout; + UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout; + + UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout; + UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout; + + UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout; + UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout; + + UOP_SRC_V: uop_src_b_value = v_din; + + default: uop_src_b_value = {32{1'bX}}; + endcase + + assign mw_cmp_din_y = uop_src_b_value; + assign mod_add_din_b = uop_src_b_value; + assign mod_sub_din_b = uop_src_b_value; + assign mod_mul_din_b = uop_src_b_value; + + + // + // uOP Source & Destination Address Decoding Logic + // + reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr; + reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr; + reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr; + reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr; + + assign q_addr = uop_q_addr; + + always @(*) + // + case (uop_opcode) + // + OPCODE_CMP: begin + uop_src_a_addr = mw_cmp_addr_xy; + uop_src_b_addr = mw_cmp_addr_xy; + uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}}; + uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + OPCODE_MOV: begin + uop_src_a_addr = mw_mov_addr_x; + uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}}; + uop_dst_addr = mw_mov_addr_y; + uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + OPCODE_ADD: begin + uop_src_a_addr = mod_add_addr_ab; + uop_src_b_addr = mod_add_addr_ab; + uop_dst_addr = mod_add_addr_s; + uop_q_addr = mod_add_addr_n; + end + // + OPCODE_SUB: begin + uop_src_a_addr = mod_sub_addr_ab; + uop_src_b_addr = mod_sub_addr_ab; + uop_dst_addr = mod_sub_addr_d; + uop_q_addr = mod_sub_addr_n; + end + // + OPCODE_MUL: begin + uop_src_a_addr = mod_mul_addr_a; + uop_src_b_addr = mod_mul_addr_b; + uop_dst_addr = mod_mul_addr_p; + uop_q_addr = mod_mul_addr_n; + end + // + default: begin + uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}}; + uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}}; + uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}}; + uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + endcase + + + // + // uOP Conditional Execution Logic + // + reg uop_exec_effective; + + always @(*) + // + case (uop_exec) + UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1; + UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero; + UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero; + UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero; + endcase + + + // + // uOP Destination Store Logic + // + reg uop_dst_wren; + + always @(*) + // + case (uop_opcode) + // + OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective; + OPCODE_ADD: uop_dst_wren = mod_add_wren_s; + OPCODE_SUB: uop_dst_wren = mod_sub_wren_d; + OPCODE_MUL: uop_dst_wren = mod_mul_wren_p; + default: uop_dst_wren = 1'b0; + // + endcase + + + always @(*) begin + // + // + // + if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr; + else px_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr; + else py_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr; + else pz_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr; + else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr; + //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr; + //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr; + else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr; + else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr; + else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr; + else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr; + else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr; + else v_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr; + else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr; + else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr; + else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr; + else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr; + else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr; + else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr; + else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr; + else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}}; + // + // + // + if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr; + else begin + if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr; + else rx_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr; + else begin + if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr; + else ry_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr; + else begin + if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr; + else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr; + else rz_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + end + + + assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX); + assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY); + assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ); + + assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1); + assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2); + assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3); + assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4); + + + + // + // Destination Value Selector + // + reg [31: 0] uop_dst_value; + + always @(*) + // + case (uop_opcode) + + OPCODE_MOV: uop_dst_value = mw_mov_dout_y; + OPCODE_ADD: uop_dst_value = mod_add_dout_s; + OPCODE_SUB: uop_dst_value = mod_sub_dout_d; + OPCODE_MUL: uop_dst_value = mod_mul_dout_p; + + default: uop_dst_value = {32{1'bX}}; + + endcase + + assign rx_dout = uop_dst_value; + assign ry_dout = uop_dst_value; + assign rz_dout = uop_dst_value; + + assign bram_t1_wr_data = uop_dst_value; + assign bram_t2_wr_data = uop_dst_value; + assign bram_t3_wr_data = uop_dst_value; + assign bram_t4_wr_data = uop_dst_value; + + + // + // Latch Comparison Flags + // + always @(posedge clk) + // + if ( (fsm_state == FSM_STATE_EXECUTE) && + (uop_opcode == OPCODE_CMP) && + (uop_done && !uop_trig) ) begin + + if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) ) + flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; + + if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) ) + flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; + + if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) ) + flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g; + + end + + + // + // Ready Flag Logic + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear flag */ + if (fsm_state == FSM_STATE_STALL) + if (ena) rdy_reg <= 1'b0; + + /* set flag */ + if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done) + if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1; + + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/curve/curve_mul_384.v b/rtl/curve/curve_mul_384.v new file mode 100644 index 0000000..9fcb884 --- /dev/null +++ b/rtl/curve/curve_mul_384.v @@ -0,0 +1,720 @@ +//------------------------------------------------------------------------------ +// +// curve_mul_384.v +// ----------------------------------------------------------------------------- +// Elliptic curve point scalar multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module curve_mul_384 + ( + clk, rst_n, + ena, rdy, + k_addr, rx_addr, ry_addr, + rx_wren, ry_wren, + k_din, + rx_dout, ry_dout + ); + + + // + // Constants + // + localparam WORD_COUNTER_WIDTH = 4; // 0 .. 11 + localparam OPERAND_NUM_WORDS = 12; // 12 * 32 = 384 + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [ 3: 0] k_addr; + output wire [ 3: 0] rx_addr; + output wire [ 3: 0] ry_addr; + + output wire rx_wren; + output wire ry_wren; + + input wire [31: 0] k_din; + + output wire [31: 0] rx_dout; + output wire [31: 0] ry_dout; + + + // + // Temporary Variables + // + reg [ 3: 0] bram_tx_wr_addr; + reg [ 3: 0] bram_ty_wr_addr; + reg [ 3: 0] bram_tz_wr_addr; + + reg [ 3: 0] bram_rx_wr_addr; + reg [ 3: 0] bram_ry_wr_addr; + reg [ 3: 0] bram_rz_wr_addr; + wire [ 3: 0] bram_rz1_wr_addr; + + reg [ 3: 0] bram_tx_rd_addr; + reg [ 3: 0] bram_ty_rd_addr; + reg [ 3: 0] bram_tz_rd_addr; + + reg [ 3: 0] bram_rx_rd_addr; + reg [ 3: 0] bram_ry_rd_addr; + reg [ 3: 0] bram_rz_rd_addr; + wire [ 3: 0] bram_rz1_rd_addr; + + reg bram_tx_wr_en; + reg bram_ty_wr_en; + reg bram_tz_wr_en; + + reg bram_rx_wr_en; + reg bram_ry_wr_en; + reg bram_rz_wr_en; + wire bram_rz1_wr_en; + + wire [31: 0] bram_tx_rd_data; + wire [31: 0] bram_ty_rd_data; + wire [31: 0] bram_tz_rd_data; + + wire [31: 0] bram_rx_rd_data; + wire [31: 0] bram_ry_rd_data; + wire [31: 0] bram_rz_rd_data; + wire [31: 0] bram_rz1_rd_data; + + reg [31: 0] bram_tx_wr_data_in; + reg [31: 0] bram_ty_wr_data_in; + reg [31: 0] bram_tz_wr_data_in; + + reg [31: 0] bram_rx_wr_data_in; + reg [31: 0] bram_ry_wr_data_in; + reg [31: 0] bram_rz_wr_data_in; + wire [31: 0] bram_rz1_wr_data_in; + + wire [31: 0] bram_tx_wr_data_out; + wire [31: 0] bram_ty_wr_data_out; + wire [31: 0] bram_tz_wr_data_out; + + wire [31: 0] bram_rx_wr_data_out; + wire [31: 0] bram_ry_wr_data_out; + wire [31: 0] bram_rz_wr_data_out; + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_tx (.clk(clk), + .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out), + .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_ty (.clk(clk), + .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out), + .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_tz (.clk(clk), + .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out), + .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rx (.clk(clk), + .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out), + .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_ry (.clk(clk), + .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out), + .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rz (.clk(clk), + .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out), + .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data)); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(4)) + bram_rz1 (.clk(clk), + .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(), + .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data)); + + + // + // FSM + // + localparam [ 3: 0] FSM_STATE_IDLE = 4'd00; + localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01; + localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02; + localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03; + localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04; + localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05; + localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06; + localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07; + localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08; + localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09; + localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10; + localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11; + localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12; + localparam [ 3: 0] FSM_STATE_DONE = 4'd13; + + reg [3:0] fsm_state = FSM_STATE_IDLE; + + + // + // Round Counter + // + reg [ 8: 0] bit_counter; + wire [ 8: 0] bit_counter_max = 9'd383; + wire [ 8: 0] bit_counter_zero = 9'd0; + wire [ 8: 0] bit_counter_next = + (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero; + + + // + // Round Completion + // + wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ? + FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG; + + + // + // OP Trigger Logic + // + reg op_trig; + wire op_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) op_trig <= 1'b0; + else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) || + (fsm_state == FSM_STATE_DOUBLE_TRIG) || + (fsm_state == FSM_STATE_ADD_TRIG) || + (fsm_state == FSM_STATE_CONVERT_TRIG); + + // + // Microprograms + // + wire [ 5: 0] op_rom_addr; + wire [19: 0] op_rom_init_data; + wire [19: 0] op_rom_dbl_data; + wire [19: 0] op_rom_add_data; + wire [19: 0] op_rom_conv_data; + reg [19: 0] op_rom_mux_data; + + (* RAM_STYLE="BLOCK" *) + uop_init_rom op_rom_init + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_init_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_dbl_rom op_rom_dbl + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_dbl_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_add_rom op_rom_add + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_add_data) + ); + + (* RAM_STYLE="BLOCK" *) + uop_conv_rom op_rom_conv + ( + .clk (clk), + .addr (op_rom_addr), + .data (op_rom_conv_data) + ); + + always @(*) + // + case (fsm_state) + FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data; + FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data; + FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data; + FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data; + default: op_rom_mux_data = {20{1'bX}}; + endcase + + + + // + // Modulus + // + reg [ 3: 0] rom_q_addr; + wire [31: 0] rom_q_data; + + brom_p384_q rom_q + ( + .clk (clk), + .b_addr (rom_q_addr), + .b_out (rom_q_data) + ); + + + // + // Worker + // + wire [ 3: 0] worker_addr_px; + wire [ 3: 0] worker_addr_py; + wire [ 3: 0] worker_addr_pz; + + wire [ 3: 0] worker_addr_rx; + wire [ 3: 0] worker_addr_ry; + wire [ 3: 0] worker_addr_rz; + + wire [ 3: 0] worker_addr_q; + + wire worker_wren_rx; + wire worker_wren_ry; + wire worker_wren_rz; + + reg [31: 0] worker_din_px; + reg [31: 0] worker_din_py; + reg [31: 0] worker_din_pz; + + reg [31: 0] worker_din_rx; + reg [31: 0] worker_din_ry; + reg [31: 0] worker_din_rz; + + wire [31: 0] worker_dout_rx; + wire [31: 0] worker_dout_ry; + wire [31: 0] worker_dout_rz; + + curve_dbl_add_384 worker + ( + .clk (clk), + .rst_n (rst_n), + + .ena (op_trig), + .rdy (op_done), + + .uop_addr (op_rom_addr), + .uop (op_rom_mux_data), + + .px_addr (worker_addr_px), + .py_addr (worker_addr_py), + .pz_addr (worker_addr_pz), + + .rx_addr (worker_addr_rx), + .ry_addr (worker_addr_ry), + .rz_addr (worker_addr_rz), + + .q_addr (worker_addr_q), + + .v_addr (bram_rz1_rd_addr), + + .rx_wren (worker_wren_rx), + .ry_wren (worker_wren_ry), + .rz_wren (worker_wren_rz), + + .px_din (worker_din_px), + .py_din (worker_din_py), + .pz_din (worker_din_pz), + + .rx_din (worker_din_rx), + .ry_din (worker_din_ry), + .rz_din (worker_din_rz), + + .rx_dout (worker_dout_rx), + .ry_dout (worker_dout_ry), + .rz_dout (worker_dout_rz), + + .q_din (rom_q_data), + + .v_din (bram_rz1_rd_data) + ); + + + // + // Mover + // + reg move_trig; + wire move_done; + + wire [ 3: 0] mover_addr_x; + wire [ 3: 0] mover_addr_y; + + wire mover_wren_y; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) move_trig <= 1'b0; + else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG); + + mw_mover # + ( + .WORD_COUNTER_WIDTH (4), + .OPERAND_NUM_WORDS (12) + ) + mover + ( + .clk (clk), + .rst_n (rst_n), + + .ena (move_trig), + .rdy (move_done), + + .x_addr (mover_addr_x), + .y_addr (mover_addr_y), + .y_wren (mover_wren_y), + + .x_din ({32{1'bX}}), + .y_dout () + ); + + + // + // Invertor + // + reg invert_trig; + wire invert_done; + + wire [ 3: 0] invertor_addr_a; + wire [ 3: 0] invertor_addr_q; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) invert_trig <= 1'b0; + else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG); + + modular_invertor # + ( + .MAX_OPERAND_WIDTH(384) + ) + invertor + ( + .clk (clk), + .rst_n (rst_n), + + .ena (invert_trig), + .rdy (invert_done), + + .a_addr (invertor_addr_a), + .q_addr (invertor_addr_q), + .a1_addr (bram_rz1_wr_addr), + .a1_wren (bram_rz1_wr_en), + + .a_din (bram_rz_rd_data), + .q_din (rom_q_data), + .a1_dout (bram_rz1_wr_data_in) + ); + + + // + // FSM Transition Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE; + + FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT; + FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT; + + FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT; + FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT; + + FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT; + FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT; + + FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT; + FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT; + + FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT; + FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT; + + FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT; + FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT; + + FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + + // + // Bit Counter Increment + // + always @(posedge clk) begin + // + if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done) + bit_counter <= bit_counter_zero; + // + if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done) + bit_counter <= bit_counter_next; + // + end + + + // + // K Latch Logic + // + reg [ 3: 0] k_addr_reg; + reg [31: 0] k_din_reg; + + assign k_addr = k_addr_reg; + + always @(posedge clk) begin + // + if (fsm_state == FSM_STATE_DOUBLE_TRIG) + k_addr_reg <= 4'd11 - bit_counter[8:5]; + // + if (fsm_state == FSM_STATE_ADD_TRIG) + k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX}; + // + end + + + + // + // Copy Inhibit Logic + // + wire move_inhibit = k_din_reg[31]; + + wire copy_t2r_int = mover_wren_y & ~move_inhibit; + + + always @(*) begin + // + // Q + // + case (fsm_state) + FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q; + FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q; + FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q; + FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q; + default: rom_q_addr = worker_addr_q; + endcase + + // + // R(X,Y,Z) + // + case (fsm_state) + // + FSM_STATE_PREPARE_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; + bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; + bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_DOUBLE_WAIT: begin + // + bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz; + bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz; + bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_COPY_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y; + bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int; + bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data; + // + end + // + FSM_STATE_INVERT_WAIT: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= invertor_addr_a; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_CONVERT_WAIT: begin + // + bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + + // + default: begin + // + bram_rx_rd_addr <= {4{1'bX}}; bram_ry_rd_addr <= {4{1'bX}}; bram_rz_rd_addr <= {4{1'bX}}; + bram_rx_wr_addr <= {4{1'bX}}; bram_ry_wr_addr <= {4{1'bX}}; bram_rz_wr_addr <= {4{1'bX}}; + bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0; + bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}}; + // + end + // + endcase + // + // T(X,Y,Z) + // + case (fsm_state) + // + FSM_STATE_DOUBLE_WAIT: begin + // + bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; + bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz; + bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz; + bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + // + FSM_STATE_COPY_WAIT: begin + // + bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + + // + default: begin + // + bram_tx_rd_addr <= {4{1'bX}}; bram_ty_rd_addr <= {4{1'bX}}; bram_tz_rd_addr <= {4{1'bX}}; + bram_tx_wr_addr <= {4{1'bX}}; bram_ty_wr_addr <= {4{1'bX}}; bram_tz_wr_addr <= {4{1'bX}}; + bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0; + bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}}; + // + end + // + endcase + // + // Worker + // + case (fsm_state) + // + FSM_STATE_DOUBLE_WAIT: begin + // + worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; + worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out; + // + end + // + FSM_STATE_ADD_WAIT: begin + // + worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data; + worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out; + // + end + // + FSM_STATE_CONVERT_WAIT: begin + // + worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data; + worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; + // + end + // + default: begin + // + worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}}; + worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}}; + // + end + // + endcase + // + end + + + // + // Output Mapping + // + assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT); + assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT); + + assign rx_dout = worker_dout_rx; + assign ry_dout = worker_dout_ry; + + assign rx_addr = worker_addr_rx; + assign ry_addr = worker_addr_ry; + + + // + // Ready Flag Logic + // + reg rdy_reg = 1'b1; + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear flag */ + if ((fsm_state == FSM_STATE_IDLE) && ena) + rdy_reg <= 1'b0; + + /* set flag */ + if (fsm_state == FSM_STATE_DONE) + rdy_reg <= 1'b1; + + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/curve/rom/brom_p384_delta.v b/rtl/curve/rom/brom_p384_delta.v new file mode 100644 index 0000000..754af3e --- /dev/null +++ b/rtl/curve/rom/brom_p384_delta.v @@ -0,0 +1,72 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_delta + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h80000000; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h80000000; + 4'b0011: bram_reg_b <= 32'h7fffffff; + 4'b0100: bram_reg_b <= 32'hffffffff; + 4'b0101: bram_reg_b <= 32'hffffffff; + 4'b0110: bram_reg_b <= 32'hffffffff; + 4'b0111: bram_reg_b <= 32'hffffffff; + 4'b1000: bram_reg_b <= 32'hffffffff; + 4'b1001: bram_reg_b <= 32'hffffffff; + 4'b1010: bram_reg_b <= 32'hffffffff; + 4'b1011: bram_reg_b <= 32'h7fffffff; + endcase + + +endmodule diff --git a/rtl/curve/rom/brom_p384_g_x.v b/rtl/curve/rom/brom_p384_g_x.v new file mode 100644 index 0000000..25d5103 --- /dev/null +++ b/rtl/curve/rom/brom_p384_g_x.v @@ -0,0 +1,72 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_g_x + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h72760ab7; + 4'b0001: bram_reg_b <= 32'h3a545e38; + 4'b0010: bram_reg_b <= 32'hbf55296c; + 4'b0011: bram_reg_b <= 32'h5502f25d; + 4'b0100: bram_reg_b <= 32'h82542a38; + 4'b0101: bram_reg_b <= 32'h59f741e0; + 4'b0110: bram_reg_b <= 32'h8ba79b98; + 4'b0111: bram_reg_b <= 32'h6e1d3b62; + 4'b1000: bram_reg_b <= 32'hf320ad74; + 4'b1001: bram_reg_b <= 32'h8eb1c71e; + 4'b1010: bram_reg_b <= 32'hbe8b0537; + 4'b1011: bram_reg_b <= 32'haa87ca22; + endcase + + +endmodule diff --git a/rtl/curve/rom/brom_p384_g_y.v b/rtl/curve/rom/brom_p384_g_y.v new file mode 100644 index 0000000..c2461eb --- /dev/null +++ b/rtl/curve/rom/brom_p384_g_y.v @@ -0,0 +1,72 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_g_y + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h90ea0e5f; + 3'b0001: bram_reg_b <= 32'h7a431d7c; + 4'b0010: bram_reg_b <= 32'h1d7e819d; + 4'b0011: bram_reg_b <= 32'h0a60b1ce; + 4'b0100: bram_reg_b <= 32'hb5f0b8c0; + 4'b0101: bram_reg_b <= 32'he9da3113; + 4'b0110: bram_reg_b <= 32'h289a147c; + 4'b0111: bram_reg_b <= 32'hf8f41dbd; + 4'b1000: bram_reg_b <= 32'h9292dc29; + 4'b1001: bram_reg_b <= 32'h5d9e98bf; + 4'b1010: bram_reg_b <= 32'h96262c6f; + 4'b1011: bram_reg_b <= 32'h3617de4a; + endcase + + +endmodule diff --git a/rtl/curve/rom/brom_p384_h_x.v b/rtl/curve/rom/brom_p384_h_x.v new file mode 100644 index 0000000..a6c474e --- /dev/null +++ b/rtl/curve/rom/brom_p384_h_x.v @@ -0,0 +1,71 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_h_x + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h1b13ea8a; + 4'b0001: bram_reg_b <= 32'h8b574391; + 4'b0010: bram_reg_b <= 32'h8155ad27; + 4'b0011: bram_reg_b <= 32'h55fa1b42; + 4'b0100: bram_reg_b <= 32'hfb57ab8d; + 4'b0101: bram_reg_b <= 32'h4c117c3e; + 4'b0110: bram_reg_b <= 32'he8b0c8cf; + 4'b0111: bram_reg_b <= 32'h23c5893a; + 4'b1000: bram_reg_b <= 32'h19bea517; + 4'b1001: bram_reg_b <= 32'he29c71c2; + 4'b1010: bram_reg_b <= 32'h82e9f590; + 4'b1011: bram_reg_b <= 32'haaf06bba; + endcase + +endmodule diff --git a/rtl/curve/rom/brom_p384_h_y.v b/rtl/curve/rom/brom_p384_h_y.v new file mode 100644 index 0000000..98c59ed --- /dev/null +++ b/rtl/curve/rom/brom_p384_h_y.v @@ -0,0 +1,71 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_h_y + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h6f15f19d; + 4'b0001: bram_reg_b <= 32'h85bce284; + 4'b0010: bram_reg_b <= 32'he2817e62; + 4'b0011: bram_reg_b <= 32'hf59f4e30; + 4'b0100: bram_reg_b <= 32'h4a0f473e; + 4'b0101: bram_reg_b <= 32'h1625ceec; + 4'b0110: bram_reg_b <= 32'hd765eb83; + 4'b0111: bram_reg_b <= 32'h070be242; + 4'b1000: bram_reg_b <= 32'h6d6d23d6; + 4'b1001: bram_reg_b <= 32'ha2616740; + 4'b1010: bram_reg_b <= 32'h69d9d390; + 4'b1011: bram_reg_b <= 32'hc9e821b5; + endcase + +endmodule diff --git a/rtl/curve/rom/brom_p384_one.v b/rtl/curve/rom/brom_p384_one.v new file mode 100644 index 0000000..fa8caa0 --- /dev/null +++ b/rtl/curve/rom/brom_p384_one.v @@ -0,0 +1,72 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_one + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'h00000001; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h00000000; + 4'b0011: bram_reg_b <= 32'h00000000; + 4'b0100: bram_reg_b <= 32'h00000000; + 4'b0101: bram_reg_b <= 32'h00000000; + 4'b0110: bram_reg_b <= 32'h00000000; + 4'b0111: bram_reg_b <= 32'h00000000; + 4'b1000: bram_reg_b <= 32'h00000000; + 4'b1001: bram_reg_b <= 32'h00000000; + 4'b1010: bram_reg_b <= 32'h00000000; + 4'b1011: bram_reg_b <= 32'h00000000; + endcase + + +endmodule diff --git a/rtl/curve/rom/brom_p384_q.v b/rtl/curve/rom/brom_p384_q.v new file mode 100644 index 0000000..497c634 --- /dev/null +++ b/rtl/curve/rom/brom_p384_q.v @@ -0,0 +1,72 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_q + ( + input wire clk, + input wire [ 4-1:0] b_addr, + output wire [32-1:0] b_out + ); + + + // + // Output Registers + // + reg [31:0] bram_reg_b; + + assign b_out = bram_reg_b; + + + // + // Read-Only Port B + // + always @(posedge clk) + // + case (b_addr) + 4'b0000: bram_reg_b <= 32'hffffffff; + 4'b0001: bram_reg_b <= 32'h00000000; + 4'b0010: bram_reg_b <= 32'h00000000; + 4'b0011: bram_reg_b <= 32'hffffffff; + 4'b0100: bram_reg_b <= 32'hfffffffe; + 4'b0101: bram_reg_b <= 32'hffffffff; + 4'b0110: bram_reg_b <= 32'hffffffff; + 4'b0111: bram_reg_b <= 32'hffffffff; + 4'b1000: bram_reg_b <= 32'hffffffff; + 4'b1001: bram_reg_b <= 32'hffffffff; + 4'b1010: bram_reg_b <= 32'hffffffff; + 4'b1011: bram_reg_b <= 32'hffffffff; + endcase + + +endmodule diff --git a/rtl/curve/rom/brom_p384_zero.v b/rtl/curve/rom/brom_p384_zero.v new file mode 100644 index 0000000..5166391 --- /dev/null +++ b/rtl/curve/rom/brom_p384_zero.v @@ -0,0 +1,42 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module brom_p384_zero + ( + output wire [32-1:0] b_out + ); + + assign b_out = {32{1'b0}}; + +endmodule diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v new file mode 100644 index 0000000..c807736 --- /dev/null +++ b/rtl/curve/uop/uop_add_rom.v @@ -0,0 +1,66 @@ +`timescale 1ns / 1ps + +module uop_add_rom + ( + input wire clk, + input wire [ 5: 0] addr, + output reg [19: 0] data + ); + + + // + // Microcode + // +`include "..\uop_ecdsa.v" + + + // + // Addition Microprogram + // + always @(posedge clk) + + case (addr) + +/* 2. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; +/* 3. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 4. */6'd03: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 5. */6'd04: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_G_X, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 6. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_G_Y, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 7. */6'd06: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_PX, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 8. */6'd07: data <= {OPCODE_SUB, UOP_SRC_T2, UOP_SRC_PY, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 9. */6'd08: data <= {OPCODE_CMP, UOP_SRC_T1, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; + 6'd09: data <= {OPCODE_CMP, UOP_SRC_T2, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; +/* 10. */6'd10: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_RZ, UOP_EXEC_ALWAYS}; +/* 11. */6'd11: data <= {OPCODE_MOV, UOP_SRC_T1, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS}; + 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; +/* 12. */6'd13: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T4, UOP_EXEC_ALWAYS}; +/* 13. */6'd14: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; +/* 14. */6'd15: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 15. */6'd16: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS}; + 6'd17: data <= {OPCODE_MUL, UOP_SRC_RX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; +/* 16. */6'd18: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS}; +/* 17. */6'd19: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T4, UOP_DST_RX, UOP_EXEC_ALWAYS}; +/* 18. */6'd20: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T3, UOP_EXEC_ALWAYS}; +/* 19. */6'd21: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS}; +/* 20. */6'd22: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T4, UOP_DST_T4, UOP_EXEC_ALWAYS}; +/* 21. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_T4, UOP_DST_RY, UOP_EXEC_ALWAYS}; + + 6'd24: data <= {OPCODE_MOV, UOP_SRC_G_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; + 6'd25: data <= {OPCODE_MOV, UOP_SRC_G_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; + 6'd26: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX}; + + 6'd27: data <= {OPCODE_MOV, UOP_SRC_H_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_100}; + 6'd28: data <= {OPCODE_MOV, UOP_SRC_H_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_100}; + 6'd29: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_100}; + + 6'd30: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_101}; + 6'd31: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_101}; + 6'd32: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_101}; + + default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; + + endcase + + +endmodule diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v new file mode 100644 index 0000000..3097736 --- /dev/null +++ b/rtl/curve/uop/uop_conv_rom.v @@ -0,0 +1,38 @@ +`timescale 1ns / 1ps + +module uop_conv_rom + ( + input wire clk, + input wire [ 5: 0] addr, + output reg [19: 0] data + ); + + + // + // Microcode + // +`include "..\uop_ecdsa.v" + + + // + // Doubling Microprogram + // + always @(posedge clk) + + case (addr) + + 6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; + 6'd01: data <= {OPCODE_MOV, UOP_SRC_V, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd02: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; + 6'd03: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T2, UOP_DST_T3, UOP_EXEC_ALWAYS}; + 6'd04: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; + 6'd05: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS}; + 6'd06: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; + 6'd07: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; + + default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; + + endcase + + +endmodule diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v new file mode 100644 index 0000000..1939ca9 --- /dev/null +++ b/rtl/curve/uop/uop_dbl_rom.v @@ -0,0 +1,58 @@ +`timescale 1ns / 1ps + +module uop_dbl_rom + ( + input wire clk, + input wire [ 5: 0] addr, + output reg [19: 0] data + ); + + + // + // Microcode + // +`include "..\uop_ecdsa.v" + + + // + // Doubling Microprogram + // + always @(posedge clk) + + case (addr) + +/* 1. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS}; +/* 2. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 5'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 3. */6'd03: data <= {OPCODE_SUB, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 4. */6'd04: data <= {OPCODE_ADD, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 5. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 6. */6'd06: data <= {OPCODE_ADD, UOP_SRC_T2, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd07: data <= {OPCODE_ADD, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS}; +/* 7. */6'd08: data <= {OPCODE_ADD, UOP_SRC_PY, UOP_SRC_PY, UOP_DST_RY, UOP_EXEC_ALWAYS}; +/* 8. */6'd09: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_RY, UOP_DST_RZ, UOP_EXEC_ALWAYS}; +/* 9. */6'd10: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd11: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS}; + 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS}; +/* 10. */6'd13: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_RY, UOP_DST_T3, UOP_EXEC_ALWAYS}; +/* 11. */6'd14: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd15: data <= {OPCODE_MUL, UOP_SRC_RY, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 12. */6'd16: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_DELTA, UOP_DST_RY, UOP_EXEC_ALWAYS}; +/* 13. */6'd17: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS}; + 6'd18: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS}; +/* 14. */6'd19: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 15. */6'd20: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS}; +/* 16. */6'd21: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 17. */6'd22: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS}; +/* 18. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_RY, UOP_DST_RY, UOP_EXEC_ALWAYS}; + + 6'd24: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX}; + 6'd25: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX}; + 6'd26: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX}; + + default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; + + endcase + + +endmodule diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v new file mode 100644 index 0000000..ac44b55 --- /dev/null +++ b/rtl/curve/uop/uop_init_rom.v @@ -0,0 +1,33 @@ +`timescale 1ns / 1ps + +module uop_init_rom + ( + input wire clk, + input wire [ 5: 0] addr, + output reg [19: 0] data + ); + + + // + // Microcode + // +`include "..\uop_ecdsa.v" + + + // + // Doubling Microprogram + // + always @(posedge clk) + + case (addr) + + 6'd00: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS}; + 6'd01: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS}; + 6'd02: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS}; + + default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY}; + + endcase + + +endmodule diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v new file mode 100644 index 0000000..e64119d --- /dev/null +++ b/rtl/curve/uop_ecdsa.v @@ -0,0 +1,50 @@ +localparam [ 4: 0] OPCODE_CMP = 5'b00001; +localparam [ 4: 0] OPCODE_MOV = 5'b00010; +localparam [ 4: 0] OPCODE_ADD = 5'b00100; +localparam [ 4: 0] OPCODE_SUB = 5'b01000; +localparam [ 4: 0] OPCODE_MUL = 5'b10000; +localparam [ 4: 0] OPCODE_RDY = 5'b00000; + +localparam [ 4: 0] UOP_SRC_PX = 5'h0_0; +localparam [ 4: 0] UOP_SRC_PY = 5'h0_1; +localparam [ 4: 0] UOP_SRC_PZ = 5'h0_2; + +localparam [ 4: 0] UOP_SRC_RX = 5'h0_3; +localparam [ 4: 0] UOP_SRC_RY = 5'h0_4; +localparam [ 4: 0] UOP_SRC_RZ = 5'h0_5; + +localparam [ 4: 0] UOP_SRC_T1 = 5'h0_6; +localparam [ 4: 0] UOP_SRC_T2 = 5'h0_7; +localparam [ 4: 0] UOP_SRC_T3 = 5'h0_8; +localparam [ 4: 0] UOP_SRC_T4 = 5'h0_9; + +localparam [ 4: 0] UOP_SRC_ONE = 5'h0_A; +localparam [ 4: 0] UOP_SRC_ZERO = 5'h0_B; +localparam [ 4: 0] UOP_SRC_DELTA = 5'h0_C; + +localparam [ 4: 0] UOP_SRC_V = 5'h0_F; + +localparam [ 4: 0] UOP_SRC_G_X = 5'h1_0; +localparam [ 4: 0] UOP_SRC_G_Y = 5'h1_1; + +localparam [ 4: 0] UOP_SRC_H_X = 5'h1_2; +localparam [ 4: 0] UOP_SRC_H_Y = 5'h1_3; + +localparam [ 4: 0] UOP_SRC_DUMMY = 5'hX_X; + +localparam [ 2: 0] UOP_DST_RX = 3'd0; +localparam [ 2: 0] UOP_DST_RY = 3'd1; +localparam [ 2: 0] UOP_DST_RZ = 3'd2; + +localparam [ 2: 0] UOP_DST_T1 = 3'd3; +localparam [ 2: 0] UOP_DST_T2 = 3'd4; +localparam [ 2: 0] UOP_DST_T3 = 3'd5; +localparam [ 2: 0] UOP_DST_T4 = 3'd6; + +localparam [ 2: 0] UOP_DST_DUMMY = 3'dX; + +localparam UOP_EXEC_ALWAYS = 2'b11; // R +localparam UOP_EXEC_PZT1T2_0XX = 2'b10; // G +localparam UOP_EXEC_PZT1T2_100 = 2'b00; // H +localparam UOP_EXEC_PZT1T2_101 = 2'b01; // O + diff --git a/rtl/ecdsa384.v b/rtl/ecdsa384.v new file mode 100644 index 0000000..cefef14 --- /dev/null +++ b/rtl/ecdsa384.v @@ -0,0 +1,160 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module ecdsa384 + ( + input wire clk, + input wire rst_n, + + input wire next, + output wire valid, + + input wire bus_cs, + input wire bus_we, + input wire [ 5:0] bus_addr, + input wire [31:0] bus_data_wr, + output wire [31:0] bus_data_rd + ); + + + // + // Memory Banks + // + localparam [1:0] BUS_ADDR_BANK_K = 2'b00; + localparam [1:0] BUS_ADDR_BANK_X = 2'b01; + localparam [1:0] BUS_ADDR_BANK_Y = 2'b10; + + wire [1:0] bus_addr_upper = bus_addr[5:4]; + wire [3:0] bus_addr_lower = bus_addr[3:0]; + + + // + // Memories + // + + wire [31:0] user_rw_k_bram_out; + wire [31:0] user_ro_x_bram_out; + wire [31:0] user_ro_y_bram_out; + + wire [ 3:0] core_ro_k_bram_addr; + wire [ 3:0] core_rw_x_bram_addr; + wire [ 3:0] core_rw_y_bram_addr; + + wire core_rw_x_bram_wren; + wire core_rw_y_bram_wren; + + wire [31:0] core_ro_k_bram_dout; + wire [31:0] core_rw_x_bram_din; + wire [31:0] core_rw_y_bram_din; + + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + ) + bram_k + ( .clk(clk), + .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr), + .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + ) + bram_x + ( .clk(clk), + .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din), + .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out) + ); + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(4) + ) + bram_y + ( .clk(clk), + .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din), + .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out) + ); + + + // + // Curve Base Point Multiplier + // + reg next_dly; + + always @(posedge clk) next_dly <= next; + + wire next_trig = next && !next_dly; + + curve_mul_384 base_point_multiplier_p384 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (next_trig), + .rdy (valid), + + .k_addr (core_ro_k_bram_addr), + .rx_addr (core_rw_x_bram_addr), + .ry_addr (core_rw_y_bram_addr), + + .rx_wren (core_rw_x_bram_wren), + .ry_wren (core_rw_y_bram_wren), + + .k_din (core_ro_k_bram_dout), + .rx_dout (core_rw_x_bram_din), + .ry_dout (core_rw_y_bram_din) + ); + + // + // Output Selector + // + reg [1:0] bus_addr_upper_prev; + always @(posedge clk) bus_addr_upper_prev = bus_addr_upper; + + reg [31: 0] bus_data_rd_mux; + assign bus_data_rd = bus_data_rd_mux; + + always @(*) + // + case (bus_addr_upper_prev) + // + BUS_ADDR_BANK_K: bus_data_rd_mux = user_rw_k_bram_out; + BUS_ADDR_BANK_X: bus_data_rd_mux = user_ro_x_bram_out; + BUS_ADDR_BANK_Y: bus_data_rd_mux = user_ro_y_bram_out; + // + default: bus_data_rd_mux = {32{1'b0}}; + // + endcase + +endmodule diff --git a/rtl/ecdsa384_wrapper.v b/rtl/ecdsa384_wrapper.v new file mode 100644 index 0000000..feb10b4 --- /dev/null +++ b/rtl/ecdsa384_wrapper.v @@ -0,0 +1,177 @@ +//====================================================================== +// +// Copyright (c) 2016, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +module ecdsa384_wrapper + ( + input wire clk, + input wire rst_n, + + input wire cs, + input wire we, + + input wire [6: 0] address, + input wire [31: 0] write_data, + output wire [31: 0] read_data + ); + + + // + // Address Decoder + // + localparam ADDR_MSB_REGS = 1'b0; + localparam ADDR_MSB_CORE = 1'b1; + + wire [0:0] addr_msb = address[6]; + wire [5:0] addr_lsb = address[5:0]; + + + // + // Output Mux + // + wire [31: 0] read_data_regs; + wire [31: 0] read_data_core; + + + // + // Registers + // + localparam ADDR_NAME0 = 6'h00; + localparam ADDR_NAME1 = 6'h01; + localparam ADDR_VERSION = 6'h02; + + localparam ADDR_CONTROL = 6'h08; // {next, init} + localparam ADDR_STATUS = 6'h09; // {valid, ready} + localparam ADDR_DUMMY = 6'h0F; // don't care + +// localparam CONTROL_INIT_BIT = 0; -- not used + localparam CONTROL_NEXT_BIT = 1; + +// localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1 + localparam STATUS_VALID_BIT = 1; + + localparam CORE_NAME0 = 32'h65636473; // "ecds" + localparam CORE_NAME1 = 32'h61333834; // "a384" + localparam CORE_VERSION = 32'h302E3131; // "0.11" + + + // + // Registers + // + reg reg_control; + reg [31:0] reg_dummy; + + + // + // Wires + // + wire reg_status; + + + // + // ECDSA384 + // + ecdsa384 ecdsa384_inst + ( + .clk (clk), + .rst_n (rst_n), + + .next (reg_control), + .valid (reg_status), + + .bus_cs (cs && (addr_msb == ADDR_MSB_CORE)), + .bus_we (we), + .bus_addr (addr_lsb), + .bus_data_wr (write_data), + .bus_data_rd (read_data_core) + ); + + + // + // Read Latch + // + reg [31: 0] tmp_read_data; + + + // + // Read/Write Interface + // + always @(posedge clk) + // + if (!rst_n) begin + // + reg_control <= 1'b0; + // + end else if (cs && (addr_msb == ADDR_MSB_REGS)) begin + // + if (we) begin + // + // Write Handler + // + case (addr_lsb) + // + ADDR_CONTROL: reg_control <= write_data[1]; + ADDR_DUMMY: reg_dummy <= write_data[31:0]; + // + endcase + // + end else begin + // + // Read Handler + // + case (address) + // + ADDR_NAME0: tmp_read_data <= CORE_NAME0; + ADDR_NAME1: tmp_read_data <= CORE_NAME1; + ADDR_VERSION: tmp_read_data <= CORE_VERSION; + ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control, 1'b0}; + ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1}; + ADDR_DUMMY: tmp_read_data <= reg_dummy; + // + default: tmp_read_data <= 32'h00000000; + // + endcase + // + end + // + end + + + // + // Register / Core Memory Selector + // + reg addr_msb_last; + always @(posedge clk) addr_msb_last = addr_msb; + + assign read_data = (addr_msb_last == ADDR_MSB_REGS) ? tmp_read_data : read_data_core; + + +endmodule diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v new file mode 100644 index 0000000..ebfd8ce --- /dev/null +++ b/rtl/lowlevel/adder32_wrapper.v @@ -0,0 +1,73 @@ +//------------------------------------------------------------------------------ +// +// adder32_wrapper.v +// ----------------------------------------------------------------------------- +// Wrapper for 32-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder32_wrapper + ( + input clk, // clock + input [31: 0] a, // operand input + input [31: 0] b, // operand input + output [31: 0] s, // sum output + input c_in, // carry input + output c_out // carry output + ); + + // + // Include Primitive Selector + // +`include "ecdsa_lowlevel_settings.v" + + + // + // Instantiate Vendor/Generic Primitive + // + `ADDER32_PRIMITIVE adder32_inst + ( + .clk(clk), + .a(a), + .b(b), + .s(s), + .c_in(c_in), + .c_out(c_out) + ); + + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v new file mode 100644 index 0000000..1a0a18e --- /dev/null +++ b/rtl/lowlevel/adder47_wrapper.v @@ -0,0 +1,69 @@ +//------------------------------------------------------------------------------ +// +// adder47_wrapper.v +// ----------------------------------------------------------------------------- +// Wrapper for 47-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder47_wrapper + ( + input clk, // clock + input [46: 0] a, // operand input + input [46: 0] b, // operand input + output [46: 0] s // sum output + ); + + // + // Include Primitive Selector + // +`include "ecdsa_lowlevel_settings.v" + + + // + // Instantiate Vendor/Generic Primitive + // + `ADDER47_PRIMITIVE adder47_inst + ( + .clk(clk), + .a(a), + .b(b), + .s(s) + ); + + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v new file mode 100644 index 0000000..5f9ba79 --- /dev/null +++ b/rtl/lowlevel/artix7/adder32_artix7.v @@ -0,0 +1,96 @@ +//------------------------------------------------------------------------------ +// +// adder32_artix7.v +// ----------------------------------------------------------------------------- +// Hardware (Artix-7 DSP48E1) 32-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder32_artix7 + ( + input clk, // clock + input [31: 0] a, // operand input + input [31: 0] b, // operand input + output [31: 0] s, // sum output + input c_in, // carry input + output c_out // carry output + ); + + // + // Lower and higher parts of operand + // + wire [17: 0] bl = b[17: 0]; + wire [13: 0] bh = b[31:18]; + + + // + // DSP48E1 Slice + // + + /* Operation Mode */ + wire [ 3: 0] dsp48e1_alumode = 4'b0000; + wire [ 6: 0] dsp48e1_opmode = 7'b0110011; + + /* Internal Product */ + wire [47: 0] p_int; + + dsp48e1_wrapper dsp_adder + ( + .clk (clk), + + .ce (1'b1), + + .carry (c_in), + + .alumode (dsp48e1_alumode), + .opmode (dsp48e1_opmode), + + .a ({{16{1'b0}}, bh}), + .b (bl), + .c ({{16{1'b0}}, a}), + + .p (p_int) + ); + + // + // Output Mapping + // + assign s = p_int[31: 0]; + assign c_out = p_int[32]; + + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v new file mode 100644 index 0000000..00566e4 --- /dev/null +++ b/rtl/lowlevel/artix7/adder47_artix7.v @@ -0,0 +1,91 @@ +//------------------------------------------------------------------------------ +// +// adder47_artix7.v +// ----------------------------------------------------------------------------- +// Hardware (Artix-7 DSP48E1) 47-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder47_artix7 + ( + input clk, // clock + input [46: 0] a, // operand input + input [46: 0] b, // operand input + output [46: 0] s // sum output + ); + + // + // Lower and higher parts of operand + // + wire [17: 0] bl = b[17: 0]; + wire [28: 0] bh = b[46:18]; + + // + // DSP48E1 Slice + // + + /* Operation Mode */ + wire [ 3: 0] dsp48e1_alumode = 4'b0000; + wire [ 6: 0] dsp48e1_opmode = 7'b0110011; + + /* Internal Product */ + wire [47: 0] p_int; + + dsp48e1_wrapper dsp_adder + ( + .clk (clk), + + .ce (1'b1), + + .carry (1'b0), + + .alumode (dsp48e1_alumode), + .opmode (dsp48e1_opmode), + + .a ({1'b0, bh}), + .b (bl), + .c ({1'b0, a}), + + .p (p_int) + ); + + // + // Output Mapping + // + assign s = p_int[46: 0]; + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v new file mode 100644 index 0000000..11a21bc --- /dev/null +++ b/rtl/lowlevel/artix7/dsp48e1_wrapper.v @@ -0,0 +1,159 @@ +//------------------------------------------------------------------------------ +// +// dsp48e1_wrapper.v +// ----------------------------------------------------------------------------- +// Hardware (Artix-7 DSP48E1) tile wrapper. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module dsp48e1_wrapper + ( + input clk, + + input ce, + + input [ 6: 0] opmode, + input [ 3: 0] alumode, + + input carry, + + input [29: 0] a, + input [17: 0] b, + input [47: 0] c, + + output [47: 0] p + ); + + + // + // Tile instantiation + // + DSP48E1 # + ( + .AREG (0), + .BREG (0), + .CREG (0), + .DREG (0), + .MREG (0), + .PREG (1), + .ADREG (0), + + .ACASCREG (0), + .BCASCREG (0), + .ALUMODEREG (0), + .INMODEREG (0), + .OPMODEREG (0), + .CARRYINREG (0), + .CARRYINSELREG (0), + + .A_INPUT ("DIRECT"), + .B_INPUT ("DIRECT"), + + .USE_DPORT ("FALSE"), + .USE_MULT ("DYNAMIC"), + .USE_SIMD ("ONE48"), + + .USE_PATTERN_DETECT ("NO_PATDET"), + .SEL_PATTERN ("PATTERN"), + .SEL_MASK ("MASK"), + .PATTERN (48'h000000000000), + .MASK (48'h3fffffffffff), + .AUTORESET_PATDET ("NO_RESET") + ) + DSP48E1_inst + ( + .CLK (clk), + + .RSTA (1'b0), + .RSTB (1'b0), + .RSTC (1'b0), + .RSTD (1'b0), + .RSTM (1'b0), + .RSTP (1'b0), + + .RSTCTRL (1'b0), + .RSTINMODE (1'b0), + .RSTALUMODE (1'b0), + .RSTALLCARRYIN (1'b0), + + .CEA1 (1'b0), + .CEA2 (1'b0), + .CEB1 (1'b0), + .CEB2 (1'b0), + .CEC (1'b0), + .CED (1'b0), + .CEM (1'b0), + .CEP (ce), + .CEAD (1'b0), + .CEALUMODE (1'b0), + .CEINMODE (1'b0), + + .CECTRL (1'b0), + .CECARRYIN (1'b0), + + .A (a), + .B (b), + .C (c), + .D ({25{1'b1}}), + .P (p), + + .CARRYIN (carry), + .CARRYOUT (), + .CARRYINSEL (3'b000), + + .CARRYCASCIN (1'b0), + .CARRYCASCOUT (), + + .PATTERNDETECT (), + .PATTERNBDETECT (), + + .OPMODE (opmode), + .ALUMODE (alumode), + .INMODE (5'b00000), + + .MULTSIGNIN (1'b0), + .MULTSIGNOUT (), + + .UNDERFLOW (), + .OVERFLOW (), + + .ACIN (30'd0), + .BCIN (18'd0), + .PCIN (48'd0), + + .ACOUT (), + .BCOUT (), + .PCOUT () + ); + +endmodule diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v new file mode 100644 index 0000000..63b74ab --- /dev/null +++ b/rtl/lowlevel/artix7/mac16_artix7.v @@ -0,0 +1,90 @@ +//------------------------------------------------------------------------------ +// +// mac16_artix7.v +// ----------------------------------------------------------------------------- +// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 47-bit accumulator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mac16_artix7 + ( + input clk, // clock + input clr, // clear accumulator (active-high) + input ce, // enable clock (active-high) + input [15: 0] a, // operand input + input [15: 0] b, // operand input + output [46: 0] s // sum output + ); + + + // + // DSP48E1 Slice + // + + /* Operation Mode */ + wire [ 3: 0] dsp48e1_alumode = 4'b0000; + wire [ 6: 0] dsp48e1_opmode = {2'b01, clr, 4'b0101}; + + /* Internal Product */ + wire [47: 0] p_int; + + dsp48e1_wrapper dsp_adder + ( + .clk (clk), + + .ce (ce), + + .carry (1'b0), + + .alumode (dsp48e1_alumode), + .opmode (dsp48e1_opmode), + + .a ({{14{1'b0}}, a}), + .b ({{ 2{1'b0}}, b}), + .c ({48{1'b0}}), + + .p (p_int) + ); + + // + // Output Mapping + // + assign s = p_int[46:0]; + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v new file mode 100644 index 0000000..b46ac5c --- /dev/null +++ b/rtl/lowlevel/artix7/subtractor32_artix7.v @@ -0,0 +1,94 @@ +//------------------------------------------------------------------------------ +// +// subtractor32_artix7.v +// ----------------------------------------------------------------------------- +// Hardware (Artix-7 DSP48E1) 32-bit subtractor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module subtractor32_artix7 + ( + input clk, + input [31: 0] a, + input [31: 0] b, + output [31: 0] d, + input b_in, + output b_out + ); + + // + // Lower and higher parts of operand + // + wire [17: 0] bl = b[17: 0]; + wire [13: 0] bh = b[31:18]; + + // + // DSP48E1 Slice + // + + /* Operation Mode */ + wire [ 3: 0] dsp48e1_alumode = 4'b0011; + wire [ 6: 0] dsp48e1_opmode = 7'b0110011; + + /* Internal Product */ + wire [47: 0] p_int; + + dsp48e1_wrapper dsp_subtractor + ( + .clk (clk), + + .ce (1'b1), + + .carry (b_in), + + .alumode (dsp48e1_alumode), + .opmode (dsp48e1_opmode), + + .a ({{16{1'b0}}, bh}), + .b (bl), + .c ({{16{1'b0}}, a}), + + .p (p_int) + ); + + // + // Output Mapping + // + assign d = p_int[31: 0]; + assign b_out = p_int[32]; + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v new file mode 100644 index 0000000..8f95e2f --- /dev/null +++ b/rtl/lowlevel/ecdsa_lowlevel_settings.v @@ -0,0 +1,17 @@ +`define USE_VENDOR_PRIMITIVES + +`ifdef USE_VENDOR_PRIMITIVES + +`define MAC16_PRIMITIVE mac16_artix7 +`define ADDER32_PRIMITIVE adder32_artix7 +`define ADDER47_PRIMITIVE adder47_artix7 +`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7 + +`else + +`define MAC16_PRIMITIVE mac16_generic +`define ADDER32_PRIMITIVE adder32_generic +`define ADDER47_PRIMITIVE adder47_generic +`define SUBTRACTOR32_PRIMITIVE subtractor32_generic + +`endif diff --git a/rtl/lowlevel/generic/adder32_generic.v b/rtl/lowlevel/generic/adder32_generic.v new file mode 100644 index 0000000..b9c94aa --- /dev/null +++ b/rtl/lowlevel/generic/adder32_generic.v @@ -0,0 +1,67 @@ +//------------------------------------------------------------------------------ +// +// adder32_generic.v +// ----------------------------------------------------------------------------- +// Generic 32-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder32_generic + ( + input clk, // clock + input [31: 0] a, // operand input + input [31: 0] b, // operand input + output [31: 0] s, // sum output + input c_in, // carry input + output c_out // carry output + ); + + // + // Sum + // + reg [32: 0] s_int; + + always @(posedge clk) + s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in}; + + // + // Output + // + assign s = s_int[31:0]; + assign c_out = s_int[32]; + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/adder47_generic.v b/rtl/lowlevel/generic/adder47_generic.v new file mode 100644 index 0000000..f472061 --- /dev/null +++ b/rtl/lowlevel/generic/adder47_generic.v @@ -0,0 +1,64 @@ +//------------------------------------------------------------------------------ +// +// adder47_generic.v +// ----------------------------------------------------------------------------- +// Generic 47-bit adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module adder47_generic + ( + input clk, // clock + input [46: 0] a, // operand input + input [46: 0] b, // operand input + output [46: 0] s // sum output + ); + + // + // Sum + // + reg [46: 0] s_int; + + always @(posedge clk) + s_int <= a + b; + + // + // Output + // + assign s = s_int; + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/mac16_generic.v b/rtl/lowlevel/generic/mac16_generic.v new file mode 100644 index 0000000..dc95645 --- /dev/null +++ b/rtl/lowlevel/generic/mac16_generic.v @@ -0,0 +1,74 @@ +//------------------------------------------------------------------------------ +// +// mac16_generic.v +// ----------------------------------------------------------------------------- +// Generic 16-bit multiplier and 47-bit accumulator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mac16_generic + ( + input clk, // clock + input clr, // clear accumulator (active-high) + input ce, // enable clock (active-high) + input [15: 0] a, // operand input + input [15: 0] b, // operand input + output [46: 0] s // sum output + ); + + // + // Multiplier + // + wire [31: 0] p = {{16{1'b0}}, a} * {{16{1'b0}}, b}; + wire [46: 0] p_ext = {{15{1'b0}}, p}; + + // + // Accumulator + // + reg [46: 0] s_int; + + always @(posedge clk) + // + if (ce) s_int <= clr ? p_ext : p_ext + s_int; + + // + // Output + // + assign s = s_int; + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/generic/subtractor32_generic.v b/rtl/lowlevel/generic/subtractor32_generic.v new file mode 100644 index 0000000..46aefe8 --- /dev/null +++ b/rtl/lowlevel/generic/subtractor32_generic.v @@ -0,0 +1,67 @@ +//------------------------------------------------------------------------------ +// +// subtractor32_generic.v +// ----------------------------------------------------------------------------- +// Generic 32-bit subtractor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module subtractor32_generic + ( + input clk, + input [31: 0] a, + input [31: 0] b, + output [31: 0] d, + input b_in, + output b_out + ); + + // + // Difference + // + reg [32: 0] d_int; + + always @(posedge clk) + d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in}; + + // + // Output + // + assign d = d_int[31:0]; + assign b_out = d_int[32]; + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v new file mode 100644 index 0000000..b91e518 --- /dev/null +++ b/rtl/lowlevel/mac16_wrapper.v @@ -0,0 +1,75 @@ +//------------------------------------------------------------------------------ +// +// mac16_wrapper.v +// ----------------------------------------------------------------------------- +// Wrapper for 16-bit multiplier and 48-bit accumulator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mac16_wrapper + ( + input clk, // clock + input clr, // clear accumulator (active-high) + input ce, // enable clock (active-high) + input [15: 0] a, // operand input + input [15: 0] b, // operand input + output [46: 0] s // sum output + ); + + + // + // Include Primitive Selector + // +`include "ecdsa_lowlevel_settings.v" + + + // + // Instantiate Vendor/Generic Primitive + // + `MAC16_PRIMITIVE mac16_inst + ( + .clk(clk), + .clr(clr), + .ce(ce), + .a(a), + .b(b), + .s(s) + ); + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v new file mode 100644 index 0000000..3c7e5e9 --- /dev/null +++ b/rtl/lowlevel/subtractor32_wrapper.v @@ -0,0 +1,72 @@ +//------------------------------------------------------------------------------ +// +// subtractor32_wrapper.v +// ----------------------------------------------------------------------------- +// Wrapper for 32-bit subtractor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module subtractor32_wrapper + ( + input clk, + input [31: 0] a, + input [31: 0] b, + output [31: 0] d, + input b_in, + output b_out + ); + + // + // Include Primitive Selector + // +`include "ecdsa_lowlevel_settings.v" + + + // + // Instantiate Vendor/Generic Primitive + // + `SUBTRACTOR32_PRIMITIVE subtractor32_inst + ( + .clk(clk), + .a(a), + .b(b), + .d(d), + .b_in(b_in), + .b_out(b_out) + ); + +endmodule + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v new file mode 100644 index 0000000..5641feb --- /dev/null +++ b/rtl/modular/modular_adder.v @@ -0,0 +1,298 @@ +//------------------------------------------------------------------------------ +// +// modular_adder.v +// ----------------------------------------------------------------------------- +// Modular adder. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_adder + ( + clk, rst_n, + ena, rdy, + ab_addr, n_addr, s_addr, s_wren, + a_din, b_din, n_din, s_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word + output wire s_wren; // store current S word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N + output wire [ 31:0] s_dout; // S = (A + B) mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_ab; + reg [WORD_COUNTER_WIDTH-1:0] index_n; + reg [WORD_COUNTER_WIDTH-1:0] index_s; + + /* map registers to output ports */ + assign ab_addr = index_ab; + assign n_addr = index_n; + assign s_addr = index_s; + + + // + // Adder + // + wire [31: 0] add32_s; + wire add32_c_in; + wire add32_c_out; + + adder32_wrapper adder32 + ( + .clk (clk), + .a (a_din), + .b (b_din), + .s (add32_s), + .c_in (add32_c_in), + .c_out (add32_c_out) + ); + + + // + // Subtractor + // + wire [31: 0] sub32_d; + wire sub32_b_in; + wire sub32_b_out; + + subtractor32_wrapper subtractor32 + ( + .clk (clk), + .a (add32_s), + .b (n_din), + .d (sub32_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) + ); + + + // + // FSM + // + + localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)]; + + wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; + + wire inc_index_ab = |fsm_shreg_inc_index_ab; + wire inc_index_n = |fsm_shreg_inc_index_n; + wire store_sum_ab = |fsm_shreg_store_sum_ab; + wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n; + wire store_data_s = |fsm_shreg_store_data_s; + wire inc_index_s = |fsm_shreg_inc_index_s; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + + + + + + // + // Carry & Borrow Masking Logic + // + reg add32_c_mask; + reg sub32_b_mask; + + always @(posedge clk) begin + // + add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + // + end + + assign add32_c_in = add32_c_out & ~add32_c_mask; + assign sub32_b_in = sub32_b_out & ~sub32_b_mask; + + + // + // Carry & Borrow Latch Logic + // + reg add32_carry_latch; + reg sub32_borrow_latch; + + always @(posedge clk) begin + // + if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out; + if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out; + // + end + + + // + // Intermediate Results + // + reg [32*OPERAND_NUM_WORDS-1:0] s_ab; + reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n; + + always @(posedge clk) + // + if (store_data_s) begin + // + s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]}; + s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end else begin + // + if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]}; + if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) begin + // + index_ab <= WORD_INDEX_ZERO; + index_n <= WORD_INDEX_ZERO; + index_s <= WORD_INDEX_ZERO; + // + end else begin + // + if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab); + if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n); + if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s); + // + end + + + // + // Output Sum Selector + // + wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch; + + + // + // Output Data and Write Enable Logic + // + reg s_wren_reg; + reg [31: 0] s_dout_reg; + wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0]; + + assign s_wren = s_wren_reg; + assign s_dout = s_dout_reg; + + always @(posedge clk) + // + if (rdy) begin + // + s_wren_reg <= 1'b0; + s_dout_reg <= {32{1'bX}}; + // + end else begin + // + s_wren_reg <= store_data_s; + s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}}; + // + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v new file mode 100644 index 0000000..07c1b4f --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v @@ -0,0 +1,148 @@ +`timescale 1ns / 1ps + +module modinv_helper_copy + ( + clk, rst_n, + ena, rdy, + s_addr, s_din, + a1_addr, a1_wren, a1_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter OPERAND_ADDR_BITS = 3; + + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + + input wire ena; + output wire rdy; + + output wire [ BUFFER_ADDR_BITS-1:0] s_addr; + output wire [OPERAND_ADDR_BITS-1:0] a1_addr; + + output wire a1_wren; + + input wire [ 31:0] s_din; + + output wire [ 31:0] a1_dout; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [OPERAND_ADDR_BITS-1:0] addr_s; + + wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ? + addr_s + 1'b1 : addr_s_zero; + + reg [OPERAND_ADDR_BITS-1:0] addr_a1; + + wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ? + addr_a1 + 1'b1 : addr_a1_zero; + + assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s}; + assign a1_addr = addr_a1; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_s; + wire inc_addr_a1; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1; + + assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop); + assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop); + + always @(posedge clk) begin + // + if (inc_addr_s) addr_s <= addr_s_next; + else addr_s <= addr_s_zero; + // + if (inc_addr_a1) addr_a1 <= addr_a1_next; + else addr_a1 <= addr_a1_zero; + // + end + + + // + // Write Enable Logic + // + wire wren_a1; + + wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1; + + assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop); + + assign a1_wren = wren_a1; + + + // + // Data Logic + // + assign a1_dout = s_din; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v new file mode 100644 index 0000000..0468134 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_init.v @@ -0,0 +1,172 @@ +`timescale 1ns / 1ps + +module modinv_helper_init + ( + clk, rst_n, + ena, rdy, + a_addr, a_din, + q_addr, q_din, + r_addr, r_wren, r_dout, + s_addr, s_wren, s_dout, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter OPERAND_ADDR_BITS = 3; + + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [OPERAND_ADDR_BITS-1:0] a_addr; + output wire [OPERAND_ADDR_BITS-1:0] q_addr; + output wire [ BUFFER_ADDR_BITS-1:0] r_addr; + output wire [ BUFFER_ADDR_BITS-1:0] s_addr; + output wire [ BUFFER_ADDR_BITS-1:0] u_addr; + output wire [ BUFFER_ADDR_BITS-1:0] v_addr; + + output wire r_wren; + output wire s_wren; + output wire u_wren; + output wire v_wren; + + input wire [ 31:0] a_din; + input wire [ 31:0] q_din; + output wire [ 31:0] r_dout; + output wire [ 31:0] s_dout; + output wire [ 31:0] u_dout; + output wire [ 31:0] v_dout; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [OPERAND_ADDR_BITS-1:0] addr_aq; + + wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ? + addr_aq + 1'b1 : addr_aq_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_rsuv; + + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ? + addr_rsuv + 1'b1 : addr_rsuv_zero; + + assign a_addr = addr_aq; + assign q_addr = addr_aq; + + assign r_addr = addr_rsuv; + assign s_addr = addr_rsuv; + assign u_addr = addr_rsuv; + assign v_addr = addr_rsuv; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_aq; + wire inc_addr_rsuv; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop); + assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop); + + always @(posedge clk) begin + // + if (inc_addr_aq) addr_aq <= addr_aq_next; + else addr_aq <= addr_aq_zero; + // + if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next; + else addr_rsuv <= addr_rsuv_zero; + // + end + + + // + // Write Enable Logic + // + wire wren_rsuv; + + wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1; + + assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop); + + assign r_wren = wren_rsuv; + assign s_wren = wren_rsuv; + assign u_wren = wren_rsuv; + assign v_wren = wren_rsuv; + + + // + // Data Logic + // + assign r_dout = 32'd0; + assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0; + assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0; + assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v new file mode 100644 index 0000000..6b65eb1 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v @@ -0,0 +1,286 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_compare + ( + clk, rst_n, + ena, rdy, + + u_addr, u_din, + v_addr, v_din, + + u_gt_v, v_eq_1, + u_is_even, v_is_even + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + output wire u_gt_v; + output wire v_eq_1; + output wire u_is_even; + output wire v_is_even; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? + addr_in - 1'b1 : addr_in_last; + + assign u_addr = addr_in; + assign v_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Decrement Logic + // + wire dec_addr_in; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0; + + assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); + + always @(posedge clk) + // + if (rdy) addr_in <= addr_in_last; + else if (dec_addr_in) addr_in <= addr_in_prev; + + + // + // Comparison Stage Flags + // + wire calc_leg; + wire calc_leg_final; + wire calc_parity; + + wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1; + + assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop); + assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop); + assign calc_parity = (proc_cnt == cnt_calc_parity); + + + // + // Dummy Input + // + reg sub32_din_1_lsb; + wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb}; + + always @(posedge clk) + // + sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0; + + + // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + ); + + + // + // Subtractor (v - 1) + // + wire [31: 0] sub32_v_minus_1_difference_out; + wire sub32_v_minus_1_borrow_in; + wire sub32_v_minus_1_borrow_out; + + subtractor32_wrapper sub32_v_minus_1 + ( + .clk (clk), + .a (v_din), + .b (sub32_din_1), + .d (sub32_v_minus_1_difference_out), + .b_in (sub32_v_minus_1_borrow_in), + .b_out (sub32_v_minus_1_borrow_out) + ); + + + + // + // Borrow Masking Logic + // + reg mask_borrow; + + always @(posedge clk) + // + mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ? + 1'b0 : 1'b1; + + assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow; + assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow; + + + // + // Comparison Logic + // + reg cmp_u_v_l; + reg cmp_u_v_e; + reg cmp_u_v_g; + + reg cmp_v_1_l; + reg cmp_v_1_e; + reg cmp_v_1_g; + + wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g); + wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g); + + wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0; + + wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0; + + reg u_is_even_reg; + reg v_is_even_reg; + + always @(posedge clk) + // + if (rdy) begin + // + if (ena) begin + // + cmp_u_v_l <= 1'b0; + cmp_u_v_e <= 1'b0; + cmp_u_v_g <= 1'b0; + // + cmp_v_1_l <= 1'b0; + cmp_v_1_e <= 1'b0; + cmp_v_1_g <= 1'b0; + // + u_is_even_reg <= 1'bX; + v_is_even_reg <= 1'bX; + // + end + // + end else begin + // + // parity + // + if (calc_parity) begin + u_is_even_reg <= ~u_din[0]; + v_is_even_reg <= ~v_din[0]; + end + // + // u <> v + // + if (cmp_unresolved_u_v && calc_leg) begin + // + if (cmp_u_v_borrow_is_set) + cmp_u_v_l <= 1'b1; + // + if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero) + cmp_u_v_g <= 1'b1; + // + if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final) + cmp_u_v_e <= 1'b1; + // + end + // + // v <> 1 + // + if (cmp_unresolved_v_1 && calc_leg) begin + // + if (cmp_v_1_borrow_is_set) + cmp_v_1_l <= 1'b1; + // + if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero) + cmp_v_1_g <= 1'b1; + // + if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final) + cmp_v_1_e <= 1'b1; + // + end + // + end + + + // + // Output Flags + // + assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g; + assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g; + + assign u_is_even = u_is_even_reg; + assign v_is_even = v_is_even_reg; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v new file mode 100644 index 0000000..ab15563 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v @@ -0,0 +1,408 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_precalc + ( + clk, rst_n, + ena, rdy, + + r_addr, r_din, + s_addr, s_din, + u_addr, u_din, + v_addr, v_din, + + r_dbl_addr, r_dbl_wren, r_dbl_dout, + s_dbl_addr, s_dbl_wren, s_dbl_dout, + r_plus_s_addr, r_plus_s_wren, r_plus_s_dout, + u_half_addr, u_half_wren, u_half_dout, + v_half_addr, v_half_wren, v_half_dout, + u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din, + v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din, + u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout, + v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + output wire [BUFFER_ADDR_BITS-1:0] r_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + input wire [ 32-1:0] r_din; + input wire [ 32-1:0] s_din; + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; + + output wire [ 32-1:0] r_dbl_dout; + output wire [ 32-1:0] s_dbl_dout; + output wire [ 32-1:0] r_plus_s_dout; + output wire [ 32-1:0] u_half_dout; + output wire [ 32-1:0] v_half_dout; + output wire [ 32-1:0] u_minus_v_dout; + output wire [ 32-1:0] v_minus_u_dout; + output wire [ 32-1:0] u_minus_v_half_dout; + output wire [ 32-1:0] v_minus_u_half_dout; + + output wire r_dbl_wren; + output wire s_dbl_wren; + output wire r_plus_s_wren; + output wire u_half_wren; + output wire v_half_wren; + output wire u_minus_v_wren; + output wire v_minus_u_wren; + output wire u_minus_v_half_wren; + output wire v_minus_u_half_wren; + + input wire [ 32-1:0] u_minus_v_din; + input wire [ 32-1:0] v_minus_u_din; + + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ? + addr_in + 1'b1 : addr_in_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ? + addr_in - 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out1; + + wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? + addr_out1 + 1'b1 : addr_out1_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out2; + + wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ? + addr_out2 + 1'b1 : addr_out2_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? + addr_out2 - 1'b1 : addr_out2_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out3; + + wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? + addr_out3 - 1'b1 : addr_out3_last; + + reg [BUFFER_ADDR_BITS-1:0] addr_out4; + + wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ? + addr_out4 - 1'b1 : addr_out4_last; + + + assign r_addr = addr_in; + assign s_addr = addr_in; + assign u_addr = addr_in; + assign v_addr = addr_in; + + assign r_dbl_addr = addr_out1; + assign s_dbl_addr = addr_out1; + assign r_plus_s_addr = addr_out2; + assign u_half_addr = addr_out3; + assign v_half_addr = addr_out3; + assign u_minus_v_addr = addr_out2; + assign v_minus_u_addr = addr_out2; + assign u_minus_v_half_addr = addr_out4; + assign v_minus_u_half_addr = addr_out4; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment/Decrement Logic + // + wire inc_addr_in; + wire dec_addr_in; + wire inc_addr_out1; + wire inc_addr_out2; + wire dec_addr_out2; + wire dec_addr_out3; + wire dec_addr_out4; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop); + assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); + assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop); + assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); + assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); + assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop); + + + always @(posedge clk) begin + // + if (rdy) begin + // + addr_in <= addr_in_zero; + addr_out1 <= addr_out1_zero; + addr_out2 <= addr_out2_zero; + addr_out3 <= addr_out3_last; + addr_out4 <= addr_out4_last; + // + end else begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else if (dec_addr_in) addr_in <= addr_in_prev; + // + if (inc_addr_out1) addr_out1 <= addr_out1_next; + else addr_out1 <= addr_out1_zero; + // + if (inc_addr_out2) addr_out2 <= addr_out2_next; + else if (dec_addr_out2) addr_out2 <= addr_out2_prev; + // + if (dec_addr_out3) addr_out3 <= addr_out3_prev; + else addr_out3 <= addr_out3_last; + // + if (dec_addr_out4) addr_out4 <= addr_out4_prev; + else addr_out4 <= addr_out4_last; + // + end + // + end + + + // + // Write Enable Logic + // + wire wren_out1; + wire wren_out2; + wire wren_out3; + wire wren_out4; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); + assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); + assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); + assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop); + + assign r_dbl_wren = wren_out1; + assign s_dbl_wren = wren_out1; + assign r_plus_s_wren = wren_out2; + assign u_half_wren = wren_out3; + assign v_half_wren = wren_out3; + assign u_minus_v_wren = wren_out2; + assign v_minus_u_wren = wren_out2; + assign u_minus_v_half_wren = wren_out4; + assign v_minus_u_half_wren = wren_out4; + + + // + // Adder (r + s) + // + wire [31: 0] add32_r_plus_s_sum_out; + wire add32_r_plus_s_carry_in; + wire add32_r_plus_s_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (r_din), + .b (s_din), + .s (add32_r_plus_s_sum_out), + .c_in (add32_r_plus_s_carry_in), + .c_out (add32_r_plus_s_carry_out) + ); + + // + // Subtractor (u - v) + // + wire [31: 0] sub32_u_minus_v_difference_out; + wire sub32_u_minus_v_borrow_in; + wire sub32_u_minus_v_borrow_out; + + subtractor32_wrapper sub32_u_minus_v + ( + .clk (clk), + .a (u_din), + .b (v_din), + .d (sub32_u_minus_v_difference_out), + .b_in (sub32_u_minus_v_borrow_in), + .b_out (sub32_u_minus_v_borrow_out) + ); + + // + // Subtractor (v - u) + // + wire [31: 0] sub32_v_minus_u_difference_out; + wire sub32_v_minus_u_borrow_in; + wire sub32_v_minus_u_borrow_out; + + subtractor32_wrapper sub32_v_minus_u + ( + .clk (clk), + .a (v_din), + .b (u_din), + .d (sub32_v_minus_u_difference_out), + .b_in (sub32_v_minus_u_borrow_in), + .b_out (sub32_v_minus_u_borrow_out) + ); + + + // + // Carry & Borrow Masking Logic + // + reg mask_carry_borrow; + + always @(posedge clk) + // + mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + 1'b0 : 1'b1; + + assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow; + assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow; + assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow; + + + // + // Carry Bits + // + reg r_dbl_carry; + reg s_dbl_carry; + reg u_half_carry; + reg v_half_carry; + reg u_minus_v_half_carry; + reg v_minus_u_half_carry; + + always @(posedge clk) begin + + r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + r_din[31] : 1'b0; + + s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? + s_din[31] : 1'b0; + + u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + u_din[0] : 1'b0; + + v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + v_din[0] : 1'b0; + + u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? + u_minus_v_din[0] : 1'b0; + + v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ? + v_minus_u_din[0] : 1'b0; + + end + + + // + // Data Mapper + // + assign r_dbl_dout = {r_din[30:0], r_dbl_carry}; + assign s_dbl_dout = {s_din[30:0], s_dbl_carry}; + assign r_plus_s_dout = add32_r_plus_s_sum_out; + assign u_half_dout = {u_half_carry, u_din[31:1]}; + assign v_half_dout = {v_half_carry, v_din[31:1]}; + assign u_minus_v_dout = sub32_u_minus_v_difference_out; + assign v_minus_u_dout = sub32_v_minus_u_difference_out; + assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]}; + assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]}; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v new file mode 100644 index 0000000..0cd6ac5 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v @@ -0,0 +1,257 @@ +`timescale 1ns / 1ps + +module modinv_helper_invert_update + ( + clk, rst_n, + ena, rdy, + + u_gt_v, v_eq_1, + u_is_even, v_is_even, + + r_addr, r_wren, r_dout, + s_addr, s_wren, s_dout, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout, + + r_dbl_addr, r_dbl_din, + s_dbl_addr, s_dbl_din, + r_plus_s_addr, r_plus_s_din, + u_half_addr, u_half_din, + v_half_addr, v_half_din, + u_minus_v_half_addr, u_minus_v_half_din, + v_minus_u_half_addr, v_minus_u_half_din + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire u_gt_v; + input wire v_eq_1; + input wire u_is_even; + input wire v_is_even; + + output wire [BUFFER_ADDR_BITS-1:0] r_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + output wire r_wren; + output wire s_wren; + output wire u_wren; + output wire v_wren; + + output wire [ 32-1:0] r_dout; + output wire [ 32-1:0] s_dout; + output wire [ 32-1:0] u_dout; + output wire [ 32-1:0] v_dout; + + output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr; + output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr; + + input wire [ 32-1:0] r_dbl_din; + input wire [ 32-1:0] s_dbl_din; + input wire [ 32-1:0] r_plus_s_din; + input wire [ 32-1:0] u_half_din; + input wire [ 32-1:0] v_half_din; + input wire [ 32-1:0] u_minus_v_half_din; + input wire [ 32-1:0] v_minus_u_half_din; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? + addr_in + 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out; + + wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? + addr_out + 1'b1 : addr_out_zero; + + assign r_addr = addr_out; + assign s_addr = addr_out; + assign u_addr = addr_out; + assign v_addr = addr_out; + + assign r_dbl_addr = addr_in; + assign s_dbl_addr = addr_in; + assign r_plus_s_addr = addr_in; + assign u_half_addr = addr_in; + assign v_half_addr = addr_in; + assign u_minus_v_half_addr = addr_in; + assign v_minus_u_half_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_in; + wire inc_addr_out; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); + + always @(posedge clk) begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else addr_in <= addr_in_zero; + // + if (inc_addr_out) addr_out <= addr_out_next; + else addr_out <= addr_out_zero; + // + end + + // + // Write Enable Logic + // + wire wren_out; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; + + assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop); + + reg r_wren_allow; + reg s_wren_allow; + reg u_wren_allow; + reg v_wren_allow; + + assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy; + assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy; + assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy; + assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy; + + + // + // Data Logic + // + reg [31: 0] r_dout_mux; + reg [31: 0] s_dout_mux; + reg [31: 0] u_dout_mux; + reg [31: 0] v_dout_mux; + + assign r_dout = r_dout_mux; + assign s_dout = s_dout_mux; + assign u_dout = u_dout_mux; + assign v_dout = v_dout_mux; + + always @(*) begin + // + // r, s, u, v + // + if (u_is_even) begin + // + u_dout_mux = u_half_din; + v_dout_mux = {32{1'bX}}; + r_dout_mux = {32{1'bX}}; + s_dout_mux = s_dbl_din; + // + u_wren_allow = 1'b1; + v_wren_allow = 1'b0; + r_wren_allow = 1'b0; + s_wren_allow = 1'b1; + // + end else begin + // + if (v_is_even) begin + // + u_dout_mux = {32{1'bX}}; + v_dout_mux = v_half_din; + r_dout_mux = r_dbl_din; + s_dout_mux = {32{1'bX}}; + // + u_wren_allow = 1'b0; + v_wren_allow = 1'b1; + r_wren_allow = 1'b1; + s_wren_allow = 1'b0; + // + end else begin + // + u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}}; + v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din; + r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din; + s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din; + // + u_wren_allow = u_gt_v; + v_wren_allow = !u_gt_v; + r_wren_allow = 1'b1; + s_wren_allow = 1'b1; + // + end + // + end + // + end + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v new file mode 100644 index 0000000..fb858a6 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v @@ -0,0 +1,328 @@ +`timescale 1ns / 1ps + +module modinv_helper_reduce_precalc + ( + clk, rst_n, + ena, rdy, + + k, + + s_is_odd, k_is_nul, + + r_addr, r_din, r_wren, r_dout, + s_addr, s_din, + u_addr, u_wren, u_dout, + v_addr, v_wren, v_dout, + q_addr, q_din + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter OPERAND_ADDR_BITS = 3; + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + parameter K_NUM_BITS = 10; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire [ K_NUM_BITS-1:0] k; + + output wire s_is_odd; + output wire k_is_nul; + + output wire [ BUFFER_ADDR_BITS-1:0] r_addr; + output wire [ BUFFER_ADDR_BITS-1:0] s_addr; + output wire [ BUFFER_ADDR_BITS-1:0] u_addr; + output wire [ BUFFER_ADDR_BITS-1:0] v_addr; + output wire [OPERAND_ADDR_BITS-1:0] q_addr; + + input wire [ 32-1:0] r_din; + input wire [ 32-1:0] s_din; + input wire [ 32-1:0] q_din; + + output wire r_wren; + output wire u_wren; + output wire v_wren; + + output wire [ 32-1:0] r_dout; + output wire [ 32-1:0] u_dout; + output wire [ 32-1:0] v_dout; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf; + reg [OPERAND_ADDR_BITS-1:0] addr_in_op; + reg [ BUFFER_ADDR_BITS-1:0] addr_out1; + reg [ BUFFER_ADDR_BITS-1:0] addr_out2; + reg [ BUFFER_ADDR_BITS-1:0] addr_out3; + + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ? + addr_in_buf + 1'b1 : addr_in_buf_zero; + wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ? + addr_in_buf - 1'b1 : addr_in_buf_zero; + + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}}; + wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ? + addr_in_op + 1'b1 : addr_in_op_zero; + + wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ? + addr_out1 + 1'b1 : addr_out1_zero; + wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ? + addr_out1 - 1'b1 : addr_out1_zero; + + wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ? + addr_out2 - 1'b1 : addr_out2_last; + + wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ? + addr_out3 - 1'b1 : addr_out3_last; + + + assign s_addr = addr_in_buf; + assign q_addr = addr_in_op; + assign r_addr = addr_out1; + assign u_addr = addr_out2; + assign v_addr = addr_out3; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment/Decrement Logic + // + wire inc_addr_buf_in; + wire dec_addr_buf_in; + wire inc_addr_op_in; + wire inc_addr_out1; + wire dec_addr_out1; + wire dec_addr_out2; + wire dec_addr_out3; + + wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop); + assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop); + assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop); + assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop); + assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop); + assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop); + assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop); + + always @(posedge clk) begin + // + if (rdy) begin + // + addr_in_buf <= addr_in_buf_zero; + addr_in_op <= addr_in_op_zero; + addr_out1 <= addr_out1_zero; + addr_out2 <= addr_out2_last; + addr_out3 <= addr_out3_last; + // + end else begin + // + if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next; + else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev; + // + if (inc_addr_op_in) addr_in_op <= addr_in_op_next; + else addr_in_op <= addr_in_op_zero; + // + if (inc_addr_out1) addr_out1 <= addr_out1_next; + else if (dec_addr_out1) addr_out1 <= addr_out1_prev; + // + if (dec_addr_out2) addr_out2 <= addr_out2_prev; + else addr_out2 <= addr_out2_last; + // + if (dec_addr_out3) addr_out3 <= addr_out3_prev; + else addr_out3 <= addr_out3_last; + // + end + // + end + + + // + // Write Enable Logic + // + wire wren_out1; + wire wren_out2; + wire wren_out3; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3; + wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1; + wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4; + wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3; + + assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop); + assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop); + assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop); + + assign r_wren = wren_out1; + assign u_wren = wren_out2; + assign v_wren = wren_out3; + + // + // Adder (s + q) + // + wire [31: 0] q_din_masked; + wire [31: 0] add32_s_plus_q_sum_out; + wire add32_s_plus_q_carry_in; + wire add32_s_plus_q_carry_out; + + adder32_wrapper add32_r_plus_s + ( + .clk (clk), + .a (s_din), + .b (q_din_masked), + .s (add32_s_plus_q_sum_out), + .c_in (add32_s_plus_q_carry_in), + .c_out (add32_s_plus_q_carry_out) + ); + + + // + // Carry Masking Logic + // + wire mask_carry; + + assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1; + + + // + // Addend Masking Logic + // + reg q_din_mask; + + always @(posedge clk) + q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0; + + assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din; + + assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry; + + + // + // Carry Bits + // + reg s_half_carry; + reg s_plus_q_half_carry; + + always @(posedge clk) begin + // + s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ? + s_din[0] : 1'b0; + // + s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ? + r_din[0] : 1'b0; + // + end + + // + // Data Mapper + // + assign r_dout = add32_s_plus_q_sum_out; + assign u_dout = {s_half_carry, s_din[31:1]}; + assign v_dout = {s_plus_q_half_carry, r_din[31:1]}; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + + // + // Output Flags + // + reg s_is_odd_reg; + reg k_is_nul_reg; + + assign s_is_odd = s_is_odd_reg; + assign k_is_nul = k_is_nul_reg; + + always @(posedge clk) + // + if (proc_cnt == cnt_calc_flags) begin + s_is_odd_reg <= s_din[0]; + k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v new file mode 100644 index 0000000..ea5b854 --- /dev/null +++ b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v @@ -0,0 +1,153 @@ +`timescale 1ns / 1ps + +module modinv_helper_reduce_update + ( + clk, rst_n, + ena, rdy, + + s_is_odd, k_is_nul, + + s_addr, s_wren, s_dout, + u_addr, u_din, + v_addr, v_din + ); + + + // + // Parameters + // + parameter BUFFER_NUM_WORDS = 9; + parameter BUFFER_ADDR_BITS = 4; + + + // + // clog2 + // +`include "..\modinv_clog2.v" + + + // + // Constants + // + localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3; + localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES); + + + // + // Ports + // + input wire clk; + input wire rst_n; + input wire ena; + output wire rdy; + + input wire s_is_odd; + input wire k_is_nul; + + output wire [BUFFER_ADDR_BITS-1:0] s_addr; + output wire [BUFFER_ADDR_BITS-1:0] u_addr; + output wire [BUFFER_ADDR_BITS-1:0] v_addr; + + output wire s_wren; + + output wire [ 32-1:0] s_dout; + + input wire [ 32-1:0] u_din; + input wire [ 32-1:0] v_din; + + + // + // Counter + // + reg [PROC_CNT_BITS-1:0] proc_cnt; + + wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1; + wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}}; + wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ? + proc_cnt + 1'b1 : proc_cnt_zero; + + // + // Addresses + // + reg [BUFFER_ADDR_BITS-1:0] addr_in; + + wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ? + addr_in + 1'b1 : addr_in_zero; + + reg [BUFFER_ADDR_BITS-1:0] addr_out; + + wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1; + wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}}; + wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ? + addr_out + 1'b1 : addr_out_zero; + + assign s_addr = addr_out; + assign u_addr = addr_in; + assign v_addr = addr_in; + + + // + // Ready Flag + // + assign rdy = (proc_cnt == proc_cnt_zero); + + + // + // Address Increment Logic + // + wire inc_addr_in; + wire inc_addr_out; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS; + + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1; + + assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop); + assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop); + + always @(posedge clk) begin + // + if (inc_addr_in) addr_in <= addr_in_next; + else addr_in <= addr_in_zero; + // + if (inc_addr_out) addr_out <= addr_out_next; + else addr_out <= addr_out_zero; + // + end + + // + // Write Enable Logic + // + wire wren_out; + + wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2; + wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1; + + assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop); + + assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy; + + + // + // Data Logic + // + assign s_dout = s_is_odd ? v_din : u_din; + + + // + // Primary Counter Logic + // + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero; + else begin + if (!rdy) proc_cnt <= proc_cnt_next; + else if (ena) proc_cnt <= proc_cnt_next; + end + + +endmodule diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v new file mode 100644 index 0000000..2f7b64d --- /dev/null +++ b/rtl/modular/modular_invertor/modinv_clog2.v @@ -0,0 +1,10 @@ +function integer clog2; + input integer value; + integer result; + begin + value = value - 1; + for (result = 0; value > 0; result = result + 1) + value = value >> 1; + clog2 = result; + end +endfunction diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v new file mode 100644 index 0000000..e9f2460 --- /dev/null +++ b/rtl/modular/modular_invertor/modular_invertor.v @@ -0,0 +1,981 @@ +//------------------------------------------------------------------------------ +// +// modular_invertor.v +// ----------------------------------------------------------------------------- +// Modular invertor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_invertor + ( + clk, rst_n, + ena, rdy, + a_addr, q_addr, a1_addr, a1_wren, + a_din, q_din, a1_dout + ); + + + // + // Parameters + // + parameter MAX_OPERAND_WIDTH = 256; + + + // + // clog2 + // +`include "modinv_clog2.v" + + + // + // More Parameters + // + localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32; + localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS); + + localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1; + localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS); + + localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH; + localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS); + + localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1); + + + // + // Ports + // + input wire clk; + input wire rst_n; + + input wire ena; + output wire rdy; + + output wire [OPERAND_ADDR_BITS-1:0] a_addr; + output reg [OPERAND_ADDR_BITS-1:0] q_addr; + output wire [OPERAND_ADDR_BITS-1:0] a1_addr; + output wire a1_wren; + + input wire [32-1:0] a_din; + input wire [32-1:0] q_din; + output wire [32-1:0] a1_dout; + + + // + // "Redundant" Power of 2 (K) + // + reg [K_NUM_BITS-1:0] k; + + + // + // Buffers + // + reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr; + reg buf_r_wr_en; + reg [ 32-1:0] buf_r_wr_din; + wire [ 32-1:0] buf_r_wr_dout; + wire [ 32-1:0] buf_r_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r + ( .clk(clk), + .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din), + .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr; + reg buf_s_wr_en; + reg [ 32-1:0] buf_s_wr_din; + wire [ 32-1:0] buf_s_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_s + ( .clk(clk), + .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din), + .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr; + reg buf_u_wr_en; + reg [ 32-1:0] buf_u_wr_din; + wire [ 32-1:0] buf_u_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u + ( .clk(clk), + .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din), + .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout) + ); + + reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr; + reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr; + reg buf_v_wr_en; + reg [ 32-1:0] buf_v_wr_din; + wire [ 32-1:0] buf_v_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v + ( .clk(clk), + .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din), + .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr; + wire buf_r_dbl_wr_en; + wire [ 32-1:0] buf_r_dbl_wr_din; + wire [ 32-1:0] buf_r_dbl_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r_dbl + ( .clk(clk), + .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din), + .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr; + wire buf_s_dbl_wr_en; + wire [ 32-1:0] buf_s_dbl_wr_din; + wire [ 32-1:0] buf_s_dbl_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_s_dbl + ( .clk(clk), + .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din), + .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr; + wire buf_r_plus_s_wr_en; + wire [ 32-1:0] buf_r_plus_s_wr_din; + wire [ 32-1:0] buf_r_plus_s_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_r_plus_s + ( .clk(clk), + .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din), + .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr; + wire buf_u_minus_v_wr_en; + wire [ 32-1:0] buf_u_minus_v_wr_din; + wire [ 32-1:0] buf_u_minus_v_wr_dout; + + assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_minus_v + ( .clk(clk), + .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din), + .b_addr(buf_u_minus_v_rd_addr), .b_out() + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr; + wire buf_v_minus_u_wr_en; + wire [ 32-1:0] buf_v_minus_u_wr_din; + wire [ 32-1:0] buf_v_minus_u_wr_dout; + + assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_minus_u + ( .clk(clk), + .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din), + .b_addr(buf_v_minus_u_rd_addr), .b_out() + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr; + wire buf_u_half_wr_en; + wire [ 32-1:0] buf_u_half_wr_din; + wire [ 32-1:0] buf_u_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_half + ( .clk(clk), + .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din), + .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr; + wire buf_v_half_wr_en; + wire [ 32-1:0] buf_v_half_wr_din; + wire [ 32-1:0] buf_v_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_half + ( .clk(clk), + .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din), + .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr; + wire buf_u_minus_v_half_wr_en; + wire [ 32-1:0] buf_u_minus_v_half_wr_din; + wire [ 32-1:0] buf_u_minus_v_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_u_minus_v_half + ( .clk(clk), + .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din), + .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout) + ); + + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr; + wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr; + wire buf_v_minus_u_half_wr_en; + wire [ 32-1:0] buf_v_minus_u_half_wr_din; + wire [ 32-1:0] buf_v_minus_u_half_rd_dout; + + bram_1rw_1ro_readfirst # + ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS) + ) + buf_v_minus_u_half + ( .clk(clk), + .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din), + .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout) + ); + + + // + // Helper Modules + // + wire helper_init_ena; + wire helper_invert_precalc_ena; + wire helper_invert_compare_ena; + wire helper_invert_update_ena; + wire helper_reduce_precalc_ena; + wire helper_reduce_update_ena; + wire helper_copy_ena; + + wire helper_init_rdy; + wire helper_invert_precalc_rdy; + wire helper_invert_compare_rdy; + wire helper_invert_update_rdy; + wire helper_reduce_precalc_rdy; + wire helper_reduce_update_rdy; + wire helper_copy_rdy; + + wire helper_init_done = helper_init_rdy && !helper_init_ena; + wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena; + wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena; + wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena; + wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena; + wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena; + wire helper_copy_done = helper_copy_rdy && !helper_copy_ena; + + + // + // Helper Module - Initialization + // + wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr; + wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr; + + wire helper_init_r_wren; + wire helper_init_s_wren; + wire helper_init_u_wren; + wire helper_init_v_wren; + + wire [ 32-1:0] helper_init_r_data; + wire [ 32-1:0] helper_init_s_data; + wire [ 32-1:0] helper_init_u_data; + wire [ 32-1:0] helper_init_v_data; + + modinv_helper_init # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_init + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_init_ena), + .rdy (helper_init_rdy), + + .a_addr (a_addr), + .q_addr (helper_init_q_addr), + + .r_addr (helper_init_r_addr), + .s_addr (helper_init_s_addr), + .u_addr (helper_init_u_addr), + .v_addr (helper_init_v_addr), + + .q_din (q_din), + .a_din (a_din), + + .r_dout (helper_init_r_data), + .s_dout (helper_init_s_data), + .u_dout (helper_init_u_data), + .v_dout (helper_init_v_data), + + .r_wren (helper_init_r_wren), + .s_wren (helper_init_s_wren), + .u_wren (helper_init_u_wren), + .v_wren (helper_init_v_wren) + ); + + + // + // Helper Module - Inversion Pre-Calculation + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr; + + modinv_helper_invert_precalc # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_precalc + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_precalc_ena), + .rdy (helper_invert_precalc_rdy), + + .r_addr (helper_invert_precalc_r_addr), + .s_addr (helper_invert_precalc_s_addr), + .u_addr (helper_invert_precalc_u_addr), + .v_addr (helper_invert_precalc_v_addr), + + .r_din (buf_r_rd_dout), + .s_din (buf_s_rd_dout), + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout), + + .r_dbl_addr (buf_r_dbl_wr_addr), + .s_dbl_addr (buf_s_dbl_wr_addr), + .r_plus_s_addr (buf_r_plus_s_wr_addr), + + .u_half_addr (buf_u_half_wr_addr), + .v_half_addr (buf_v_half_wr_addr), + .u_minus_v_addr (buf_u_minus_v_wr_addr), + .v_minus_u_addr (buf_v_minus_u_wr_addr), + .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr), + .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr), + + .r_dbl_dout (buf_r_dbl_wr_din), + .s_dbl_dout (buf_s_dbl_wr_din), + .r_plus_s_dout (buf_r_plus_s_wr_din), + + .u_half_dout (buf_u_half_wr_din), + .v_half_dout (buf_v_half_wr_din), + .u_minus_v_dout (buf_u_minus_v_wr_din), + .v_minus_u_dout (buf_v_minus_u_wr_din), + .u_minus_v_half_dout (buf_u_minus_v_half_wr_din), + .v_minus_u_half_dout (buf_v_minus_u_half_wr_din), + + .r_dbl_wren (buf_r_dbl_wr_en), + .s_dbl_wren (buf_s_dbl_wr_en), + .r_plus_s_wren (buf_r_plus_s_wr_en), + + .u_half_wren (buf_u_half_wr_en), + .v_half_wren (buf_v_half_wr_en), + .u_minus_v_wren (buf_u_minus_v_wr_en), + .v_minus_u_wren (buf_v_minus_u_wr_en), + .u_minus_v_half_wren (buf_u_minus_v_half_wr_en), + .v_minus_u_half_wren (buf_v_minus_u_half_wr_en), + + .u_minus_v_din (buf_u_minus_v_wr_dout), + .v_minus_u_din (buf_v_minus_u_wr_dout) + ); + + + // + // Helper Module - Inversion Comparison + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr; + + wire flag_invert_u_gt_v; + wire flag_invert_v_eq_1; + wire flag_invert_u_is_even; + wire flag_invert_v_is_even; + + modinv_helper_invert_compare # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_compare + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_compare_ena), + .rdy (helper_invert_compare_rdy), + + .u_addr (helper_invert_compare_u_addr), + .v_addr (helper_invert_compare_v_addr), + + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout), + + .u_gt_v (flag_invert_u_gt_v), + .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even), + .v_is_even (flag_invert_v_is_even) + ); + + + // + // Helper Module - Inversion Update + // + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr; + + wire helper_invert_update_r_wren; + wire helper_invert_update_s_wren; + wire helper_invert_update_u_wren; + wire helper_invert_update_v_wren; + + wire [ 32-1:0] helper_invert_update_r_data; + wire [ 32-1:0] helper_invert_update_s_data; + wire [ 32-1:0] helper_invert_update_u_data; + wire [ 32-1:0] helper_invert_update_v_data; + + modinv_helper_invert_update # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_invert_update + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_invert_update_ena), + .rdy (helper_invert_update_rdy), + + .u_gt_v (flag_invert_u_gt_v), + .v_eq_1 (flag_invert_v_eq_1), + .u_is_even (flag_invert_u_is_even), + .v_is_even (flag_invert_v_is_even), + + .r_addr (helper_invert_update_r_addr), + .s_addr (helper_invert_update_s_addr), + .u_addr (helper_invert_update_u_addr), + .v_addr (helper_invert_update_v_addr), + + .r_wren (helper_invert_update_r_wren), + .s_wren (helper_invert_update_s_wren), + .u_wren (helper_invert_update_u_wren), + .v_wren (helper_invert_update_v_wren), + + .r_dout (helper_invert_update_r_data), + .s_dout (helper_invert_update_s_data), + .u_dout (helper_invert_update_u_data), + .v_dout (helper_invert_update_v_data), + + .r_dbl_addr (buf_r_dbl_rd_addr), + .s_dbl_addr (buf_s_dbl_rd_addr), + .r_plus_s_addr (buf_r_plus_s_rd_addr), + .u_half_addr (buf_u_half_rd_addr), + .v_half_addr (buf_v_half_rd_addr), + .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr), + .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr), + + .r_dbl_din (buf_r_dbl_rd_dout), + .s_dbl_din (buf_s_dbl_rd_dout), + .r_plus_s_din (buf_r_plus_s_rd_dout), + .u_half_din (buf_u_half_rd_dout), + .v_half_din (buf_v_half_rd_dout), + .u_minus_v_half_din (buf_u_minus_v_half_rd_dout), + .v_minus_u_half_din (buf_v_minus_u_half_rd_dout) + ); + + + // + // Helper Module - Reduction Pre-Calculation + // + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr; + wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr; + wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr; + + wire helper_reduce_precalc_r_wren; + wire helper_reduce_precalc_u_wren; + wire helper_reduce_precalc_v_wren; + + wire [ 32-1:0] helper_reduce_precalc_r_data; + wire [ 32-1:0] helper_reduce_precalc_u_data; + wire [ 32-1:0] helper_reduce_precalc_v_data; + + wire flag_reduce_s_is_odd; + wire flag_invert_k_is_nul; + + modinv_helper_reduce_precalc # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS), + .K_NUM_BITS (K_NUM_BITS) + ) + helper_reduce_precalc + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_reduce_precalc_ena), + .rdy (helper_reduce_precalc_rdy), + + .r_addr (helper_reduce_precalc_r_addr), + .s_addr (helper_reduce_precalc_s_addr), + .u_addr (helper_reduce_precalc_u_addr), + .v_addr (helper_reduce_precalc_v_addr), + .q_addr (helper_reduce_precalc_q_addr), + + .k (k), + + .s_is_odd (flag_reduce_s_is_odd), + .k_is_nul (flag_reduce_k_is_nul), + + .r_din (buf_r_wr_dout), + .s_din (buf_s_rd_dout), + .q_din (q_din), + + .r_wren (helper_reduce_precalc_r_wren), + .u_wren (helper_reduce_precalc_u_wren), + .v_wren (helper_reduce_precalc_v_wren), + + .r_dout (helper_reduce_precalc_r_data), + .u_dout (helper_reduce_precalc_u_data), + .v_dout (helper_reduce_precalc_v_data) + ); + + // + // Helper Module - Reduction Update + // + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr; + wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr; + + wire helper_reduce_update_s_wren; + + wire [ 32-1:0] helper_reduce_update_s_data; + + modinv_helper_reduce_update # + ( + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_reduce_update + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_reduce_update_ena), + .rdy (helper_reduce_update_rdy), + + .s_is_odd (flag_reduce_s_is_odd), + .k_is_nul (flag_reduce_k_is_nul), + + .s_addr (helper_reduce_update_s_addr), + .u_addr (helper_reduce_update_u_addr), + .v_addr (helper_reduce_update_v_addr), + + .s_wren (helper_reduce_update_s_wren), + + .s_dout (helper_reduce_update_s_data), + + .u_din (buf_u_rd_dout), + .v_din (buf_v_rd_dout) + ); + + + // + // Helper Module - Copying + // + wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr; + + modinv_helper_copy # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS), + + .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS), + .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS) + ) + helper_copy + ( + .clk (clk), + .rst_n (rst_n), + + .ena (helper_copy_ena), + .rdy (helper_copy_rdy), + + .s_addr (helper_copy_s_addr), + .a1_addr (a1_addr), + + .s_din (buf_s_rd_dout), + + .a1_dout (a1_dout), + + .a1_wren (a1_wren) + ); + + + // + // Round Counter + // + reg [ROUND_COUNTER_BITS-1:0] round_counter; + wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1; + wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}}; + wire [ROUND_COUNTER_BITS-1:0] round_counter_next = + (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 4'd0; + + localparam FSM_STATE_INIT = 4'd1; + + localparam FSM_STATE_INVERT_PRECALC = 4'd11; + localparam FSM_STATE_INVERT_COMPARE = 4'd12; + localparam FSM_STATE_INVERT_UPDATE = 4'd13; + + localparam FSM_STATE_REDUCE_PRECALC = 4'd14; + localparam FSM_STATE_REDUCE_UPDATE = 4'd15; + + localparam FSM_STATE_COPY = 4'd2; + + localparam FSM_STATE_DONE = 4'd3; + + reg [3:0] fsm_state = FSM_STATE_IDLE; + reg [3:0] fsm_state_dly = FSM_STATE_IDLE; + + wire fsm_state_new = (fsm_state != fsm_state_dly); + + wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ? + FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC; + + wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ? + FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE; + else case (fsm_state) + FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE; + FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT; + FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC; + FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE; + FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE; + FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC; + FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE; + FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY; + FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE; + default: fsm_state <= FSM_STATE_IDLE; + endcase + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE; + else fsm_state_dly <= fsm_state; + + + assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new; + assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new; + assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new; + assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new; + assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new; + assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new; + assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new; + + + // + // Counter Increment + // + always @(posedge clk) begin + // + if ((fsm_state == FSM_STATE_INIT) && helper_init_done) + round_counter <= round_counter_zero; + // + if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done) + round_counter <= round_counter_next; + // + if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done) + round_counter <= round_counter_next; + // + end + + + // + // Q Address Selector + // + always @(*) begin + // + case (fsm_state) + FSM_STATE_INIT: q_addr = helper_init_q_addr; + FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr; + default: q_addr = {OPERAND_ADDR_BITS{1'bX}}; + endcase + // + end + + + // + // Buffer Address Selector + // + always @(*) begin + // + // Write Ports + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr; + FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr; + default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr; + FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr; + default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr; + FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr; + default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr; + FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr; + default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + // Read Ports + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr; + default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr; + FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr; + FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr; + default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr; + FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr; + FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr; + default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr; + FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr; + FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr; + default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}}; + endcase + // + end + + + // + // Buffer Write Enable Logic + // + always @(*) begin + // + // Write Ports + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren; + FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren; + default: buf_r_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren; + FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren; + default: buf_s_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren; + FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren; + default: buf_u_wr_en = 1'b0; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren; + FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren; + default: buf_v_wr_en = 1'b0; + endcase + // + end + + + // + // Buffer Write Data Selector + // + always @(*) begin + // + case (fsm_state) + FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data; + FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data; + FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data; + default: buf_r_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data; + FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data; + FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data; + default: buf_s_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data; + FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data; + FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data; + default: buf_u_wr_din = {32{1'bX}}; + endcase + // + case (fsm_state) + FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data; + FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data; + FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data; + default: buf_v_wr_din = {32{1'bX}}; + endcase + // + end + + + // + // Ready Logic + // + reg rdy_reg = 1'b1; + + assign rdy = rdy_reg; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) rdy_reg <= 1'b1; + else begin + + /* clear */ + if (rdy && ena) rdy_reg <= 1'b0; + + /* set */ + if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1; + + end + + + // + // Store Redundant Power of 2 (K) + // + always @(posedge clk) + // + if (helper_init_ena) + k <= {K_NUM_BITS{1'b0}}; + else begin + + if (helper_invert_update_ena && !flag_invert_v_eq_1) + k <= k + 1'b1; + + if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}})) + k <= k - 1'b1; + + end + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_multiplier_384.v b/rtl/modular/modular_multiplier_384.v new file mode 100644 index 0000000..705cd44 --- /dev/null +++ b/rtl/modular/modular_multiplier_384.v @@ -0,0 +1,402 @@ +//------------------------------------------------------------------------------ +// +// modular_multiplier_384.v +// ----------------------------------------------------------------------------- +// Modular multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_multiplier_384 + ( + clk, rst_n, + ena, rdy, + a_addr, b_addr, n_addr, p_addr, p_wren, + a_din, b_din, n_din, p_dout + ); + + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 12; + localparam WORD_COUNTER_WIDTH = 4; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word + output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N (must be P-384!) + output wire [ 31:0] p_dout; // P = A * B mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_a; + reg [WORD_COUNTER_WIDTH-1:0] index_b; + + /* map registers to output ports */ + assign a_addr = index_a; + assign b_addr = index_b; + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; + + wire inc_index_a = |fsm_shreg_inc_index_a; + wire store_word_a = |fsm_shreg_store_word_a; + wire inc_index_b = |fsm_shreg_inc_index_b; + wire clear_mac_ab = |fsm_shreg_inc_index_b; + wire shift_wide_a = |fsm_shreg_inc_index_b; + wire enable_mac_ab = |fsm_shreg_inc_index_b; + wire store_si_msb = |fsm_shreg_store_si_msb; + wire store_si_lsb = fsm_shreg_store_si_lsb; + wire shift_si = |fsm_shreg_shift_si; + wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; + wire store_c_word = |fsm_shreg_store_c_word; + wire reduce_start = fsm_shreg_reduce_start; + wire reduce_stop = fsm_shreg_reduce_stop; + + + // + // FSM Logic + // + wire reduce_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done) + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + reg index_b_ff; + + always @(posedge clk) + // + if (inc_index_b) index_b_ff <= ~index_b_ff; + else index_b_ff <= 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + index_a <= WORD_INDEX_ZERO; + index_b <= WORD_INDEX_LAST; + // + end else begin + // + if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); + if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); + // + end + + + // + // Wide Operand Buffer + // + reg [383:0] buf_a_wide; + + always @(posedge clk) + // + if (store_word_a) + buf_a_wide <= {buf_a_wide[16 +: 384 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[384 - 2 * 16 +: 16]}; + else if (shift_wide_a) + buf_a_wide <= {buf_a_wide[384-(16+1):0], buf_a_wide[384-16+:16]}; + + + // + // Multiplier Array + // + wire mac_inhibit; // control signal to pause all accumulators + + wire [46: 0] mac[0:23]; // outputs of all accumulators + reg [23: 0] mac_clear; // individual per-accumulator clear flag + + assign mac_inhibit = ~enable_mac_ab; + + always @(posedge clk) + // + if (!clear_mac_ab) + mac_clear <= {24{1'b1}}; + else begin + + if (mac_clear == {24{1'b1}}) + mac_clear <= {{22{1'b0}}, 1'b1, 1'b0}; + else + mac_clear <= (mac_clear[23] == 1'b0) ? {mac_clear[22:0], 1'b0} : {24{1'b1}}; + + + end + + // + // Array of parallel multipliers + // + genvar i; + generate for (i=0; i<24; i=i+1) + begin : gen_mac_array + // + mac16_wrapper mac16_inst + ( + .clk (clk), + .ce (~mac_inhibit), + + .clr (mac_clear[i]), + + .a (buf_a_wide[16*i+:16]), + .b (index_b_ff ? b_din[15:0] : b_din[31:16]), + .s (mac[i]) + ); + // + end + endgenerate + + // + // Intermediate Words + // + reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; + reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; + + + wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; + wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; + + generate for (i=0; i<24; i=i+1) + begin : gen_si_lsb_new + assign si_lsb_new[47*i+:47] = mac[23-i]; + end + endgenerate + + generate for (i=1; i<24; i=i+1) + begin : gen_si_msb_new + assign si_msb_new[47*(23-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(23-i)+:47]; + end + endgenerate + + always @(posedge clk) begin + // + if (shift_si) begin + si_msb <= {{2*47{1'b0}}, si_msb[23*47-1:2*47]}; + si_lsb <= {si_msb[2*47-1:0], si_lsb[24*47-1:2*47]}; + end else begin + + if (store_si_msb) + si_msb <= si_msb_new; + + if (store_si_lsb) + si_lsb <= si_lsb_new; + end + + end + + + // + // Accumulators + // + wire [46: 0] add47_cw0_s; + wire [46: 0] add47_cw1_s; + + + // + // cw0, b, cw1, b + // + reg [30: 0] si_prev_dly; + reg [15: 0] si_next_dly; + + always @(posedge clk) + // + if (shift_si) + si_prev_dly <= si_lsb[93:63]; + else + si_prev_dly <= {31{1'b0}}; + + always @(posedge clk) + // + si_next_dly <= si_lsb[62:47]; + + wire [46: 0] add47_cw0_a = si_lsb[46:0]; + wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; + + wire [46: 0] add47_cw1_a = add47_cw0_s; + wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; + + adder47_wrapper add47_cw0_inst + ( + .clk (clk), + .a (add47_cw0_a), + .b (add47_cw0_b), + .s (add47_cw0_s) + ); + + adder47_wrapper add47_cw1_inst + ( + .clk (clk), + .a (add47_cw1_a), + .b (add47_cw1_b), + .s (add47_cw1_s) + ); + + + + // + // Full-Size Product + // + reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + + wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; + wire [ 31:0] reduce_c_word; + + always @(posedge clk) + // + if (store_c_word) + bram_c_addr <= bram_c_addr + 1'b1; + else + bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) + ) + bram_c_inst + ( + .clk (clk), + + .a_addr (bram_c_addr), + .a_wr (store_c_word), + .a_in (add47_cw1_s[31:0]), + .a_out (), + + .b_addr (reduce_c_addr), + .b_out (reduce_c_word) + ); + + + // + // Reduction Stage + // + modular_reductor_384 reduce_384_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_done), + + .x_addr (reduce_c_addr), + .n_addr (n_addr), + .p_addr (p_addr), + .p_wren (p_wren), + + .x_din (reduce_c_word), + .n_din (n_din), + .p_dout (p_dout) + ); + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_reductor_384.v b/rtl/modular/modular_reductor_384.v new file mode 100644 index 0000000..4aea5fe --- /dev/null +++ b/rtl/modular/modular_reductor_384.v @@ -0,0 +1,739 @@ +//------------------------------------------------------------------------------ +// +// modular_reductor_384.v +// ----------------------------------------------------------------------------- +// Modular reductor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_reductor_384 + ( + clk, rst_n, + ena, rdy, + x_addr, n_addr, p_addr, p_wren, + x_din, n_din, p_dout + ); + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 12; + localparam WORD_COUNTER_WIDTH = 4; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] x_din; // X + input wire [ 31:0] n_din; // N (must be P-256!) + output wire [ 31:0] p_dout; // P = X mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH:0] index_x; + + + /* map registers to output ports */ + assign x_addr = index_x; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS]; + wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5]; + + wire [5-1:0] fsm_shreg_reduce_stage_start; + wire [5-1:0] fsm_shreg_reduce_stage_stop; + + genvar s; + generate for (s=0; s<5; s=s+1) + begin : gen_fsm_shreg_reduce_stages + assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1]; + assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2]; + end + endgenerate + + wire inc_index_x = |fsm_shreg_inc_index_x; + wire store_word_z = |fsm_shreg_store_word_z; + wire reduce_start = |fsm_shreg_reduce_stage_start; + wire reduce_stop = |fsm_shreg_reduce_stage_stop; + wire store_p = fsm_shreg_reduce_stage_stop[0]; + + + wire reduce_adder0_done; + wire reduce_adder1_done; + wire reduce_subtractor_done; + + wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + // + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done_all) + // + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) + // + index_x <= WORD_INDEX_LAST; + // + else if (inc_index_x) + // + index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x); + + + // + // Look-up Table + // + + // + // Take a look at the corresponding C model for more information + // on how exactly the math behind reduction works. The first step + // is to assemble nine 384-bit values ("z-words") from 32-bit parts + // of the full 768-bit product ("c-word"). The problem with z10 is + // that it contains c23 two times. This implementation scans from + // c23 to c0 and writes current part of c-word into corresponding + // parts of z-words. Since those 32-bit parts are stored in block + // memories, one source word can only be written to one location in + // every z-word at a time. The trick is to delay c23 and then write + // the delayed value at the corresponding location in z10 instead of + // the next c22. "z_save" flag is used to indicate that the current + // word should be delayed and written once again during the next cycle. + // + + + reg [10*WORD_COUNTER_WIDTH-1:0] z_addr; // + reg [10 -1:0] z_wren; // + reg [10 -1:0] z_mask; // mask input to store zero word + reg [10 -1:0] z_save; // save previous word once again + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // s10 s9 s8 s7 s6 s5 s4 s3 s2 s1 + // || || || || || || || || || || + 5'd00: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd00}; + 5'd01: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd01}; + 5'd02: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd02}; + 5'd03: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd03}; + 5'd04: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd04}; + 5'd05: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd05}; + 5'd06: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd06}; + 5'd07: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd07}; + 5'd08: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd08}; + 5'd09: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd09}; + 5'd10: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd10}; + 5'd11: z_addr <= {4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'dxx, 4'd11}; + + 5'd12: z_addr <= {4'd00, 4'd00, 4'd01, 4'd01, 4'd00, 4'd04, 4'd03, 4'd00, 4'd00, 4'dxx}; + 5'd13: z_addr <= {4'd01, 4'd05, 4'd02, 4'd02, 4'd01, 4'd05, 4'd04, 4'd01, 4'd01, 4'dxx}; + 5'd14: z_addr <= {4'd02, 4'd06, 4'd03, 4'd06, 4'd02, 4'd06, 4'd05, 4'd02, 4'd02, 4'dxx}; + 5'd15: z_addr <= {4'd05, 4'd07, 4'd04, 4'd07, 4'd03, 4'd07, 4'd06, 4'd03, 4'd03, 4'dxx}; + 5'd16: z_addr <= {4'd06, 4'd08, 4'd05, 4'd08, 4'd08, 4'd08, 4'd07, 4'd04, 4'd07, 4'dxx}; + 5'd17: z_addr <= {4'd07, 4'd09, 4'd06, 4'd09, 4'd09, 4'd09, 4'd08, 4'd05, 4'd08, 4'dxx}; + 5'd18: z_addr <= {4'd08, 4'd10, 4'd07, 4'd10, 4'd10, 4'd10, 4'd09, 4'd06, 4'd09, 4'dxx}; + 5'd19: z_addr <= {4'd09, 4'd11, 4'd08, 4'd11, 4'd11, 4'd11, 4'd10, 4'd07, 4'd10, 4'dxx}; + 5'd20: z_addr <= {4'd10, 4'd01, 4'd09, 4'd00, 4'd04, 4'd03, 4'd11, 4'd08, 4'd11, 4'dxx}; + 5'd21: z_addr <= {4'd11, 4'd02, 4'd10, 4'd03, 4'd05, 4'd00, 4'd00, 4'd09, 4'd04, 4'dxx}; + 5'd22: z_addr <= {4'd04, 4'd03, 4'd11, 4'd04, 4'd06, 4'd02, 4'd01, 4'd10, 4'd05, 4'dxx}; + 5'd23: z_addr <= {4'd03, 4'd04, 4'd00, 4'd05, 4'd07, 4'd01, 4'd02, 4'd11, 4'd06, 4'dxx}; + // + default: z_addr <= {10*WORD_COUNTER_WIDTH{1'bX}}; + // + endcase + + always @(posedge clk) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd08: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd09: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd10: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + 5'd11: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1}; + + 5'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd16: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd17: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd18: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd19: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd20: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd21: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd22: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + 5'd23: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0}; + // + default: z_wren <= {10{1'b0}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd08: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd10: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd11: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + + 5'd12: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd13: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd14: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd15: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd16: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd17: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd18: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd19: z_mask <= {1'b1, 1'b1, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd20: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0}; + 5'd21: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd22: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd23: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_mask <= {10{1'bX}}; + // + endcase + + always @(posedge clk) + // + if (inc_index_x) + // + case (index_x) + // + // 10 9 8 7 6 5 4 3 2 1 + // | | | | | | | | | | + 5'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + + 5'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd16: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd17: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd18: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd19: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd20: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd21: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd22: z_save <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + 5'd23: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}; + // + default: z_save <= {10{1'bX}}; + // + endcase + + + // + // Intermediate Numbers + // + reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:10]; + wire [ 32-1:0] reduce_z_dout[1:10]; + + reg [31: 0] x_din_dly; + always @(posedge clk) + // + x_din_dly <= x_din; + + + genvar z; + generate for (z=1; z<=10; z=z+1) + // + begin : gen_z_bram + // + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_c_inst + ( + .clk (clk), + + .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]), + .a_wr (z_wren[z-1] & store_word_z), + .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)), + .a_out (), + + .b_addr (reduce_z_addr[z]), + .b_out (reduce_z_dout[z]) + ); + // + end + // + endgenerate + + + + + wire [ 32-1:0] bram_sum0_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr; + wire bram_sum0_wr_wren; + + wire [ 32-1:0] bram_sum1_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr; + wire bram_sum1_wr_wren; + + wire [ 32-1:0] bram_diff_wr_din; + wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr; + wire bram_diff_wr_wren; + + wire [ 32-1:0] bram_sum0_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr; + + wire [ 32-1:0] bram_sum1_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr; + + wire [ 32-1:0] bram_diff_rd_dout; + reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr; + + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum0_inst + ( + .clk (clk), + + .a_addr (bram_sum0_wr_addr), + .a_wr (bram_sum0_wr_wren), + .a_in (bram_sum0_wr_din), + .a_out (), + + .b_addr (bram_sum0_rd_addr), + .b_out (bram_sum0_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_sum1_inst + ( + .clk (clk), + + .a_addr (bram_sum1_wr_addr), + .a_wr (bram_sum1_wr_wren), + .a_in (bram_sum1_wr_din), + .a_out (), + + .b_addr (bram_sum1_rd_addr), + .b_out (bram_sum1_rd_dout) + ); + + bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)) + bram_diff_inst + ( + .clk (clk), + + .a_addr (bram_diff_wr_addr), + .a_wr (bram_diff_wr_wren), + .a_in (bram_diff_wr_din), + .a_out (), + + .b_addr (bram_diff_rd_addr), + .b_out (bram_diff_rd_dout) + ); + + + wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr; + wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr; + + reg [ 32-1:0] adder0_a_din; + reg [ 32-1:0] adder0_b_din; + + reg [ 32-1:0] adder1_a_din; + reg [ 32-1:0] adder1_b_din; + + reg [ 32-1:0] subtractor_a_din; + reg [ 32-1:0] subtractor_b_din; + + // n_addr - only 1 output, because all modules are in sync + + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst0 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder0_done), + + .ab_addr (adder0_ab_addr), + .n_addr (), + .s_addr (bram_sum0_wr_addr), + .s_wren (bram_sum0_wr_wren), + + .a_din (adder0_a_din), + .b_din (adder0_b_din), + .n_din (n_din), + .s_dout (bram_sum0_wr_din) + ); + + modular_adder # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + adder_inst1 + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_adder1_done), + + .ab_addr (adder1_ab_addr), + .n_addr (), + .s_addr (bram_sum1_wr_addr), + .s_wren (bram_sum1_wr_wren), + + .a_din (adder1_a_din), + .b_din (adder1_b_din), + .n_din (n_din), + .s_dout (bram_sum1_wr_din) + ); + + modular_subtractor # + ( + .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS), + .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH) + ) + subtractor_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_subtractor_done), + + .ab_addr (subtractor_ab_addr), + .n_addr (n_addr), + .d_addr (bram_diff_wr_addr), + .d_wren (bram_diff_wr_wren), + + .a_din (subtractor_a_din), + .b_din (subtractor_b_din), + .n_din (n_din), + .d_dout (bram_diff_wr_din) + ); + + + // + // Address (Operand) Selector + // + always @(*) + // + case (fsm_shreg_reduce_stage_stop) + // + 5'b10000: begin + reduce_z_addr[ 1] = adder0_ab_addr; + reduce_z_addr[ 2] = adder1_ab_addr; + reduce_z_addr[ 3] = adder0_ab_addr; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = subtractor_ab_addr; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + 5'b01000: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = adder0_ab_addr; + reduce_z_addr[ 5] = adder1_ab_addr; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = subtractor_ab_addr; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00100: begin + // + reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[6] = adder0_ab_addr; + reduce_z_addr[7] = adder1_ab_addr; + reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = subtractor_ab_addr; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder1_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00010: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = adder0_ab_addr; + bram_diff_rd_addr = subtractor_ab_addr; + end + // + 5'b00001: begin + // + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = adder0_ab_addr; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = adder0_ab_addr; + end + // + default: begin + reduce_z_addr[ 1] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 2] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 3] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 4] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 5] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 6] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 7] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 8] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[ 9] = {WORD_COUNTER_WIDTH{1'bX}}; + reduce_z_addr[10] = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}}; + end + // + endcase + + + // + // adder 0 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_a_din = reduce_z_dout[1]; + 5'b01000: adder0_a_din = bram_sum0_rd_dout; + 5'b00100: adder0_a_din = bram_sum0_rd_dout; + 5'b00010: adder0_a_din = bram_sum0_rd_dout; + 5'b00001: adder0_a_din = bram_sum0_rd_dout; + default: adder0_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder0_b_din = reduce_z_dout[3]; + 5'b01000: adder0_b_din = reduce_z_dout[4]; + 5'b00100: adder0_b_din = reduce_z_dout[6]; + 5'b00010: adder0_b_din = bram_sum1_rd_dout; + 5'b00001: adder0_b_din = bram_diff_rd_dout; + default: adder0_b_din = {32{1'bX}}; + endcase + // + end + + // + // adder 1 + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_a_din = reduce_z_dout[2]; + 5'b01000: adder1_a_din = bram_sum1_rd_dout; + 5'b00100: adder1_a_din = bram_sum1_rd_dout; + 5'b00010: adder1_a_din = {32{1'bX}}; + 5'b00001: adder1_a_din = {32{1'bX}}; + default: adder1_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: adder1_b_din = reduce_z_dout[2]; + 5'b01000: adder1_b_din = reduce_z_dout[5]; + 5'b00100: adder1_b_din = reduce_z_dout[7]; + 5'b00010: adder1_b_din = {32{1'bX}}; + 5'b00001: adder1_b_din = {32{1'bX}}; + default: adder1_b_din = {32{1'bX}}; + endcase + // + end + + + // + // subtractor + // + always @(*) begin + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_a_din = {32{1'b0}}; + 5'b01000: subtractor_a_din = bram_diff_rd_dout; + 5'b00100: subtractor_a_din = bram_diff_rd_dout; + 5'b00010: subtractor_a_din = bram_diff_rd_dout; + 5'b00001: subtractor_a_din = {32{1'bX}}; + default: subtractor_a_din = {32{1'bX}}; + endcase + // + case (fsm_shreg_reduce_stage_stop) + 5'b10000: subtractor_b_din = reduce_z_dout[8]; + 5'b01000: subtractor_b_din = reduce_z_dout[9]; + 5'b00100: subtractor_b_din = reduce_z_dout[10]; + 5'b00010: subtractor_b_din = {32{1'b0}}; + 5'b00001: subtractor_b_din = {32{1'bX}}; + default: subtractor_b_din = {32{1'bX}}; + endcase + // + end + + + // + // Address Mapping + // + assign p_addr = bram_sum0_wr_addr; + assign p_wren = bram_sum0_wr_wren & store_p; + assign p_dout = bram_sum0_wr_din; + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v new file mode 100644 index 0000000..322aec4 --- /dev/null +++ b/rtl/modular/modular_subtractor.v @@ -0,0 +1,292 @@ +//------------------------------------------------------------------------------ +// +// modular_subtractor.v +// ----------------------------------------------------------------------------- +// Modular subtractor. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_subtractor + ( + clk, rst_n, + ena, rdy, + ab_addr, n_addr, d_addr, d_wren, + a_din, b_din, n_din, d_dout + ); + + + // + // Parameters + // + parameter OPERAND_NUM_WORDS = 8; + parameter WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word + output wire d_wren; // store current D word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N + output wire [ 31:0] d_dout; // D = (A - B) mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_ab; + reg [WORD_COUNTER_WIDTH-1:0] index_n; + reg [WORD_COUNTER_WIDTH-1:0] index_d; + + /* map registers to output ports */ + assign ab_addr = index_ab; + assign n_addr = index_n; + assign d_addr = index_d; + + + // + // Subtractor + // + wire [31: 0] sub32_d; + wire sub32_b_in; + wire sub32_b_out; + + subtractor32_wrapper subtractor32 + ( + .clk (clk), + .a (a_din), + .b (b_din), + .d (sub32_d), + .b_in (sub32_b_in), + .b_out (sub32_b_out) + ); + + + // + // Adder + // + wire [31: 0] add32_s; + wire add32_c_in; + wire add32_c_out; + + adder32_wrapper adder32 + ( + .clk (clk), + .a (sub32_d), + .b (n_din), + .s (add32_s), + .c_in (add32_c_in), + .c_out (add32_c_out) + ); + + + // + // FSM + // + + localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)]; + + wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + + wire inc_index_ab = |fsm_shreg_inc_index_ab; + wire inc_index_n = |fsm_shreg_inc_index_n; + wire store_dif_ab = |fsm_shreg_store_dif_ab; + wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n; + wire store_data_d = |fsm_shreg_store_data_d; + wire inc_index_d = |fsm_shreg_inc_index_d; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Borrow & Carry Masking Logic + // + reg sub32_b_mask; + reg add32_c_mask; + + + always @(posedge clk) begin + // + sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0; + // + end + + assign sub32_b_in = sub32_b_out & ~sub32_b_mask; + assign add32_c_in = add32_c_out & ~add32_c_mask; + + + + // + // Borrow & Carry Latch Logic + // + reg sub32_borrow_latch; + + always @(posedge clk) begin + // + if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out; + // + end + + + // + // Intermediate Results + // + reg [32*OPERAND_NUM_WORDS-1:0] d_ab; + reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n; + + always @(posedge clk) + // + if (store_data_d) begin + // + d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]}; + d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end else begin + // + if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]}; + if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) begin + // + index_ab <= WORD_INDEX_ZERO; + index_n <= WORD_INDEX_ZERO; + index_d <= WORD_INDEX_ZERO; + // + end else begin + // + if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab); + if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n); + if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d); + // + end + + + // + // Output Sum Selector + // + wire mux_select_ab_n = sub32_borrow_latch; + + + // + // Output Data and Write Enable Logic + // + reg d_wren_reg; + reg [31: 0] d_dout_reg; + wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0]; + + assign d_wren = d_wren_reg; + assign d_dout = d_dout_reg; + + always @(posedge clk) + // + if (rdy) begin + // + d_wren_reg <= 1'b0; + d_dout_reg <= {32{1'bX}}; + // + end else begin + // + d_wren_reg <= store_data_d; + d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}}; + // + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v new file mode 100644 index 0000000..b97a6cf --- /dev/null +++ b/rtl/multiword/mw_comparator.v @@ -0,0 +1,220 @@ +//------------------------------------------------------------------------------ +// +// mw_comparator.v +// ----------------------------------------------------------------------------- +// Multi-word comparator. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mw_comparator + ( + clk, rst_n, + ena, rdy, + xy_addr, x_din, y_din, + cmp_l, cmp_e, cmp_g + ); + + + // + // Parameters + // + parameter WORD_COUNTER_WIDTH = 3; + parameter OPERAND_NUM_WORDS = 8; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words + input wire [ 32-1:0] x_din; // current X word + input wire [ 32-1:0] y_din; // current Y word + + output wire cmp_l; // X < Y ? + output wire cmp_e; // X = Y ? + output wire cmp_g; // X > Y ? + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_xy; + + reg reg_cmp_l; + reg reg_cmp_e; + reg reg_cmp_g; + + + // + // Output Mapping + // + assign xy_addr = index_xy; + + assign cmp_l = reg_cmp_l; + assign cmp_e = reg_cmp_e; + assign cmp_g = reg_cmp_g; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)]; + + wire dec_index_xy = |fsm_shreg_dec_index_xy; + wire calc_leg = |fsm_shreg_calc_leg; + wire calc_leg_last = fsm_shreg_calc_leg_last; + + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) index_xy <= WORD_INDEX_LAST; + else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy); + + + // + // 32-bit Subtractor + // + wire [31: 0] sub32_d_out; + wire sub32_b_in; + wire sub32_b_out; + + subtractor32_wrapper subtractor32_inst + ( + .clk (clk), + + .a (x_din), + .b (y_din), + + .d (sub32_d_out), + + .b_in (sub32_b_in), + .b_out (sub32_b_out) + ); + + + // + // Borrow Masking Logic + // + reg sub32_b_mask; + + always @(posedge clk) + // + sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 1'b1 : 1'b0; + + assign sub32_b_in = sub32_b_out & ~sub32_b_mask; + + // + // Output Logic + // + wire cmp_unresolved = !(cmp_l || cmp_g); + + wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0; + wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + if (ena) begin + // + reg_cmp_l <= 1'b0; + reg_cmp_e <= 1'b0; + reg_cmp_g <= 1'b0; + // + end + // + end else if (cmp_unresolved && calc_leg) begin + // + if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1; + if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1; + if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1; + // + end + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v new file mode 100644 index 0000000..5db95a7 --- /dev/null +++ b/rtl/multiword/mw_mover.v @@ -0,0 +1,175 @@ +//------------------------------------------------------------------------------ +// +// mw_mover.v +// ----------------------------------------------------------------------------- +// Multi-word data mover. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module mw_mover + ( + clk, rst_n, + ena, rdy, + x_addr, y_addr, y_wren, + x_din, y_dout + ); + + + // + // Parameters + // + parameter WORD_COUNTER_WIDTH = 3; + parameter OPERAND_NUM_WORDS = 8; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word + output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word + output wire y_wren; // store current Y word + + input wire [ 32-1:0] x_din; // current X word + output wire [ 32-1:0] y_dout; // current Y word + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_x; + reg [WORD_COUNTER_WIDTH-1:0] index_y; + + + // + // Output Mapping + // + assign x_addr = index_x; + assign y_addr = index_y; + + + // + // FSM + // + localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + + wire inc_index_x = |fsm_shreg_inc_index_x; + wire inc_index_y = |fsm_shreg_inc_index_y; + wire store_word_y = |fsm_shreg_inc_index_x; + + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + always @(posedge clk) + // + if (rdy) begin + index_x <= WORD_INDEX_ZERO; + index_y <= WORD_INDEX_ZERO; + end else begin + if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x); + if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y); + end + + + // + // Write Enable Logic + // + reg y_wren_reg; + + assign y_wren = y_wren_reg; + + always @(posedge clk) + // + if (rdy) y_wren_reg <= 1'b0; + else y_wren_reg <= store_word_y; + + + // + // Output Logic + // + assign y_dout = x_din; + + +endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v new file mode 100644 index 0000000..28782c2 --- /dev/null +++ b/rtl/util/bram_1rw_1ro_readfirst.v @@ -0,0 +1,101 @@ +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module bram_1rw_1ro_readfirst + #(parameter MEM_WIDTH = 32, + parameter MEM_ADDR_BITS = 8) + ( + input wire clk, + + input wire [MEM_ADDR_BITS-1:0] a_addr, + input wire a_wr, + input wire [MEM_WIDTH-1:0] a_in, + output wire [MEM_WIDTH-1:0] a_out, + + input wire [MEM_ADDR_BITS-1:0] b_addr, + output wire [MEM_WIDTH-1:0] b_out + ); + + + // + // BRAM + // + (* RAM_STYLE="BLOCK" *) + reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1]; + + + // + // Initialization + // + /** + integer c; + initial begin + for (c=0; c<(2**MEM_ADDR_BITS); c=c+1) + bram[c] = {MEM_WIDTH{1'b0}}; + end + **/ + + + + // + // Output Registers + // + reg [MEM_WIDTH-1:0] bram_reg_a; + reg [MEM_WIDTH-1:0] bram_reg_b; + + assign a_out = bram_reg_a; + assign b_out = bram_reg_b; + + + // + // Read-Write Port A + // + always @(posedge clk) begin + // + bram_reg_a <= bram[a_addr]; + // + if (a_wr) bram[a_addr] <= a_in; + // + end + + + // + // Read-Only Port B + // + always @(posedge clk) + // + bram_reg_b <= bram[b_addr]; + + +endmodule -- cgit v1.2.3