aboutsummaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'rtl')
-rw-r--r--rtl/curve/curve_dbl_add_256.v1748
-rw-r--r--rtl/curve/curve_mul_256.v1440
-rw-r--r--rtl/curve/rom/brom_p256_delta.v38
-rw-r--r--rtl/curve/rom/brom_p256_g_x.v38
-rw-r--r--rtl/curve/rom/brom_p256_g_y.v40
-rw-r--r--rtl/curve/rom/brom_p256_h_x.v38
-rw-r--r--rtl/curve/rom/brom_p256_h_y.v40
-rw-r--r--rtl/curve/rom/brom_p256_one.v38
-rw-r--r--rtl/curve/rom/brom_p256_q.v40
-rw-r--r--rtl/curve/rom/brom_p256_zero.v40
-rw-r--r--rtl/curve/uop/uop_add_rom.v66
-rw-r--r--rtl/curve/uop/uop_conv_rom.v38
-rw-r--r--rtl/curve/uop/uop_dbl_rom.v58
-rw-r--r--rtl/curve/uop/uop_init_rom.v33
-rw-r--r--rtl/curve/uop_ecdsa.v50
-rw-r--r--rtl/ecdsa256.v180
-rw-r--r--rtl/ecdsa256_wrapper.v56
-rw-r--r--rtl/lowlevel/adder32_wrapper.v73
-rw-r--r--rtl/lowlevel/adder47_wrapper.v69
-rw-r--r--rtl/lowlevel/artix7/adder32_artix7.v96
-rw-r--r--rtl/lowlevel/artix7/adder47_artix7.v91
-rw-r--r--rtl/lowlevel/artix7/dsp48e1_wrapper.v159
-rw-r--r--rtl/lowlevel/artix7/mac16_artix7.v90
-rw-r--r--rtl/lowlevel/artix7/subtractor32_artix7.v94
-rw-r--r--rtl/lowlevel/ecdsa_lowlevel_settings.v17
-rw-r--r--rtl/lowlevel/generic/adder32_generic.v67
-rw-r--r--rtl/lowlevel/generic/adder47_generic.v64
-rw-r--r--rtl/lowlevel/generic/mac16_generic.v74
-rw-r--r--rtl/lowlevel/generic/subtractor32_generic.v67
-rw-r--r--rtl/lowlevel/mac16_wrapper.v75
-rw-r--r--rtl/lowlevel/subtractor32_wrapper.v72
-rw-r--r--rtl/modular/modular_adder.v298
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_copy.v148
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_init.v172
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v286
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v408
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v257
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v328
-rw-r--r--rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v153
-rw-r--r--rtl/modular/modular_invertor/modinv_clog2.v10
-rw-r--r--rtl/modular/modular_invertor/modular_invertor.v981
-rw-r--r--rtl/modular/modular_multiplier_256.v804
-rw-r--r--rtl/modular/modular_reductor_256.v1384
-rw-r--r--rtl/modular/modular_subtractor.v292
-rw-r--r--rtl/multiword/mw_comparator.v220
-rw-r--r--rtl/multiword/mw_mover.v175
-rw-r--r--rtl/util/bram_1rw_1ro_readfirst.v101
47 files changed, 2962 insertions, 8144 deletions
diff --git a/rtl/curve/curve_dbl_add_256.v b/rtl/curve/curve_dbl_add_256.v
index 8ef505d..6a93823 100644
--- a/rtl/curve/curve_dbl_add_256.v
+++ b/rtl/curve/curve_dbl_add_256.v
@@ -1,874 +1,874 @@
-//------------------------------------------------------------------------------
-//
-// curve_dbl_add_256.v
-// -----------------------------------------------------------------------------
-// Elliptic curve point adder and doubler.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module curve_dbl_add_256
- (
- clk, rst_n,
- ena, rdy,
- uop_addr, uop,
- px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
- rx_wren, ry_wren, rz_wren,
- px_din, py_din, pz_din,
- rx_din, ry_din, rz_din,
- rx_dout, ry_dout, rz_dout, q_din, v_din
- );
-
-
- //
- // Microcode
- //
-`include "uop_ecdsa.v"
-
-
- //
- // Constants
- //
- localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
- localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output reg [ 6-1: 0] uop_addr;
- input wire [20-1: 0] uop;
-
- output reg [WORD_COUNTER_WIDTH-1:0] px_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] py_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] pz_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] rx_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] ry_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] rz_addr;
- output reg [WORD_COUNTER_WIDTH-1:0] v_addr;
- output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
-
- output wire rx_wren;
- output wire ry_wren;
- output wire rz_wren;
-
- input wire [ 32-1:0] px_din;
- input wire [ 32-1:0] py_din;
- input wire [ 32-1:0] pz_din;
- input wire [ 32-1:0] rx_din;
- input wire [ 32-1:0] ry_din;
- input wire [ 32-1:0] rz_din;
- output wire [ 32-1:0] rx_dout;
- output wire [ 32-1:0] ry_dout;
- output wire [ 32-1:0] rz_dout;
- input wire [ 32-1:0] q_din;
- input wire [ 32-1:0] v_din;
-
-
- //
- // Microcode
- //
- wire [ 4: 0] uop_opcode = uop[19:15];
- wire [ 4: 0] uop_src_a = uop[14:10];
- wire [ 4: 0] uop_src_b = uop[ 9: 5];
- wire [ 2: 0] uop_dst = uop[ 4: 2];
- wire [ 1: 0] uop_exec = uop[ 1: 0];
-
-
- //
- // Multi-Word Comparator
- //
- wire mw_cmp_ena;
- wire mw_cmp_rdy;
-
- wire mw_cmp_out_l;
- wire mw_cmp_out_e;
- wire mw_cmp_out_g;
-
- wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
-
- wire [ 32-1:0] mw_cmp_din_x;
- wire [ 32-1:0] mw_cmp_din_y;
-
- // flags
- reg flag_pz_is_zero;
- reg flag_t1_is_zero;
- reg flag_t2_is_zero;
-
- mw_comparator #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- mw_comparator_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mw_cmp_ena),
- .rdy (mw_cmp_rdy),
-
- .xy_addr (mw_cmp_addr_xy),
- .x_din (mw_cmp_din_x),
- .y_din (mw_cmp_din_y),
-
- .cmp_l (mw_cmp_out_l),
- .cmp_e (mw_cmp_out_e),
- .cmp_g (mw_cmp_out_g)
- );
-
-
- //
- // Modular Adder
- //
- wire mod_add_ena;
- wire mod_add_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
- wire mod_add_wren_s;
-
- wire [ 32-1:0] mod_add_din_a;
- wire [ 32-1:0] mod_add_din_b;
- wire [ 32-1:0] mod_add_din_n;
- wire [ 32-1:0] mod_add_dout_s;
-
- assign mod_add_din_n = q_din;
-
- modular_adder #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- modular_adder_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_add_ena),
- .rdy (mod_add_rdy),
-
- .ab_addr (mod_add_addr_ab),
- .n_addr (mod_add_addr_n),
- .s_addr (mod_add_addr_s),
- .s_wren (mod_add_wren_s),
-
- .a_din (mod_add_din_a),
- .b_din (mod_add_din_b),
- .n_din (mod_add_din_n),
- .s_dout (mod_add_dout_s)
- );
-
-
- //
- // Modular Subtractor
- //
- wire mod_sub_ena;
- wire mod_sub_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
- wire mod_sub_wren_d;
-
- wire [ 32-1:0] mod_sub_din_a;
- wire [ 32-1:0] mod_sub_din_b;
- wire [ 32-1:0] mod_sub_din_n;
- wire [ 32-1:0] mod_sub_dout_d;
-
- assign mod_sub_din_n = q_din;
-
- modular_subtractor #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
- )
- modular_subtractor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_sub_ena),
- .rdy (mod_sub_rdy),
-
- .ab_addr (mod_sub_addr_ab),
- .n_addr (mod_sub_addr_n),
- .d_addr (mod_sub_addr_d),
- .d_wren (mod_sub_wren_d),
-
- .a_din (mod_sub_din_a),
- .b_din (mod_sub_din_b),
- .n_din (mod_sub_din_n),
- .d_dout (mod_sub_dout_d)
- );
-
-
- //
- // Modular Multiplier
- //
- wire mod_mul_ena;
- wire mod_mul_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
- wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
- wire mod_mul_wren_p;
-
- wire [ 32-1:0] mod_mul_din_a;
- wire [ 32-1:0] mod_mul_din_b;
- wire [ 32-1:0] mod_mul_din_n;
- wire [ 32-1:0] mod_mul_dout_p;
-
- assign mod_mul_din_n = q_din;
-
- modular_multiplier_256 modular_multiplier_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mod_mul_ena),
- .rdy (mod_mul_rdy),
-
- .a_addr (mod_mul_addr_a),
- .b_addr (mod_mul_addr_b),
- .n_addr (mod_mul_addr_n),
- .p_addr (mod_mul_addr_p),
- .p_wren (mod_mul_wren_p),
-
- .a_din (mod_mul_din_a),
- .b_din (mod_mul_din_b),
- .n_din (mod_mul_din_n),
- .p_dout (mod_mul_dout_p)
- );
-
-
- //
- // Multi-Word Data Mover
- //
- wire mw_mov_ena;
- wire mw_mov_rdy;
-
- wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
- wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
- wire mw_mov_wren_y;
-
- wire [ 32-1:0] mw_mov_din_x;
- wire [ 32-1:0] mw_mov_dout_y;
-
- mw_mover #
- (
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
-
- )
- mw_mover_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (mw_mov_ena),
- .rdy (mw_mov_rdy),
-
- .x_addr (mw_mov_addr_x),
- .y_addr (mw_mov_addr_y),
- .y_wren (mw_mov_wren_y),
-
- .x_din (mw_mov_din_x),
- .y_dout (mw_mov_dout_y)
- );
-
-
- //
- // ROMs
- //
- reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
- //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
- reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
-
- wire [ 32-1:0] brom_one_dout;
- wire [ 32-1:0] brom_zero_dout;
- wire [ 32-1:0] brom_delta_dout;
- wire [ 32-1:0] brom_g_x_dout;
- wire [ 32-1:0] brom_g_y_dout;
- wire [ 32-1:0] brom_h_x_dout;
- wire [ 32-1:0] brom_h_y_dout;
-
- (* ROM_STYLE="BLOCK" *) brom_p256_one brom_one_inst
- (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
-
- brom_p256_zero brom_zero_inst
- (.b_out(brom_zero_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_delta brom_delta_inst
- (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_g_x brom_g_x_inst
- (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_g_y brom_g_y_inst
- (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_h_x brom_h_x_inst
- (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
-
- (* ROM_STYLE="BLOCK" *) brom_p256_h_y brom_h_y_inst
- (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
-
-
- //
- // Temporary Variables
- //
- reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
-
- reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
- reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
-
- wire bram_t1_wr_en;
- wire bram_t2_wr_en;
- wire bram_t3_wr_en;
- wire bram_t4_wr_en;
-
- wire [ 32-1:0] bram_t1_wr_data;
- wire [ 32-1:0] bram_t2_wr_data;
- wire [ 32-1:0] bram_t3_wr_data;
- wire [ 32-1:0] bram_t4_wr_data;
-
- wire [ 32-1:0] bram_t1_rd_data;
- wire [ 32-1:0] bram_t2_rd_data;
- wire [ 32-1:0] bram_t3_rd_data;
- wire [ 32-1:0] bram_t4_rd_data;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t1
- ( .clk (clk),
- .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
- .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t2
- ( .clk (clk),
- .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
- .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t3
- ( .clk (clk),
- .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
- .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
- )
- bram_t4
- ( .clk (clk),
- .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
- .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
- );
-
-
- //
- // uOP Trigger Logic
- //
- reg uop_trig;
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) uop_trig <= 1'b0;
- else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
-
-
- //
- // FSM
- //
- localparam [ 1: 0] FSM_STATE_STALL = 2'b00;
- localparam [ 1: 0] FSM_STATE_FETCH = 2'b01;
- localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
-
- reg [ 1: 0] fsm_state = FSM_STATE_STALL;
- wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
-
-
- //
- // FSM Transition Logic
- //
- reg uop_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
- else case (fsm_state)
- FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
- FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE;
- FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
- default: fsm_state <= FSM_STATE_STALL;
- endcase
-
-
- //
- // uOP Address Increment Logic
- //
- always @(posedge clk)
- //
- if (fsm_state == FSM_STATE_STALL)
- uop_addr <= 5'd0;
- else if (fsm_state == FSM_STATE_EXECUTE)
- if (!uop_trig && uop_done)
- uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1;
-
-
- //
- // uOP Completion Logic
- //
- always @(*)
- //
- case (uop_opcode)
- OPCODE_CMP: uop_done = mw_cmp_rdy;
- OPCODE_MOV: uop_done = mw_mov_rdy;
- OPCODE_ADD: uop_done = mod_add_rdy;
- OPCODE_SUB: uop_done = mod_sub_rdy;
- OPCODE_MUL: uop_done = mod_mul_rdy;
- OPCODE_RDY: uop_done = 1'b1;
- default: uop_done = 1'b0;
- endcase
-
-
- //
- // Helper Modules Enable Logic
- //
- assign mw_cmp_ena = uop_opcode[0] & uop_trig;
- assign mw_mov_ena = uop_opcode[1] & uop_trig;
- assign mod_add_ena = uop_opcode[2] & uop_trig;
- assign mod_sub_ena = uop_opcode[3] & uop_trig;
- assign mod_mul_ena = uop_opcode[4] & uop_trig;
-
-
- //
- // uOP Source Value Decoding Logic
- //
- reg [31: 0] uop_src_a_value;
-
- always @(*)
- //
- case (uop_src_a)
- UOP_SRC_PX: uop_src_a_value = px_din;
- UOP_SRC_PY: uop_src_a_value = py_din;
- UOP_SRC_PZ: uop_src_a_value = pz_din;
-
- UOP_SRC_RX: uop_src_a_value = rx_din;
- UOP_SRC_RY: uop_src_a_value = ry_din;
- UOP_SRC_RZ: uop_src_a_value = rz_din;
-
- UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data;
- UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data;
- UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data;
- UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data;
-
- UOP_SRC_ONE: uop_src_a_value = brom_one_dout;
- UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout;
- UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
-
- UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout;
- UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout;
-
- UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout;
- UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout;
-
- UOP_SRC_V: uop_src_a_value = v_din;
-
- default: uop_src_a_value = {32{1'bX}};
- endcase
-
-
- assign mw_cmp_din_x = uop_src_a_value;
- assign mw_mov_din_x = uop_src_a_value;
- assign mod_add_din_a = uop_src_a_value;
- assign mod_sub_din_a = uop_src_a_value;
- assign mod_mul_din_a = uop_src_a_value;
-
- reg [31: 0] uop_src_b_value;
-
- always @(*)
- //
- case (uop_src_b)
- UOP_SRC_PX: uop_src_b_value = px_din;
- UOP_SRC_PY: uop_src_b_value = py_din;
- UOP_SRC_PZ: uop_src_b_value = pz_din;
-
- UOP_SRC_RX: uop_src_b_value = rx_din;
- UOP_SRC_RY: uop_src_b_value = ry_din;
- UOP_SRC_RZ: uop_src_b_value = rz_din;
-
- UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data;
- UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data;
- UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data;
- UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data;
-
- UOP_SRC_ONE: uop_src_b_value = brom_one_dout;
- UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout;
- UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
-
- UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout;
- UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout;
-
- UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout;
- UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout;
-
- UOP_SRC_V: uop_src_b_value = v_din;
-
- default: uop_src_b_value = {32{1'bX}};
- endcase
-
- assign mw_cmp_din_y = uop_src_b_value;
- assign mod_add_din_b = uop_src_b_value;
- assign mod_sub_din_b = uop_src_b_value;
- assign mod_mul_din_b = uop_src_b_value;
-
-
- //
- // uOP Source & Destination Address Decoding Logic
- //
- reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
- reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
-
- assign q_addr = uop_q_addr;
-
- always @(*)
- //
- case (uop_opcode)
- //
- OPCODE_CMP: begin
- uop_src_a_addr = mw_cmp_addr_xy;
- uop_src_b_addr = mw_cmp_addr_xy;
- uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- OPCODE_MOV: begin
- uop_src_a_addr = mw_mov_addr_x;
- uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_dst_addr = mw_mov_addr_y;
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- OPCODE_ADD: begin
- uop_src_a_addr = mod_add_addr_ab;
- uop_src_b_addr = mod_add_addr_ab;
- uop_dst_addr = mod_add_addr_s;
- uop_q_addr = mod_add_addr_n;
- end
- //
- OPCODE_SUB: begin
- uop_src_a_addr = mod_sub_addr_ab;
- uop_src_b_addr = mod_sub_addr_ab;
- uop_dst_addr = mod_sub_addr_d;
- uop_q_addr = mod_sub_addr_n;
- end
- //
- OPCODE_MUL: begin
- uop_src_a_addr = mod_mul_addr_a;
- uop_src_b_addr = mod_mul_addr_b;
- uop_dst_addr = mod_mul_addr_p;
- uop_q_addr = mod_mul_addr_n;
- end
- //
- default: begin
- uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
- uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- endcase
-
-
- //
- // uOP Conditional Execution Logic
- //
- reg uop_exec_effective;
-
- always @(*)
- //
- case (uop_exec)
- UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1;
- UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero;
- UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero;
- UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
- endcase
-
-
- //
- // uOP Destination Store Logic
- //
- reg uop_dst_wren;
-
- always @(*)
- //
- case (uop_opcode)
- //
- OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
- OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
- OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
- OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
- default: uop_dst_wren = 1'b0;
- //
- endcase
-
-
- always @(*) begin
- //
- //
- //
- if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr;
- else px_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr;
- else py_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr;
- else pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr;
- else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr;
- //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr;
- //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
- else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr;
- else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr;
- else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr;
- else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr;
- else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr;
- else v_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr;
- else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr;
- else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr;
- else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr;
- else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr;
- else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr;
- else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr;
- else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr;
- else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
- //
- //
- //
- if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
- else rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
- else ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
- else begin
- if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr;
- else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
- else rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- end
-
-
- assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
- assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
- assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
-
- assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
- assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
- assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
- assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
-
-
-
- //
- // Destination Value Selector
- //
- reg [31: 0] uop_dst_value;
-
- always @(*)
- //
- case (uop_opcode)
-
- OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
- OPCODE_ADD: uop_dst_value = mod_add_dout_s;
- OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
- OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
-
- default: uop_dst_value = {32{1'bX}};
-
- endcase
-
- assign rx_dout = uop_dst_value;
- assign ry_dout = uop_dst_value;
- assign rz_dout = uop_dst_value;
-
- assign bram_t1_wr_data = uop_dst_value;
- assign bram_t2_wr_data = uop_dst_value;
- assign bram_t3_wr_data = uop_dst_value;
- assign bram_t4_wr_data = uop_dst_value;
-
-
- //
- // Latch Comparison Flags
- //
- always @(posedge clk)
- //
- if ( (fsm_state == FSM_STATE_EXECUTE) &&
- (uop_opcode == OPCODE_CMP) &&
- (uop_done && !uop_trig) ) begin
-
- if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
- flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
- flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
- flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
-
- end
-
-
- //
- // Ready Flag Logic
- //
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear flag */
- if (fsm_state == FSM_STATE_STALL)
- if (ena) rdy_reg <= 1'b0;
-
- /* set flag */
- if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
- if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
-
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// curve_dbl_add_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point adder and doubler.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_dbl_add_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ uop_addr, uop,
+ px_addr, py_addr, pz_addr, rx_addr, ry_addr, rz_addr, q_addr, v_addr,
+ rx_wren, ry_wren, rz_wren,
+ px_din, py_din, pz_din,
+ rx_din, ry_din, rz_din,
+ rx_dout, ry_dout, rz_dout, q_din, v_din
+ );
+
+
+ //
+ // Microcode
+ //
+`include "../../../../math/ecdsalib/rtl/curve/uop_ecdsa.v"
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output reg [ 6-1: 0] uop_addr;
+ input wire [20-1: 0] uop;
+
+ output reg [WORD_COUNTER_WIDTH-1:0] px_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] py_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] pz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rx_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] ry_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] rz_addr;
+ output reg [WORD_COUNTER_WIDTH-1:0] v_addr;
+ output wire [WORD_COUNTER_WIDTH-1:0] q_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+ output wire rz_wren;
+
+ input wire [ 32-1:0] px_din;
+ input wire [ 32-1:0] py_din;
+ input wire [ 32-1:0] pz_din;
+ input wire [ 32-1:0] rx_din;
+ input wire [ 32-1:0] ry_din;
+ input wire [ 32-1:0] rz_din;
+ output wire [ 32-1:0] rx_dout;
+ output wire [ 32-1:0] ry_dout;
+ output wire [ 32-1:0] rz_dout;
+ input wire [ 32-1:0] q_din;
+ input wire [ 32-1:0] v_din;
+
+
+ //
+ // Microcode
+ //
+ wire [ 4: 0] uop_opcode = uop[19:15];
+ wire [ 4: 0] uop_src_a = uop[14:10];
+ wire [ 4: 0] uop_src_b = uop[ 9: 5];
+ wire [ 2: 0] uop_dst = uop[ 4: 2];
+ wire [ 1: 0] uop_exec = uop[ 1: 0];
+
+
+ //
+ // Multi-Word Comparator
+ //
+ wire mw_cmp_ena;
+ wire mw_cmp_rdy;
+
+ wire mw_cmp_out_l;
+ wire mw_cmp_out_e;
+ wire mw_cmp_out_g;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_cmp_addr_xy;
+
+ wire [ 32-1:0] mw_cmp_din_x;
+ wire [ 32-1:0] mw_cmp_din_y;
+
+ // flags
+ reg flag_pz_is_zero;
+ reg flag_t1_is_zero;
+ reg flag_t2_is_zero;
+
+ mw_comparator #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ mw_comparator_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_cmp_ena),
+ .rdy (mw_cmp_rdy),
+
+ .xy_addr (mw_cmp_addr_xy),
+ .x_din (mw_cmp_din_x),
+ .y_din (mw_cmp_din_y),
+
+ .cmp_l (mw_cmp_out_l),
+ .cmp_e (mw_cmp_out_e),
+ .cmp_g (mw_cmp_out_g)
+ );
+
+
+ //
+ // Modular Adder
+ //
+ wire mod_add_ena;
+ wire mod_add_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_add_addr_s;
+ wire mod_add_wren_s;
+
+ wire [ 32-1:0] mod_add_din_a;
+ wire [ 32-1:0] mod_add_din_b;
+ wire [ 32-1:0] mod_add_din_n;
+ wire [ 32-1:0] mod_add_dout_s;
+
+ assign mod_add_din_n = q_din;
+
+ modular_adder #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_adder_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_add_ena),
+ .rdy (mod_add_rdy),
+
+ .ab_addr (mod_add_addr_ab),
+ .n_addr (mod_add_addr_n),
+ .s_addr (mod_add_addr_s),
+ .s_wren (mod_add_wren_s),
+
+ .a_din (mod_add_din_a),
+ .b_din (mod_add_din_b),
+ .n_din (mod_add_din_n),
+ .s_dout (mod_add_dout_s)
+ );
+
+
+ //
+ // Modular Subtractor
+ //
+ wire mod_sub_ena;
+ wire mod_sub_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_ab;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_sub_addr_d;
+ wire mod_sub_wren_d;
+
+ wire [ 32-1:0] mod_sub_din_a;
+ wire [ 32-1:0] mod_sub_din_b;
+ wire [ 32-1:0] mod_sub_din_n;
+ wire [ 32-1:0] mod_sub_dout_d;
+
+ assign mod_sub_din_n = q_din;
+
+ modular_subtractor #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+ )
+ modular_subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_sub_ena),
+ .rdy (mod_sub_rdy),
+
+ .ab_addr (mod_sub_addr_ab),
+ .n_addr (mod_sub_addr_n),
+ .d_addr (mod_sub_addr_d),
+ .d_wren (mod_sub_wren_d),
+
+ .a_din (mod_sub_din_a),
+ .b_din (mod_sub_din_b),
+ .n_din (mod_sub_din_n),
+ .d_dout (mod_sub_dout_d)
+ );
+
+
+ //
+ // Modular Multiplier
+ //
+ wire mod_mul_ena;
+ wire mod_mul_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_a;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_b;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_n;
+ wire [WORD_COUNTER_WIDTH-1:0] mod_mul_addr_p;
+ wire mod_mul_wren_p;
+
+ wire [ 32-1:0] mod_mul_din_a;
+ wire [ 32-1:0] mod_mul_din_b;
+ wire [ 32-1:0] mod_mul_din_n;
+ wire [ 32-1:0] mod_mul_dout_p;
+
+ assign mod_mul_din_n = q_din;
+
+ modular_multiplier_256 modular_multiplier_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mod_mul_ena),
+ .rdy (mod_mul_rdy),
+
+ .a_addr (mod_mul_addr_a),
+ .b_addr (mod_mul_addr_b),
+ .n_addr (mod_mul_addr_n),
+ .p_addr (mod_mul_addr_p),
+ .p_wren (mod_mul_wren_p),
+
+ .a_din (mod_mul_din_a),
+ .b_din (mod_mul_din_b),
+ .n_din (mod_mul_din_n),
+ .p_dout (mod_mul_dout_p)
+ );
+
+
+ //
+ // Multi-Word Data Mover
+ //
+ wire mw_mov_ena;
+ wire mw_mov_rdy;
+
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_x;
+ wire [WORD_COUNTER_WIDTH-1:0] mw_mov_addr_y;
+ wire mw_mov_wren_y;
+
+ wire [ 32-1:0] mw_mov_din_x;
+ wire [ 32-1:0] mw_mov_dout_y;
+
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS)
+
+ )
+ mw_mover_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (mw_mov_ena),
+ .rdy (mw_mov_rdy),
+
+ .x_addr (mw_mov_addr_x),
+ .y_addr (mw_mov_addr_y),
+ .y_wren (mw_mov_wren_y),
+
+ .x_din (mw_mov_din_x),
+ .y_dout (mw_mov_dout_y)
+ );
+
+
+ //
+ // ROMs
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] brom_one_addr;
+ //reg [WORD_COUNTER_WIDTH-1:0] brom_zero_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_delta_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_g_y_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_x_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] brom_h_y_addr;
+
+ wire [ 32-1:0] brom_one_dout;
+ wire [ 32-1:0] brom_zero_dout;
+ wire [ 32-1:0] brom_delta_dout;
+ wire [ 32-1:0] brom_g_x_dout;
+ wire [ 32-1:0] brom_g_y_dout;
+ wire [ 32-1:0] brom_h_x_dout;
+ wire [ 32-1:0] brom_h_y_dout;
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_one brom_one_inst
+ (.clk(clk), .b_addr(brom_one_addr), .b_out(brom_one_dout));
+
+ brom_p256_zero brom_zero_inst
+ (.b_out(brom_zero_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_delta brom_delta_inst
+ (.clk(clk), .b_addr(brom_delta_addr), .b_out(brom_delta_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_x brom_g_x_inst
+ (.clk(clk), .b_addr(brom_g_x_addr), .b_out(brom_g_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_g_y brom_g_y_inst
+ (.clk(clk), .b_addr(brom_g_y_addr), .b_out(brom_g_y_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_x brom_h_x_inst
+ (.clk(clk), .b_addr(brom_h_x_addr), .b_out(brom_h_x_dout));
+
+ (* ROM_STYLE="BLOCK" *) brom_p256_h_y brom_h_y_inst
+ (.clk(clk), .b_addr(brom_h_y_addr), .b_out(brom_h_y_dout));
+
+
+ //
+ // Temporary Variables
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_wr_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_wr_addr;
+
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t1_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t2_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t3_rd_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_t4_rd_addr;
+
+ wire bram_t1_wr_en;
+ wire bram_t2_wr_en;
+ wire bram_t3_wr_en;
+ wire bram_t4_wr_en;
+
+ wire [ 32-1:0] bram_t1_wr_data;
+ wire [ 32-1:0] bram_t2_wr_data;
+ wire [ 32-1:0] bram_t3_wr_data;
+ wire [ 32-1:0] bram_t4_wr_data;
+
+ wire [ 32-1:0] bram_t1_rd_data;
+ wire [ 32-1:0] bram_t2_rd_data;
+ wire [ 32-1:0] bram_t3_rd_data;
+ wire [ 32-1:0] bram_t4_rd_data;
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t1
+ ( .clk (clk),
+ .a_addr(bram_t1_wr_addr), .a_wr(bram_t1_wr_en), .a_in(bram_t1_wr_data), .a_out(),
+ .b_addr(bram_t1_rd_addr), .b_out(bram_t1_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t2
+ ( .clk (clk),
+ .a_addr(bram_t2_wr_addr), .a_wr(bram_t2_wr_en), .a_in(bram_t2_wr_data), .a_out(),
+ .b_addr(bram_t2_rd_addr), .b_out(bram_t2_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t3
+ ( .clk (clk),
+ .a_addr(bram_t3_wr_addr), .a_wr(bram_t3_wr_en), .a_in(bram_t3_wr_data), .a_out(),
+ .b_addr(bram_t3_rd_addr), .b_out(bram_t3_rd_data)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH)
+ )
+ bram_t4
+ ( .clk (clk),
+ .a_addr(bram_t4_wr_addr), .a_wr(bram_t4_wr_en), .a_in(bram_t4_wr_data), .a_out(),
+ .b_addr(bram_t4_rd_addr), .b_out(bram_t4_rd_data)
+ );
+
+
+ //
+ // uOP Trigger Logic
+ //
+ reg uop_trig;
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) uop_trig <= 1'b0;
+ else uop_trig <= (fsm_state == FSM_STATE_FETCH) ? 1'b1 : 1'b0;
+
+
+ //
+ // FSM
+ //
+ localparam [ 1: 0] FSM_STATE_STALL = 2'b00;
+ localparam [ 1: 0] FSM_STATE_FETCH = 2'b01;
+ localparam [ 1: 0] FSM_STATE_EXECUTE = 2'b10;
+
+ reg [ 1: 0] fsm_state = FSM_STATE_STALL;
+ wire [ 1: 0] fsm_state_next = (uop_opcode == OPCODE_RDY) ? FSM_STATE_STALL : FSM_STATE_FETCH;
+
+
+ //
+ // FSM Transition Logic
+ //
+ reg uop_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_STALL;
+ else case (fsm_state)
+ FSM_STATE_STALL: fsm_state <= ena ? FSM_STATE_FETCH : FSM_STATE_STALL;
+ FSM_STATE_FETCH: fsm_state <= FSM_STATE_EXECUTE;
+ FSM_STATE_EXECUTE: fsm_state <= (!uop_trig && uop_done) ? fsm_state_next : FSM_STATE_EXECUTE;
+ default: fsm_state <= FSM_STATE_STALL;
+ endcase
+
+
+ //
+ // uOP Address Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_STALL)
+ uop_addr <= 5'd0;
+ else if (fsm_state == FSM_STATE_EXECUTE)
+ if (!uop_trig && uop_done)
+ uop_addr <= (uop_opcode == OPCODE_RDY) ? 5'd0 : uop_addr + 1'b1;
+
+
+ //
+ // uOP Completion Logic
+ //
+ always @(*)
+ //
+ case (uop_opcode)
+ OPCODE_CMP: uop_done = mw_cmp_rdy;
+ OPCODE_MOV: uop_done = mw_mov_rdy;
+ OPCODE_ADD: uop_done = mod_add_rdy;
+ OPCODE_SUB: uop_done = mod_sub_rdy;
+ OPCODE_MUL: uop_done = mod_mul_rdy;
+ OPCODE_RDY: uop_done = 1'b1;
+ default: uop_done = 1'b0;
+ endcase
+
+
+ //
+ // Helper Modules Enable Logic
+ //
+ assign mw_cmp_ena = uop_opcode[0] & uop_trig;
+ assign mw_mov_ena = uop_opcode[1] & uop_trig;
+ assign mod_add_ena = uop_opcode[2] & uop_trig;
+ assign mod_sub_ena = uop_opcode[3] & uop_trig;
+ assign mod_mul_ena = uop_opcode[4] & uop_trig;
+
+
+ //
+ // uOP Source Value Decoding Logic
+ //
+ reg [31: 0] uop_src_a_value;
+
+ always @(*)
+ //
+ case (uop_src_a)
+ UOP_SRC_PX: uop_src_a_value = px_din;
+ UOP_SRC_PY: uop_src_a_value = py_din;
+ UOP_SRC_PZ: uop_src_a_value = pz_din;
+
+ UOP_SRC_RX: uop_src_a_value = rx_din;
+ UOP_SRC_RY: uop_src_a_value = ry_din;
+ UOP_SRC_RZ: uop_src_a_value = rz_din;
+
+ UOP_SRC_T1: uop_src_a_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_a_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_a_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_a_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_a_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_a_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_a_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_a_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_a_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_a_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_a_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_a_value = v_din;
+
+ default: uop_src_a_value = {32{1'bX}};
+ endcase
+
+
+ assign mw_cmp_din_x = uop_src_a_value;
+ assign mw_mov_din_x = uop_src_a_value;
+ assign mod_add_din_a = uop_src_a_value;
+ assign mod_sub_din_a = uop_src_a_value;
+ assign mod_mul_din_a = uop_src_a_value;
+
+ reg [31: 0] uop_src_b_value;
+
+ always @(*)
+ //
+ case (uop_src_b)
+ UOP_SRC_PX: uop_src_b_value = px_din;
+ UOP_SRC_PY: uop_src_b_value = py_din;
+ UOP_SRC_PZ: uop_src_b_value = pz_din;
+
+ UOP_SRC_RX: uop_src_b_value = rx_din;
+ UOP_SRC_RY: uop_src_b_value = ry_din;
+ UOP_SRC_RZ: uop_src_b_value = rz_din;
+
+ UOP_SRC_T1: uop_src_b_value = bram_t1_rd_data;
+ UOP_SRC_T2: uop_src_b_value = bram_t2_rd_data;
+ UOP_SRC_T3: uop_src_b_value = bram_t3_rd_data;
+ UOP_SRC_T4: uop_src_b_value = bram_t4_rd_data;
+
+ UOP_SRC_ONE: uop_src_b_value = brom_one_dout;
+ UOP_SRC_ZERO: uop_src_b_value = brom_zero_dout;
+ UOP_SRC_DELTA: uop_src_b_value = brom_delta_dout;
+
+ UOP_SRC_G_X: uop_src_b_value = brom_g_x_dout;
+ UOP_SRC_G_Y: uop_src_b_value = brom_g_y_dout;
+
+ UOP_SRC_H_X: uop_src_b_value = brom_h_x_dout;
+ UOP_SRC_H_Y: uop_src_b_value = brom_h_y_dout;
+
+ UOP_SRC_V: uop_src_b_value = v_din;
+
+ default: uop_src_b_value = {32{1'bX}};
+ endcase
+
+ assign mw_cmp_din_y = uop_src_b_value;
+ assign mod_add_din_b = uop_src_b_value;
+ assign mod_sub_din_b = uop_src_b_value;
+ assign mod_mul_din_b = uop_src_b_value;
+
+
+ //
+ // uOP Source & Destination Address Decoding Logic
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_a_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_src_b_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_dst_addr;
+ reg [WORD_COUNTER_WIDTH-1:0] uop_q_addr;
+
+ assign q_addr = uop_q_addr;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_CMP: begin
+ uop_src_a_addr = mw_cmp_addr_xy;
+ uop_src_b_addr = mw_cmp_addr_xy;
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_MOV: begin
+ uop_src_a_addr = mw_mov_addr_x;
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = mw_mov_addr_y;
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ OPCODE_ADD: begin
+ uop_src_a_addr = mod_add_addr_ab;
+ uop_src_b_addr = mod_add_addr_ab;
+ uop_dst_addr = mod_add_addr_s;
+ uop_q_addr = mod_add_addr_n;
+ end
+ //
+ OPCODE_SUB: begin
+ uop_src_a_addr = mod_sub_addr_ab;
+ uop_src_b_addr = mod_sub_addr_ab;
+ uop_dst_addr = mod_sub_addr_d;
+ uop_q_addr = mod_sub_addr_n;
+ end
+ //
+ OPCODE_MUL: begin
+ uop_src_a_addr = mod_mul_addr_a;
+ uop_src_b_addr = mod_mul_addr_b;
+ uop_dst_addr = mod_mul_addr_p;
+ uop_q_addr = mod_mul_addr_n;
+ end
+ //
+ default: begin
+ uop_src_a_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_src_b_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_dst_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ uop_q_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+ //
+ // uOP Conditional Execution Logic
+ //
+ reg uop_exec_effective;
+
+ always @(*)
+ //
+ case (uop_exec)
+ UOP_EXEC_ALWAYS: uop_exec_effective = 1'b1;
+ UOP_EXEC_PZT1T2_0XX: uop_exec_effective = flag_pz_is_zero;
+ UOP_EXEC_PZT1T2_100: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && flag_t2_is_zero;
+ UOP_EXEC_PZT1T2_101: uop_exec_effective = !flag_pz_is_zero && flag_t1_is_zero && !flag_t2_is_zero;
+ endcase
+
+
+ //
+ // uOP Destination Store Logic
+ //
+ reg uop_dst_wren;
+
+ always @(*)
+ //
+ case (uop_opcode)
+ //
+ OPCODE_MOV: uop_dst_wren = mw_mov_wren_y & uop_exec_effective;
+ OPCODE_ADD: uop_dst_wren = mod_add_wren_s;
+ OPCODE_SUB: uop_dst_wren = mod_sub_wren_d;
+ OPCODE_MUL: uop_dst_wren = mod_mul_wren_p;
+ default: uop_dst_wren = 1'b0;
+ //
+ endcase
+
+
+ always @(*) begin
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_PX) px_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PX) px_addr = uop_src_b_addr;
+ else px_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PY) py_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PY) py_addr = uop_src_b_addr;
+ else py_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_PZ) pz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_PZ) pz_addr = uop_src_b_addr;
+ else pz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_ONE) brom_one_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_ONE) brom_one_addr = uop_src_b_addr;
+ else brom_one_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //if (uop_src_a == UOP_SRC_ZERO) brom_zero_addr = uop_src_a_addr;
+ //else if (uop_src_b == UOP_SRC_ZERO) brom_zero_addr = uop_src_b_addr;
+ //else brom_zero_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_DELTA) brom_delta_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_DELTA) brom_delta_addr = uop_src_b_addr;
+ else brom_delta_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_G_X) brom_g_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_X) brom_g_x_addr = uop_src_b_addr;
+ else brom_g_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_G_Y) brom_g_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_G_Y) brom_g_y_addr = uop_src_b_addr;
+ else brom_g_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_H_X) brom_h_x_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_X) brom_h_x_addr = uop_src_b_addr;
+ else brom_h_x_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_H_Y) brom_h_y_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_H_Y) brom_h_y_addr = uop_src_b_addr;
+ else brom_h_y_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_V) v_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_V) v_addr = uop_src_b_addr;
+ else v_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_src_a == UOP_SRC_T1) bram_t1_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T1) bram_t1_rd_addr = uop_src_b_addr;
+ else bram_t1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T2) bram_t2_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T2) bram_t2_rd_addr = uop_src_b_addr;
+ else bram_t2_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T3) bram_t3_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T3) bram_t3_rd_addr = uop_src_b_addr;
+ else bram_t3_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_src_a == UOP_SRC_T4) bram_t4_rd_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_T4) bram_t4_rd_addr = uop_src_b_addr;
+ else bram_t4_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if (uop_dst == UOP_DST_T1) bram_t1_wr_addr = uop_dst_addr;
+ else bram_t1_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T2) bram_t2_wr_addr = uop_dst_addr;
+ else bram_t2_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T3) bram_t3_wr_addr = uop_dst_addr;
+ else bram_t3_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ if (uop_dst == UOP_DST_T4) bram_t4_wr_addr = uop_dst_addr;
+ else bram_t4_wr_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ //
+ //
+ //
+ if ((uop_dst == UOP_DST_RX) && (uop_dst_wren)) rx_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RX) rx_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RX) rx_addr = uop_src_b_addr;
+ else rx_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RY) && (uop_dst_wren)) ry_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RY) ry_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RY) ry_addr = uop_src_b_addr;
+ else ry_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ if ((uop_dst == UOP_DST_RZ) && (uop_dst_wren)) rz_addr = uop_dst_addr;
+ else begin
+ if (uop_src_a == UOP_SRC_RZ) rz_addr = uop_src_a_addr;
+ else if (uop_src_b == UOP_SRC_RZ) rz_addr = uop_src_b_addr;
+ else rz_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ end
+
+
+ assign rx_wren = uop_dst_wren && (uop_dst == UOP_DST_RX);
+ assign ry_wren = uop_dst_wren && (uop_dst == UOP_DST_RY);
+ assign rz_wren = uop_dst_wren && (uop_dst == UOP_DST_RZ);
+
+ assign bram_t1_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T1);
+ assign bram_t2_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T2);
+ assign bram_t3_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T3);
+ assign bram_t4_wr_en = uop_dst_wren && (uop_dst == UOP_DST_T4);
+
+
+
+ //
+ // Destination Value Selector
+ //
+ reg [31: 0] uop_dst_value;
+
+ always @(*)
+ //
+ case (uop_opcode)
+
+ OPCODE_MOV: uop_dst_value = mw_mov_dout_y;
+ OPCODE_ADD: uop_dst_value = mod_add_dout_s;
+ OPCODE_SUB: uop_dst_value = mod_sub_dout_d;
+ OPCODE_MUL: uop_dst_value = mod_mul_dout_p;
+
+ default: uop_dst_value = {32{1'bX}};
+
+ endcase
+
+ assign rx_dout = uop_dst_value;
+ assign ry_dout = uop_dst_value;
+ assign rz_dout = uop_dst_value;
+
+ assign bram_t1_wr_data = uop_dst_value;
+ assign bram_t2_wr_data = uop_dst_value;
+ assign bram_t3_wr_data = uop_dst_value;
+ assign bram_t4_wr_data = uop_dst_value;
+
+
+ //
+ // Latch Comparison Flags
+ //
+ always @(posedge clk)
+ //
+ if ( (fsm_state == FSM_STATE_EXECUTE) &&
+ (uop_opcode == OPCODE_CMP) &&
+ (uop_done && !uop_trig) ) begin
+
+ if ( (uop_src_a == UOP_SRC_PZ) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_pz_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T1) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t1_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ if ( (uop_src_a == UOP_SRC_T2) && (uop_src_b == UOP_SRC_ZERO) )
+ flag_t2_is_zero <= !mw_cmp_out_l && mw_cmp_out_e && !mw_cmp_out_g;
+
+ end
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if (fsm_state == FSM_STATE_STALL)
+ if (ena) rdy_reg <= 1'b0;
+
+ /* set flag */
+ if ((fsm_state == FSM_STATE_EXECUTE) && !uop_trig && uop_done)
+ if (uop_opcode == OPCODE_RDY) rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/curve_mul_256.v b/rtl/curve/curve_mul_256.v
index 0ac2be0..423c863 100644
--- a/rtl/curve/curve_mul_256.v
+++ b/rtl/curve/curve_mul_256.v
@@ -1,720 +1,720 @@
-//------------------------------------------------------------------------------
-//
-// curve_mul_256.v
-// -----------------------------------------------------------------------------
-// Elliptic curve point scalar multiplier.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module curve_mul_256
- (
- clk, rst_n,
- ena, rdy,
- k_addr, rx_addr, ry_addr,
- rx_wren, ry_wren,
- k_din,
- rx_dout, ry_dout
- );
-
-
- //
- // Constants
- //
- localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
- localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [ 2: 0] k_addr;
- output wire [ 2: 0] rx_addr;
- output wire [ 2: 0] ry_addr;
-
- output wire rx_wren;
- output wire ry_wren;
-
- input wire [31: 0] k_din;
-
- output wire [31: 0] rx_dout;
- output wire [31: 0] ry_dout;
-
-
- //
- // Temporary Variables
- //
- reg [ 2: 0] bram_tx_wr_addr;
- reg [ 2: 0] bram_ty_wr_addr;
- reg [ 2: 0] bram_tz_wr_addr;
-
- reg [ 2: 0] bram_rx_wr_addr;
- reg [ 2: 0] bram_ry_wr_addr;
- reg [ 2: 0] bram_rz_wr_addr;
- wire [ 2: 0] bram_rz1_wr_addr;
-
- reg [ 2: 0] bram_tx_rd_addr;
- reg [ 2: 0] bram_ty_rd_addr;
- reg [ 2: 0] bram_tz_rd_addr;
-
- reg [ 2: 0] bram_rx_rd_addr;
- reg [ 2: 0] bram_ry_rd_addr;
- reg [ 2: 0] bram_rz_rd_addr;
- wire [ 2: 0] bram_rz1_rd_addr;
-
- reg bram_tx_wr_en;
- reg bram_ty_wr_en;
- reg bram_tz_wr_en;
-
- reg bram_rx_wr_en;
- reg bram_ry_wr_en;
- reg bram_rz_wr_en;
- wire bram_rz1_wr_en;
-
- wire [31: 0] bram_tx_rd_data;
- wire [31: 0] bram_ty_rd_data;
- wire [31: 0] bram_tz_rd_data;
-
- wire [31: 0] bram_rx_rd_data;
- wire [31: 0] bram_ry_rd_data;
- wire [31: 0] bram_rz_rd_data;
- wire [31: 0] bram_rz1_rd_data;
-
- reg [31: 0] bram_tx_wr_data_in;
- reg [31: 0] bram_ty_wr_data_in;
- reg [31: 0] bram_tz_wr_data_in;
-
- reg [31: 0] bram_rx_wr_data_in;
- reg [31: 0] bram_ry_wr_data_in;
- reg [31: 0] bram_rz_wr_data_in;
- wire [31: 0] bram_rz1_wr_data_in;
-
- wire [31: 0] bram_tx_wr_data_out;
- wire [31: 0] bram_ty_wr_data_out;
- wire [31: 0] bram_tz_wr_data_out;
-
- wire [31: 0] bram_rx_wr_data_out;
- wire [31: 0] bram_ry_wr_data_out;
- wire [31: 0] bram_rz_wr_data_out;
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_tx (.clk(clk),
- .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out),
- .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_ty (.clk(clk),
- .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out),
- .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_tz (.clk(clk),
- .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out),
- .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rx (.clk(clk),
- .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out),
- .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_ry (.clk(clk),
- .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out),
- .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rz (.clk(clk),
- .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out),
- .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data));
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
- bram_rz1 (.clk(clk),
- .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(),
- .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data));
-
-
- //
- // FSM
- //
- localparam [ 3: 0] FSM_STATE_IDLE = 4'd00;
- localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01;
- localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02;
- localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03;
- localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04;
- localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05;
- localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06;
- localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07;
- localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08;
- localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09;
- localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10;
- localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11;
- localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12;
- localparam [ 3: 0] FSM_STATE_DONE = 4'd13;
-
- reg [3:0] fsm_state = FSM_STATE_IDLE;
-
-
- //
- // Round Counter
- //
- reg [ 7: 0] bit_counter;
- wire [ 7: 0] bit_counter_max = 8'd255;
- wire [ 7: 0] bit_counter_zero = 8'd0;
- wire [ 7: 0] bit_counter_next =
- (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero;
-
-
- //
- // Round Completion
- //
- wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ?
- FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG;
-
-
- //
- // OP Trigger Logic
- //
- reg op_trig;
- wire op_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) op_trig <= 1'b0;
- else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) ||
- (fsm_state == FSM_STATE_DOUBLE_TRIG) ||
- (fsm_state == FSM_STATE_ADD_TRIG) ||
- (fsm_state == FSM_STATE_CONVERT_TRIG);
-
- //
- // Microprograms
- //
- wire [ 5: 0] op_rom_addr;
- wire [19: 0] op_rom_init_data;
- wire [19: 0] op_rom_dbl_data;
- wire [19: 0] op_rom_add_data;
- wire [19: 0] op_rom_conv_data;
- reg [19: 0] op_rom_mux_data;
-
- (* RAM_STYLE="BLOCK" *)
- uop_init_rom op_rom_init
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_init_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_dbl_rom op_rom_dbl
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_dbl_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_add_rom op_rom_add
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_add_data)
- );
-
- (* RAM_STYLE="BLOCK" *)
- uop_conv_rom op_rom_conv
- (
- .clk (clk),
- .addr (op_rom_addr),
- .data (op_rom_conv_data)
- );
-
- always @(*)
- //
- case (fsm_state)
- FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data;
- FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data;
- FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data;
- FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data;
- default: op_rom_mux_data = {20{1'bX}};
- endcase
-
-
-
- //
- // Modulus
- //
- reg [ 2: 0] rom_q_addr;
- wire [31: 0] rom_q_data;
-
- brom_p256_q rom_q
- (
- .clk (clk),
- .b_addr (rom_q_addr),
- .b_out (rom_q_data)
- );
-
-
- //
- // Worker
- //
- wire [ 2: 0] worker_addr_px;
- wire [ 2: 0] worker_addr_py;
- wire [ 2: 0] worker_addr_pz;
-
- wire [ 2: 0] worker_addr_rx;
- wire [ 2: 0] worker_addr_ry;
- wire [ 2: 0] worker_addr_rz;
-
- wire [ 2: 0] worker_addr_q;
-
- wire worker_wren_rx;
- wire worker_wren_ry;
- wire worker_wren_rz;
-
- reg [31: 0] worker_din_px;
- reg [31: 0] worker_din_py;
- reg [31: 0] worker_din_pz;
-
- reg [31: 0] worker_din_rx;
- reg [31: 0] worker_din_ry;
- reg [31: 0] worker_din_rz;
-
- wire [31: 0] worker_dout_rx;
- wire [31: 0] worker_dout_ry;
- wire [31: 0] worker_dout_rz;
-
- curve_dbl_add_256 worker
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (op_trig),
- .rdy (op_done),
-
- .uop_addr (op_rom_addr),
- .uop (op_rom_mux_data),
-
- .px_addr (worker_addr_px),
- .py_addr (worker_addr_py),
- .pz_addr (worker_addr_pz),
-
- .rx_addr (worker_addr_rx),
- .ry_addr (worker_addr_ry),
- .rz_addr (worker_addr_rz),
-
- .q_addr (worker_addr_q),
-
- .v_addr (bram_rz1_rd_addr),
-
- .rx_wren (worker_wren_rx),
- .ry_wren (worker_wren_ry),
- .rz_wren (worker_wren_rz),
-
- .px_din (worker_din_px),
- .py_din (worker_din_py),
- .pz_din (worker_din_pz),
-
- .rx_din (worker_din_rx),
- .ry_din (worker_din_ry),
- .rz_din (worker_din_rz),
-
- .rx_dout (worker_dout_rx),
- .ry_dout (worker_dout_ry),
- .rz_dout (worker_dout_rz),
-
- .q_din (rom_q_data),
-
- .v_din (bram_rz1_rd_data)
- );
-
-
- //
- // Mover
- //
- reg move_trig;
- wire move_done;
-
- wire [ 2: 0] mover_addr_x;
- wire [ 2: 0] mover_addr_y;
-
- wire mover_wren_y;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) move_trig <= 1'b0;
- else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG);
-
- mw_mover #
- (
- .WORD_COUNTER_WIDTH (3),
- .OPERAND_NUM_WORDS (8)
- )
- mover
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (move_trig),
- .rdy (move_done),
-
- .x_addr (mover_addr_x),
- .y_addr (mover_addr_y),
- .y_wren (mover_wren_y),
-
- .x_din ({32{1'bX}}),
- .y_dout ()
- );
-
-
- //
- // Invertor
- //
- reg invert_trig;
- wire invert_done;
-
- wire [ 2: 0] invertor_addr_a;
- wire [ 2: 0] invertor_addr_q;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) invert_trig <= 1'b0;
- else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG);
-
- modular_invertor #
- (
- .MAX_OPERAND_WIDTH(256)
- )
- invertor
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (invert_trig),
- .rdy (invert_done),
-
- .a_addr (invertor_addr_a),
- .q_addr (invertor_addr_q),
- .a1_addr (bram_rz1_wr_addr),
- .a1_wren (bram_rz1_wr_en),
-
- .a_din (bram_rz_rd_data),
- .q_din (rom_q_data),
- .a1_dout (bram_rz1_wr_data_in)
- );
-
-
- //
- // FSM Transition Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
- else case (fsm_state)
-
- FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE;
-
- FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT;
- FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT;
-
- FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT;
- FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT;
-
- FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT;
- FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT;
-
- FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT;
- FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT;
-
- FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT;
- FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT;
-
- FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT;
- FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT;
-
- FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
-
- default: fsm_state <= FSM_STATE_IDLE;
-
- endcase
-
-
- //
- // Bit Counter Increment
- //
- always @(posedge clk) begin
- //
- if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done)
- bit_counter <= bit_counter_zero;
- //
- if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done)
- bit_counter <= bit_counter_next;
- //
- end
-
-
- //
- // K Latch Logic
- //
- reg [ 2: 0] k_addr_reg;
- reg [31: 0] k_din_reg;
-
- assign k_addr = k_addr_reg;
-
- always @(posedge clk) begin
- //
- if (fsm_state == FSM_STATE_DOUBLE_TRIG)
- k_addr_reg <= 3'd7 - bit_counter[7:5];
- //
- if (fsm_state == FSM_STATE_ADD_TRIG)
- k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX};
- //
- end
-
-
-
- //
- // Copy Inhibit Logic
- //
- wire move_inhibit = k_din_reg[31];
-
- wire copy_t2r_int = mover_wren_y & ~move_inhibit;
-
-
- always @(*) begin
- //
- // Q
- //
- case (fsm_state)
- FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q;
- FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q;
- FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q;
- FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q;
- default: rom_q_addr = worker_addr_q;
- endcase
-
- //
- // R(X,Y,Z)
- //
- case (fsm_state)
- //
- FSM_STATE_PREPARE_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
- bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
- bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
- bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
- bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_COPY_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y;
- bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int;
- bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data;
- //
- end
- //
- FSM_STATE_INVERT_WAIT: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= invertor_addr_a;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_CONVERT_WAIT: begin
- //
- bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
-
- //
- default: begin
- //
- bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
- bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
- bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
- bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- // T(X,Y,Z)
- //
- case (fsm_state)
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
- bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz;
- bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz;
- bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz;
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz;
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- FSM_STATE_COPY_WAIT: begin
- //
- bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x;
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
-
- //
- default: begin
- //
- bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
- bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
- bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
- bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- // Worker
- //
- case (fsm_state)
- //
- FSM_STATE_DOUBLE_WAIT: begin
- //
- worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
- worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out;
- //
- end
- //
- FSM_STATE_ADD_WAIT: begin
- //
- worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data;
- worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out;
- //
- end
- //
- FSM_STATE_CONVERT_WAIT: begin
- //
- worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
- worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
- //
- end
- //
- default: begin
- //
- worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}};
- worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
- //
- end
- //
- endcase
- //
- end
-
-
- //
- // Output Mapping
- //
- assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT);
- assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT);
-
- assign rx_dout = worker_dout_rx;
- assign ry_dout = worker_dout_ry;
-
- assign rx_addr = worker_addr_rx;
- assign ry_addr = worker_addr_ry;
-
-
- //
- // Ready Flag Logic
- //
- reg rdy_reg = 1'b1;
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
-
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear flag */
- if ((fsm_state == FSM_STATE_IDLE) && ena)
- rdy_reg <= 1'b0;
-
- /* set flag */
- if (fsm_state == FSM_STATE_DONE)
- rdy_reg <= 1'b1;
-
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// curve_mul_256.v
+// -----------------------------------------------------------------------------
+// Elliptic curve point scalar multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module curve_mul_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ k_addr, rx_addr, ry_addr,
+ rx_wren, ry_wren,
+ k_din,
+ rx_dout, ry_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam WORD_COUNTER_WIDTH = 3; // 0 .. 7
+ localparam OPERAND_NUM_WORDS = 8; // 8 * 32 = 256
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [ 2: 0] k_addr;
+ output wire [ 2: 0] rx_addr;
+ output wire [ 2: 0] ry_addr;
+
+ output wire rx_wren;
+ output wire ry_wren;
+
+ input wire [31: 0] k_din;
+
+ output wire [31: 0] rx_dout;
+ output wire [31: 0] ry_dout;
+
+
+ //
+ // Temporary Variables
+ //
+ reg [ 2: 0] bram_tx_wr_addr;
+ reg [ 2: 0] bram_ty_wr_addr;
+ reg [ 2: 0] bram_tz_wr_addr;
+
+ reg [ 2: 0] bram_rx_wr_addr;
+ reg [ 2: 0] bram_ry_wr_addr;
+ reg [ 2: 0] bram_rz_wr_addr;
+ wire [ 2: 0] bram_rz1_wr_addr;
+
+ reg [ 2: 0] bram_tx_rd_addr;
+ reg [ 2: 0] bram_ty_rd_addr;
+ reg [ 2: 0] bram_tz_rd_addr;
+
+ reg [ 2: 0] bram_rx_rd_addr;
+ reg [ 2: 0] bram_ry_rd_addr;
+ reg [ 2: 0] bram_rz_rd_addr;
+ wire [ 2: 0] bram_rz1_rd_addr;
+
+ reg bram_tx_wr_en;
+ reg bram_ty_wr_en;
+ reg bram_tz_wr_en;
+
+ reg bram_rx_wr_en;
+ reg bram_ry_wr_en;
+ reg bram_rz_wr_en;
+ wire bram_rz1_wr_en;
+
+ wire [31: 0] bram_tx_rd_data;
+ wire [31: 0] bram_ty_rd_data;
+ wire [31: 0] bram_tz_rd_data;
+
+ wire [31: 0] bram_rx_rd_data;
+ wire [31: 0] bram_ry_rd_data;
+ wire [31: 0] bram_rz_rd_data;
+ wire [31: 0] bram_rz1_rd_data;
+
+ reg [31: 0] bram_tx_wr_data_in;
+ reg [31: 0] bram_ty_wr_data_in;
+ reg [31: 0] bram_tz_wr_data_in;
+
+ reg [31: 0] bram_rx_wr_data_in;
+ reg [31: 0] bram_ry_wr_data_in;
+ reg [31: 0] bram_rz_wr_data_in;
+ wire [31: 0] bram_rz1_wr_data_in;
+
+ wire [31: 0] bram_tx_wr_data_out;
+ wire [31: 0] bram_ty_wr_data_out;
+ wire [31: 0] bram_tz_wr_data_out;
+
+ wire [31: 0] bram_rx_wr_data_out;
+ wire [31: 0] bram_ry_wr_data_out;
+ wire [31: 0] bram_rz_wr_data_out;
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tx (.clk(clk),
+ .a_addr(bram_tx_wr_addr), .a_wr(bram_tx_wr_en), .a_in(bram_tx_wr_data_in), .a_out(bram_tx_wr_data_out),
+ .b_addr(bram_tx_rd_addr), .b_out(bram_tx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ty (.clk(clk),
+ .a_addr(bram_ty_wr_addr), .a_wr(bram_ty_wr_en), .a_in(bram_ty_wr_data_in), .a_out(bram_ty_wr_data_out),
+ .b_addr(bram_ty_rd_addr), .b_out(bram_ty_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_tz (.clk(clk),
+ .a_addr(bram_tz_wr_addr), .a_wr(bram_tz_wr_en), .a_in(bram_tz_wr_data_in), .a_out(bram_tz_wr_data_out),
+ .b_addr(bram_tz_rd_addr), .b_out(bram_tz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rx (.clk(clk),
+ .a_addr(bram_rx_wr_addr), .a_wr(bram_rx_wr_en), .a_in(bram_rx_wr_data_in), .a_out(bram_rx_wr_data_out),
+ .b_addr(bram_rx_rd_addr), .b_out(bram_rx_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_ry (.clk(clk),
+ .a_addr(bram_ry_wr_addr), .a_wr(bram_ry_wr_en), .a_in(bram_ry_wr_data_in), .a_out(bram_ry_wr_data_out),
+ .b_addr(bram_ry_rd_addr), .b_out(bram_ry_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz (.clk(clk),
+ .a_addr(bram_rz_wr_addr), .a_wr(bram_rz_wr_en), .a_in(bram_rz_wr_data_in), .a_out(bram_rz_wr_data_out),
+ .b_addr(bram_rz_rd_addr), .b_out(bram_rz_rd_data));
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(3))
+ bram_rz1 (.clk(clk),
+ .a_addr(bram_rz1_wr_addr), .a_wr(bram_rz1_wr_en), .a_in(bram_rz1_wr_data_in), .a_out(),
+ .b_addr(bram_rz1_rd_addr), .b_out(bram_rz1_rd_data));
+
+
+ //
+ // FSM
+ //
+ localparam [ 3: 0] FSM_STATE_IDLE = 4'd00;
+ localparam [ 3: 0] FSM_STATE_PREPARE_TRIG = 4'd01;
+ localparam [ 3: 0] FSM_STATE_PREPARE_WAIT = 4'd02;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_TRIG = 4'd03;
+ localparam [ 3: 0] FSM_STATE_DOUBLE_WAIT = 4'd04;
+ localparam [ 3: 0] FSM_STATE_ADD_TRIG = 4'd05;
+ localparam [ 3: 0] FSM_STATE_ADD_WAIT = 4'd06;
+ localparam [ 3: 0] FSM_STATE_COPY_TRIG = 4'd07;
+ localparam [ 3: 0] FSM_STATE_COPY_WAIT = 4'd08;
+ localparam [ 3: 0] FSM_STATE_INVERT_TRIG = 4'd09;
+ localparam [ 3: 0] FSM_STATE_INVERT_WAIT = 4'd10;
+ localparam [ 3: 0] FSM_STATE_CONVERT_TRIG = 4'd11;
+ localparam [ 3: 0] FSM_STATE_CONVERT_WAIT = 4'd12;
+ localparam [ 3: 0] FSM_STATE_DONE = 4'd13;
+
+ reg [3:0] fsm_state = FSM_STATE_IDLE;
+
+
+ //
+ // Round Counter
+ //
+ reg [ 7: 0] bit_counter;
+ wire [ 7: 0] bit_counter_max = 8'd255;
+ wire [ 7: 0] bit_counter_zero = 8'd0;
+ wire [ 7: 0] bit_counter_next =
+ (bit_counter < bit_counter_max) ? bit_counter + 1'b1 : bit_counter_zero;
+
+
+ //
+ // Round Completion
+ //
+ wire [ 3: 0] fsm_state_round_next = (bit_counter < bit_counter_max) ?
+ FSM_STATE_DOUBLE_TRIG : FSM_STATE_INVERT_TRIG;
+
+
+ //
+ // OP Trigger Logic
+ //
+ reg op_trig;
+ wire op_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) op_trig <= 1'b0;
+ else op_trig <= (fsm_state == FSM_STATE_PREPARE_TRIG) ||
+ (fsm_state == FSM_STATE_DOUBLE_TRIG) ||
+ (fsm_state == FSM_STATE_ADD_TRIG) ||
+ (fsm_state == FSM_STATE_CONVERT_TRIG);
+
+ //
+ // Microprograms
+ //
+ wire [ 5: 0] op_rom_addr;
+ wire [19: 0] op_rom_init_data;
+ wire [19: 0] op_rom_dbl_data;
+ wire [19: 0] op_rom_add_data;
+ wire [19: 0] op_rom_conv_data;
+ reg [19: 0] op_rom_mux_data;
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_init_rom op_rom_init
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_init_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_dbl_rom op_rom_dbl
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_dbl_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_add_rom op_rom_add
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_add_data)
+ );
+
+ (* RAM_STYLE="BLOCK" *)
+ uop_conv_rom op_rom_conv
+ (
+ .clk (clk),
+ .addr (op_rom_addr),
+ .data (op_rom_conv_data)
+ );
+
+ always @(*)
+ //
+ case (fsm_state)
+ FSM_STATE_PREPARE_WAIT: op_rom_mux_data = op_rom_init_data;
+ FSM_STATE_DOUBLE_WAIT: op_rom_mux_data = op_rom_dbl_data;
+ FSM_STATE_ADD_WAIT: op_rom_mux_data = op_rom_add_data;
+ FSM_STATE_CONVERT_WAIT: op_rom_mux_data = op_rom_conv_data;
+ default: op_rom_mux_data = {20{1'bX}};
+ endcase
+
+
+
+ //
+ // Modulus
+ //
+ reg [ 2: 0] rom_q_addr;
+ wire [31: 0] rom_q_data;
+
+ brom_p256_q rom_q
+ (
+ .clk (clk),
+ .b_addr (rom_q_addr),
+ .b_out (rom_q_data)
+ );
+
+
+ //
+ // Worker
+ //
+ wire [ 2: 0] worker_addr_px;
+ wire [ 2: 0] worker_addr_py;
+ wire [ 2: 0] worker_addr_pz;
+
+ wire [ 2: 0] worker_addr_rx;
+ wire [ 2: 0] worker_addr_ry;
+ wire [ 2: 0] worker_addr_rz;
+
+ wire [ 2: 0] worker_addr_q;
+
+ wire worker_wren_rx;
+ wire worker_wren_ry;
+ wire worker_wren_rz;
+
+ reg [31: 0] worker_din_px;
+ reg [31: 0] worker_din_py;
+ reg [31: 0] worker_din_pz;
+
+ reg [31: 0] worker_din_rx;
+ reg [31: 0] worker_din_ry;
+ reg [31: 0] worker_din_rz;
+
+ wire [31: 0] worker_dout_rx;
+ wire [31: 0] worker_dout_ry;
+ wire [31: 0] worker_dout_rz;
+
+ curve_dbl_add_256 worker
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (op_trig),
+ .rdy (op_done),
+
+ .uop_addr (op_rom_addr),
+ .uop (op_rom_mux_data),
+
+ .px_addr (worker_addr_px),
+ .py_addr (worker_addr_py),
+ .pz_addr (worker_addr_pz),
+
+ .rx_addr (worker_addr_rx),
+ .ry_addr (worker_addr_ry),
+ .rz_addr (worker_addr_rz),
+
+ .q_addr (worker_addr_q),
+
+ .v_addr (bram_rz1_rd_addr),
+
+ .rx_wren (worker_wren_rx),
+ .ry_wren (worker_wren_ry),
+ .rz_wren (worker_wren_rz),
+
+ .px_din (worker_din_px),
+ .py_din (worker_din_py),
+ .pz_din (worker_din_pz),
+
+ .rx_din (worker_din_rx),
+ .ry_din (worker_din_ry),
+ .rz_din (worker_din_rz),
+
+ .rx_dout (worker_dout_rx),
+ .ry_dout (worker_dout_ry),
+ .rz_dout (worker_dout_rz),
+
+ .q_din (rom_q_data),
+
+ .v_din (bram_rz1_rd_data)
+ );
+
+
+ //
+ // Mover
+ //
+ reg move_trig;
+ wire move_done;
+
+ wire [ 2: 0] mover_addr_x;
+ wire [ 2: 0] mover_addr_y;
+
+ wire mover_wren_y;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) move_trig <= 1'b0;
+ else move_trig <= (fsm_state == FSM_STATE_COPY_TRIG);
+
+ mw_mover #
+ (
+ .WORD_COUNTER_WIDTH (3),
+ .OPERAND_NUM_WORDS (8)
+ )
+ mover
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (move_trig),
+ .rdy (move_done),
+
+ .x_addr (mover_addr_x),
+ .y_addr (mover_addr_y),
+ .y_wren (mover_wren_y),
+
+ .x_din ({32{1'bX}}),
+ .y_dout ()
+ );
+
+
+ //
+ // Invertor
+ //
+ reg invert_trig;
+ wire invert_done;
+
+ wire [ 2: 0] invertor_addr_a;
+ wire [ 2: 0] invertor_addr_q;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) invert_trig <= 1'b0;
+ else invert_trig <= (fsm_state == FSM_STATE_INVERT_TRIG);
+
+ modular_invertor #
+ (
+ .MAX_OPERAND_WIDTH(256)
+ )
+ invertor
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (invert_trig),
+ .rdy (invert_done),
+
+ .a_addr (invertor_addr_a),
+ .q_addr (invertor_addr_q),
+ .a1_addr (bram_rz1_wr_addr),
+ .a1_wren (bram_rz1_wr_en),
+
+ .a_din (bram_rz_rd_data),
+ .q_din (rom_q_data),
+ .a1_dout (bram_rz1_wr_data_in)
+ );
+
+
+ //
+ // FSM Transition Logic
+ //
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
+ else case (fsm_state)
+
+ FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_PREPARE_TRIG : FSM_STATE_IDLE;
+
+ FSM_STATE_PREPARE_TRIG: fsm_state <= FSM_STATE_PREPARE_WAIT;
+ FSM_STATE_PREPARE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DOUBLE_TRIG : FSM_STATE_PREPARE_WAIT;
+
+ FSM_STATE_DOUBLE_TRIG: fsm_state <= FSM_STATE_DOUBLE_WAIT;
+ FSM_STATE_DOUBLE_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_ADD_TRIG : FSM_STATE_DOUBLE_WAIT;
+
+ FSM_STATE_ADD_TRIG: fsm_state <= FSM_STATE_ADD_WAIT;
+ FSM_STATE_ADD_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_COPY_TRIG : FSM_STATE_ADD_WAIT;
+
+ FSM_STATE_COPY_TRIG: fsm_state <= FSM_STATE_COPY_WAIT;
+ FSM_STATE_COPY_WAIT: fsm_state <= (!move_trig && move_done) ? fsm_state_round_next : FSM_STATE_COPY_WAIT;
+
+ FSM_STATE_INVERT_TRIG: fsm_state <= FSM_STATE_INVERT_WAIT;
+ FSM_STATE_INVERT_WAIT: fsm_state <= (!invert_trig && invert_done) ? FSM_STATE_CONVERT_TRIG : FSM_STATE_INVERT_WAIT;
+
+ FSM_STATE_CONVERT_TRIG: fsm_state <= FSM_STATE_CONVERT_WAIT;
+ FSM_STATE_CONVERT_WAIT: fsm_state <= (!op_trig && op_done) ? FSM_STATE_DONE : FSM_STATE_CONVERT_WAIT;
+
+ FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
+
+ default: fsm_state <= FSM_STATE_IDLE;
+
+ endcase
+
+
+ //
+ // Bit Counter Increment
+ //
+ always @(posedge clk) begin
+ //
+ if ((fsm_state == FSM_STATE_PREPARE_WAIT) && !op_trig && op_done)
+ bit_counter <= bit_counter_zero;
+ //
+ if ((fsm_state == FSM_STATE_COPY_WAIT) && !move_trig && move_done)
+ bit_counter <= bit_counter_next;
+ //
+ end
+
+
+ //
+ // K Latch Logic
+ //
+ reg [ 2: 0] k_addr_reg;
+ reg [31: 0] k_din_reg;
+
+ assign k_addr = k_addr_reg;
+
+ always @(posedge clk) begin
+ //
+ if (fsm_state == FSM_STATE_DOUBLE_TRIG)
+ k_addr_reg <= 3'd7 - bit_counter[7:5];
+ //
+ if (fsm_state == FSM_STATE_ADD_TRIG)
+ k_din_reg <= (bit_counter[4:0] == 5'd0) ? k_din : {k_din_reg[30:0], 1'bX};
+ //
+ end
+
+
+
+ //
+ // Copy Inhibit Logic
+ //
+ wire move_inhibit = k_din_reg[31];
+
+ wire copy_t2r_int = mover_wren_y & ~move_inhibit;
+
+
+ always @(*) begin
+ //
+ // Q
+ //
+ case (fsm_state)
+ FSM_STATE_DOUBLE_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_ADD_WAIT: rom_q_addr = worker_addr_q;
+ FSM_STATE_INVERT_WAIT: rom_q_addr = invertor_addr_q;
+ FSM_STATE_CONVERT_WAIT: rom_q_addr = worker_addr_q;
+ default: rom_q_addr = worker_addr_q;
+ endcase
+
+ //
+ // R(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_PREPARE_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= worker_addr_rx; bram_ry_wr_addr <= worker_addr_ry; bram_rz_wr_addr <= worker_addr_rz;
+ bram_rx_wr_en <= worker_wren_rx; bram_ry_wr_en <= worker_wren_ry; bram_rz_wr_en <= worker_wren_rz;
+ bram_rx_wr_data_in <= worker_dout_rx; bram_ry_wr_data_in <= worker_dout_ry; bram_rz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= mover_addr_y; bram_ry_wr_addr <= mover_addr_y; bram_rz_wr_addr <= mover_addr_y;
+ bram_rx_wr_en <= copy_t2r_int; bram_ry_wr_en <= copy_t2r_int; bram_rz_wr_en <= copy_t2r_int;
+ bram_rx_wr_data_in <= bram_tx_rd_data; bram_ry_wr_data_in <= bram_ty_rd_data; bram_rz_wr_data_in <= bram_tz_rd_data;
+ //
+ end
+ //
+ FSM_STATE_INVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= invertor_addr_a;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ bram_rx_rd_addr <= worker_addr_px; bram_ry_rd_addr <= worker_addr_py; bram_rz_rd_addr <= worker_addr_pz;
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_rx_rd_addr <= {3{1'bX}}; bram_ry_rd_addr <= {3{1'bX}}; bram_rz_rd_addr <= {3{1'bX}};
+ bram_rx_wr_addr <= {3{1'bX}}; bram_ry_wr_addr <= {3{1'bX}}; bram_rz_wr_addr <= {3{1'bX}};
+ bram_rx_wr_en <= 1'b0; bram_ry_wr_en <= 1'b0; bram_rz_wr_en <= 1'b0;
+ bram_rx_wr_data_in <= {32{1'bX}}; bram_ry_wr_data_in <= {32{1'bX}}; bram_rz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // T(X,Y,Z)
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= worker_addr_rx; bram_ty_wr_addr <= worker_addr_ry; bram_tz_wr_addr <= worker_addr_rz;
+ bram_tx_wr_en <= worker_wren_rx; bram_ty_wr_en <= worker_wren_ry; bram_tz_wr_en <= worker_wren_rz;
+ bram_tx_wr_data_in <= worker_dout_rx; bram_ty_wr_data_in <= worker_dout_ry; bram_tz_wr_data_in <= worker_dout_rz;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ bram_tx_rd_addr <= worker_addr_px; bram_ty_rd_addr <= worker_addr_py; bram_tz_rd_addr <= worker_addr_pz;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ FSM_STATE_COPY_WAIT: begin
+ //
+ bram_tx_rd_addr <= mover_addr_x; bram_ty_rd_addr <= mover_addr_x; bram_tz_rd_addr <= mover_addr_x;
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+
+ //
+ default: begin
+ //
+ bram_tx_rd_addr <= {3{1'bX}}; bram_ty_rd_addr <= {3{1'bX}}; bram_tz_rd_addr <= {3{1'bX}};
+ bram_tx_wr_addr <= {3{1'bX}}; bram_ty_wr_addr <= {3{1'bX}}; bram_tz_wr_addr <= {3{1'bX}};
+ bram_tx_wr_en <= 1'b0; bram_ty_wr_en <= 1'b0; bram_tz_wr_en <= 1'b0;
+ bram_tx_wr_data_in <= {32{1'bX}}; bram_ty_wr_data_in <= {32{1'bX}}; bram_tz_wr_data_in <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ // Worker
+ //
+ case (fsm_state)
+ //
+ FSM_STATE_DOUBLE_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= bram_tx_wr_data_out; worker_din_ry <= bram_ty_wr_data_out; worker_din_rz <= bram_tz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_ADD_WAIT: begin
+ //
+ worker_din_px <= bram_tx_rd_data; worker_din_py <= bram_ty_rd_data; worker_din_pz <= bram_tz_rd_data;
+ worker_din_rx <= bram_rx_wr_data_out; worker_din_ry <= bram_ry_wr_data_out; worker_din_rz <= bram_rz_wr_data_out;
+ //
+ end
+ //
+ FSM_STATE_CONVERT_WAIT: begin
+ //
+ worker_din_px <= bram_rx_rd_data; worker_din_py <= bram_ry_rd_data; worker_din_pz <= bram_rz_rd_data;
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ default: begin
+ //
+ worker_din_px <= {32{1'bX}}; worker_din_py <= {32{1'bX}}; worker_din_pz <= {32{1'bX}};
+ worker_din_rx <= {32{1'bX}}; worker_din_ry <= {32{1'bX}}; worker_din_rz <= {32{1'bX}};
+ //
+ end
+ //
+ endcase
+ //
+ end
+
+
+ //
+ // Output Mapping
+ //
+ assign rx_wren = worker_wren_rx && (fsm_state == FSM_STATE_CONVERT_WAIT);
+ assign ry_wren = worker_wren_ry && (fsm_state == FSM_STATE_CONVERT_WAIT);
+
+ assign rx_dout = worker_dout_rx;
+ assign ry_dout = worker_dout_ry;
+
+ assign rx_addr = worker_addr_rx;
+ assign ry_addr = worker_addr_ry;
+
+
+ //
+ // Ready Flag Logic
+ //
+ reg rdy_reg = 1'b1;
+ assign rdy = rdy_reg;
+
+ always @(posedge clk or negedge rst_n)
+
+ if (rst_n == 1'b0) rdy_reg <= 1'b1;
+ else begin
+
+ /* clear flag */
+ if ((fsm_state == FSM_STATE_IDLE) && ena)
+ rdy_reg <= 1'b0;
+
+ /* set flag */
+ if (fsm_state == FSM_STATE_DONE)
+ rdy_reg <= 1'b1;
+
+ end
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/curve/rom/brom_p256_delta.v b/rtl/curve/rom/brom_p256_delta.v
index b9a345a..4637575 100644
--- a/rtl/curve/rom/brom_p256_delta.v
+++ b/rtl/curve/rom/brom_p256_delta.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_delta
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h00000000;
- 3'b001: bram_reg_b <= 32'h00000000;
- 3'b010: bram_reg_b <= 32'h80000000;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h80000000;
- 3'b110: bram_reg_b <= 32'h80000000;
- 3'b111: bram_reg_b <= 32'h7fffffff;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000000;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h80000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h80000000;
+ 3'b110: bram_reg_b <= 32'h80000000;
+ 3'b111: bram_reg_b <= 32'h7fffffff;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_g_x.v b/rtl/curve/rom/brom_p256_g_x.v
index 0816ef6..86aeafd 100644
--- a/rtl/curve/rom/brom_p256_g_x.v
+++ b/rtl/curve/rom/brom_p256_g_x.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_g_x
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hd898c296;
- 3'b001: bram_reg_b <= 32'hf4a13945;
- 3'b010: bram_reg_b <= 32'h2deb33a0;
- 3'b011: bram_reg_b <= 32'h77037d81;
- 3'b100: bram_reg_b <= 32'h63a440f2;
- 3'b101: bram_reg_b <= 32'hf8bce6e5;
- 3'b110: bram_reg_b <= 32'he12c4247;
- 3'b111: bram_reg_b <= 32'h6b17d1f2;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hd898c296;
+ 3'b001: bram_reg_b <= 32'hf4a13945;
+ 3'b010: bram_reg_b <= 32'h2deb33a0;
+ 3'b011: bram_reg_b <= 32'h77037d81;
+ 3'b100: bram_reg_b <= 32'h63a440f2;
+ 3'b101: bram_reg_b <= 32'hf8bce6e5;
+ 3'b110: bram_reg_b <= 32'he12c4247;
+ 3'b111: bram_reg_b <= 32'h6b17d1f2;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_g_y.v b/rtl/curve/rom/brom_p256_g_y.v
index 4d9c61e..39f9116 100644
--- a/rtl/curve/rom/brom_p256_g_y.v
+++ b/rtl/curve/rom/brom_p256_g_y.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_g_y
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h37bf51f5;
- 3'b001: bram_reg_b <= 32'hcbb64068;
- 3'b010: bram_reg_b <= 32'h6b315ece;
- 3'b011: bram_reg_b <= 32'h2bce3357;
- 3'b100: bram_reg_b <= 32'h7c0f9e16;
- 3'b101: bram_reg_b <= 32'h8ee7eb4a;
- 3'b110: bram_reg_b <= 32'hfe1a7f9b;
- 3'b111: bram_reg_b <= 32'h4fe342e2;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h37bf51f5;
+ 3'b001: bram_reg_b <= 32'hcbb64068;
+ 3'b010: bram_reg_b <= 32'h6b315ece;
+ 3'b011: bram_reg_b <= 32'h2bce3357;
+ 3'b100: bram_reg_b <= 32'h7c0f9e16;
+ 3'b101: bram_reg_b <= 32'h8ee7eb4a;
+ 3'b110: bram_reg_b <= 32'hfe1a7f9b;
+ 3'b111: bram_reg_b <= 32'h4fe342e2;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_h_x.v b/rtl/curve/rom/brom_p256_h_x.v
index 0b69f77..554d346 100644
--- a/rtl/curve/rom/brom_p256_h_x.v
+++ b/rtl/curve/rom/brom_p256_h_x.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_h_x
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h4ece7ad0;
- 3'b001: bram_reg_b <= 32'h16bd8d74;
- 3'b010: bram_reg_b <= 32'ha42998be;
- 3'b011: bram_reg_b <= 32'h11f904fe;
- 3'b100: bram_reg_b <= 32'h38b77e1b;
- 3'b101: bram_reg_b <= 32'h0e863235;
- 3'b110: bram_reg_b <= 32'h3da77b71;
- 3'b111: bram_reg_b <= 32'h29d05c19;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h4ece7ad0;
+ 3'b001: bram_reg_b <= 32'h16bd8d74;
+ 3'b010: bram_reg_b <= 32'ha42998be;
+ 3'b011: bram_reg_b <= 32'h11f904fe;
+ 3'b100: bram_reg_b <= 32'h38b77e1b;
+ 3'b101: bram_reg_b <= 32'h0e863235;
+ 3'b110: bram_reg_b <= 32'h3da77b71;
+ 3'b111: bram_reg_b <= 32'h29d05c19;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_h_y.v b/rtl/curve/rom/brom_p256_h_y.v
index 362fce6..6b5b8d9 100644
--- a/rtl/curve/rom/brom_p256_h_y.v
+++ b/rtl/curve/rom/brom_p256_h_y.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_h_y
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hc840ae07;
- 3'b001: bram_reg_b <= 32'h3449bf97;
- 3'b010: bram_reg_b <= 32'h94cea131;
- 3'b011: bram_reg_b <= 32'hd431cca9;
- 3'b100: bram_reg_b <= 32'h83f061e9;
- 3'b101: bram_reg_b <= 32'h711814b5;
- 3'b110: bram_reg_b <= 32'h01e58065;
- 3'b111: bram_reg_b <= 32'hb01cbd1c;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hc840ae07;
+ 3'b001: bram_reg_b <= 32'h3449bf97;
+ 3'b010: bram_reg_b <= 32'h94cea131;
+ 3'b011: bram_reg_b <= 32'hd431cca9;
+ 3'b100: bram_reg_b <= 32'h83f061e9;
+ 3'b101: bram_reg_b <= 32'h711814b5;
+ 3'b110: bram_reg_b <= 32'h01e58065;
+ 3'b111: bram_reg_b <= 32'hb01cbd1c;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_one.v b/rtl/curve/rom/brom_p256_one.v
index 4097874..15e3746 100644
--- a/rtl/curve/rom/brom_p256_one.v
+++ b/rtl/curve/rom/brom_p256_one.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_one
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'h00000001;
- 3'b001: bram_reg_b <= 32'h00000000;
- 3'b010: bram_reg_b <= 32'h00000000;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h00000000;
- 3'b110: bram_reg_b <= 32'h00000000;
- 3'b111: bram_reg_b <= 32'h00000000;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'h00000001;
+ 3'b001: bram_reg_b <= 32'h00000000;
+ 3'b010: bram_reg_b <= 32'h00000000;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000000;
+ 3'b111: bram_reg_b <= 32'h00000000;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_q.v b/rtl/curve/rom/brom_p256_q.v
index fe94593..101a524 100644
--- a/rtl/curve/rom/brom_p256_q.v
+++ b/rtl/curve/rom/brom_p256_q.v
@@ -33,36 +33,36 @@
`timescale 1ns / 1ps
module brom_p256_q
- (
- input wire clk,
- input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ input wire clk,
+ input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
//
// Output Registers
//
- reg [31:0] bram_reg_b;
+ reg [31:0] bram_reg_b;
assign b_out = bram_reg_b;
-
+
//
// Read-Only Port B
- //
- always @(posedge clk)
- //
- case (b_addr)
- 3'b000: bram_reg_b <= 32'hffffffff;
- 3'b001: bram_reg_b <= 32'hffffffff;
- 3'b010: bram_reg_b <= 32'hffffffff;
- 3'b011: bram_reg_b <= 32'h00000000;
- 3'b100: bram_reg_b <= 32'h00000000;
- 3'b101: bram_reg_b <= 32'h00000000;
- 3'b110: bram_reg_b <= 32'h00000001;
- 3'b111: bram_reg_b <= 32'hffffffff;
- endcase
+ //
+ always @(posedge clk)
+ //
+ case (b_addr)
+ 3'b000: bram_reg_b <= 32'hffffffff;
+ 3'b001: bram_reg_b <= 32'hffffffff;
+ 3'b010: bram_reg_b <= 32'hffffffff;
+ 3'b011: bram_reg_b <= 32'h00000000;
+ 3'b100: bram_reg_b <= 32'h00000000;
+ 3'b101: bram_reg_b <= 32'h00000000;
+ 3'b110: bram_reg_b <= 32'h00000001;
+ 3'b111: bram_reg_b <= 32'hffffffff;
+ endcase
endmodule
diff --git a/rtl/curve/rom/brom_p256_zero.v b/rtl/curve/rom/brom_p256_zero.v
index f6d19a1..2672cf2 100644
--- a/rtl/curve/rom/brom_p256_zero.v
+++ b/rtl/curve/rom/brom_p256_zero.v
@@ -33,14 +33,14 @@
`timescale 1ns / 1ps
module brom_p256_zero
- (
- //input wire clk,
- //input wire [ 3-1:0] b_addr,
- output wire [32-1:0] b_out
- );
+ (
+ //input wire clk,
+ //input wire [ 3-1:0] b_addr,
+ output wire [32-1:0] b_out
+ );
-
- assign b_out = {32{1'b0}};
+
+ assign b_out = {32{1'b0}};
//
// Output Registers
@@ -52,19 +52,19 @@ module brom_p256_zero
//
// Read-Only Port B
- //
- //always @(posedge clk)
- //
- //case (b_addr)
- //3'b000: bram_reg_b <= 32'h00000000;
- //3'b001: bram_reg_b <= 32'h00000000;
- //3'b010: bram_reg_b <= 32'h00000000;
- //3'b011: bram_reg_b <= 32'h00000000;
- //3'b100: bram_reg_b <= 32'h00000000;
- //3'b101: bram_reg_b <= 32'h00000000;
- //3'b110: bram_reg_b <= 32'h00000000;
- //3'b111: bram_reg_b <= 32'h00000000;
- //endcase
+ //
+ //always @(posedge clk)
+ //
+ //case (b_addr)
+ //3'b000: bram_reg_b <= 32'h00000000;
+ //3'b001: bram_reg_b <= 32'h00000000;
+ //3'b010: bram_reg_b <= 32'h00000000;
+ //3'b011: bram_reg_b <= 32'h00000000;
+ //3'b100: bram_reg_b <= 32'h00000000;
+ //3'b101: bram_reg_b <= 32'h00000000;
+ //3'b110: bram_reg_b <= 32'h00000000;
+ //3'b111: bram_reg_b <= 32'h00000000;
+ //endcase
endmodule
diff --git a/rtl/curve/uop/uop_add_rom.v b/rtl/curve/uop/uop_add_rom.v
deleted file mode 100644
index c807736..0000000
--- a/rtl/curve/uop/uop_add_rom.v
+++ /dev/null
@@ -1,66 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_add_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Addition Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
-/* 2. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 3. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 4. */6'd03: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 5. */6'd04: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_G_X, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 6. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_G_Y, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 7. */6'd06: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_PX, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 8. */6'd07: data <= {OPCODE_SUB, UOP_SRC_T2, UOP_SRC_PY, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 9. */6'd08: data <= {OPCODE_CMP, UOP_SRC_T1, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
- 6'd09: data <= {OPCODE_CMP, UOP_SRC_T2, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 10. */6'd10: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-/* 11. */6'd11: data <= {OPCODE_MOV, UOP_SRC_T1, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 12. */6'd13: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_T4, UOP_EXEC_ALWAYS};
-/* 13. */6'd14: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 14. */6'd15: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 15. */6'd16: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd17: data <= {OPCODE_MUL, UOP_SRC_RX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 16. */6'd18: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 17. */6'd19: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T4, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 18. */6'd20: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 19. */6'd21: data <= {OPCODE_MUL, UOP_SRC_T2, UOP_SRC_T3, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 20. */6'd22: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T4, UOP_DST_T4, UOP_EXEC_ALWAYS};
-/* 21. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_T4, UOP_DST_RY, UOP_EXEC_ALWAYS};
-
- 6'd24: data <= {OPCODE_MOV, UOP_SRC_G_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd25: data <= {OPCODE_MOV, UOP_SRC_G_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
- 6'd26: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
-
- 6'd27: data <= {OPCODE_MOV, UOP_SRC_H_X, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_100};
- 6'd28: data <= {OPCODE_MOV, UOP_SRC_H_Y, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_100};
- 6'd29: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_100};
-
- 6'd30: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_101};
- 6'd31: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_101};
- 6'd32: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_101};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_conv_rom.v b/rtl/curve/uop/uop_conv_rom.v
deleted file mode 100644
index 3097736..0000000
--- a/rtl/curve/uop/uop_conv_rom.v
+++ /dev/null
@@ -1,38 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_conv_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
- 6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
- 6'd01: data <= {OPCODE_MOV, UOP_SRC_V, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
- 6'd03: data <= {OPCODE_MUL, UOP_SRC_V, UOP_SRC_T2, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd04: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd05: data <= {OPCODE_MUL, UOP_SRC_PY, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
- 6'd06: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd07: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_dbl_rom.v b/rtl/curve/uop/uop_dbl_rom.v
deleted file mode 100644
index 1939ca9..0000000
--- a/rtl/curve/uop/uop_dbl_rom.v
+++ /dev/null
@@ -1,58 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_dbl_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
-/* 1. */6'd00: data <= {OPCODE_CMP, UOP_SRC_PZ, UOP_SRC_ZERO, UOP_DST_DUMMY, UOP_EXEC_ALWAYS};
-/* 2. */6'd01: data <= {OPCODE_MOV, UOP_SRC_PZ, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 5'd02: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 3. */6'd03: data <= {OPCODE_SUB, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 4. */6'd04: data <= {OPCODE_ADD, UOP_SRC_PX, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 5. */6'd05: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 6. */6'd06: data <= {OPCODE_ADD, UOP_SRC_T2, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd07: data <= {OPCODE_ADD, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T2, UOP_EXEC_ALWAYS};
-/* 7. */6'd08: data <= {OPCODE_ADD, UOP_SRC_PY, UOP_SRC_PY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 8. */6'd09: data <= {OPCODE_MUL, UOP_SRC_PZ, UOP_SRC_RY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-/* 9. */6'd10: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd11: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T3, UOP_EXEC_ALWAYS};
- 6'd12: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T3, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 10. */6'd13: data <= {OPCODE_MUL, UOP_SRC_PX, UOP_SRC_RY, UOP_DST_T3, UOP_EXEC_ALWAYS};
-/* 11. */6'd14: data <= {OPCODE_MOV, UOP_SRC_RY, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd15: data <= {OPCODE_MUL, UOP_SRC_RY, UOP_SRC_T1, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 12. */6'd16: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_DELTA, UOP_DST_RY, UOP_EXEC_ALWAYS};
-/* 13. */6'd17: data <= {OPCODE_MOV, UOP_SRC_T2, UOP_SRC_DUMMY, UOP_DST_T1, UOP_EXEC_ALWAYS};
- 6'd18: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 14. */6'd19: data <= {OPCODE_ADD, UOP_SRC_T3, UOP_SRC_T3, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 15. */6'd20: data <= {OPCODE_SUB, UOP_SRC_RX, UOP_SRC_T1, UOP_DST_RX, UOP_EXEC_ALWAYS};
-/* 16. */6'd21: data <= {OPCODE_SUB, UOP_SRC_T3, UOP_SRC_RX, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 17. */6'd22: data <= {OPCODE_MUL, UOP_SRC_T1, UOP_SRC_T2, UOP_DST_T1, UOP_EXEC_ALWAYS};
-/* 18. */6'd23: data <= {OPCODE_SUB, UOP_SRC_T1, UOP_SRC_RY, UOP_DST_RY, UOP_EXEC_ALWAYS};
-
- 6'd24: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_PZT1T2_0XX};
- 6'd25: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_PZT1T2_0XX};
- 6'd26: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_PZT1T2_0XX};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop/uop_init_rom.v b/rtl/curve/uop/uop_init_rom.v
deleted file mode 100644
index ac44b55..0000000
--- a/rtl/curve/uop/uop_init_rom.v
+++ /dev/null
@@ -1,33 +0,0 @@
-`timescale 1ns / 1ps
-
-module uop_init_rom
- (
- input wire clk,
- input wire [ 5: 0] addr,
- output reg [19: 0] data
- );
-
-
- //
- // Microcode
- //
-`include "..\uop_ecdsa.v"
-
-
- //
- // Doubling Microprogram
- //
- always @(posedge clk)
-
- case (addr)
-
- 6'd00: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RX, UOP_EXEC_ALWAYS};
- 6'd01: data <= {OPCODE_MOV, UOP_SRC_ONE, UOP_SRC_DUMMY, UOP_DST_RY, UOP_EXEC_ALWAYS};
- 6'd02: data <= {OPCODE_MOV, UOP_SRC_ZERO, UOP_SRC_DUMMY, UOP_DST_RZ, UOP_EXEC_ALWAYS};
-
- default: data <= {OPCODE_RDY, UOP_SRC_DUMMY, UOP_SRC_DUMMY, UOP_DST_DUMMY};
-
- endcase
-
-
-endmodule
diff --git a/rtl/curve/uop_ecdsa.v b/rtl/curve/uop_ecdsa.v
deleted file mode 100644
index e64119d..0000000
--- a/rtl/curve/uop_ecdsa.v
+++ /dev/null
@@ -1,50 +0,0 @@
-localparam [ 4: 0] OPCODE_CMP = 5'b00001;
-localparam [ 4: 0] OPCODE_MOV = 5'b00010;
-localparam [ 4: 0] OPCODE_ADD = 5'b00100;
-localparam [ 4: 0] OPCODE_SUB = 5'b01000;
-localparam [ 4: 0] OPCODE_MUL = 5'b10000;
-localparam [ 4: 0] OPCODE_RDY = 5'b00000;
-
-localparam [ 4: 0] UOP_SRC_PX = 5'h0_0;
-localparam [ 4: 0] UOP_SRC_PY = 5'h0_1;
-localparam [ 4: 0] UOP_SRC_PZ = 5'h0_2;
-
-localparam [ 4: 0] UOP_SRC_RX = 5'h0_3;
-localparam [ 4: 0] UOP_SRC_RY = 5'h0_4;
-localparam [ 4: 0] UOP_SRC_RZ = 5'h0_5;
-
-localparam [ 4: 0] UOP_SRC_T1 = 5'h0_6;
-localparam [ 4: 0] UOP_SRC_T2 = 5'h0_7;
-localparam [ 4: 0] UOP_SRC_T3 = 5'h0_8;
-localparam [ 4: 0] UOP_SRC_T4 = 5'h0_9;
-
-localparam [ 4: 0] UOP_SRC_ONE = 5'h0_A;
-localparam [ 4: 0] UOP_SRC_ZERO = 5'h0_B;
-localparam [ 4: 0] UOP_SRC_DELTA = 5'h0_C;
-
-localparam [ 4: 0] UOP_SRC_V = 5'h0_F;
-
-localparam [ 4: 0] UOP_SRC_G_X = 5'h1_0;
-localparam [ 4: 0] UOP_SRC_G_Y = 5'h1_1;
-
-localparam [ 4: 0] UOP_SRC_H_X = 5'h1_2;
-localparam [ 4: 0] UOP_SRC_H_Y = 5'h1_3;
-
-localparam [ 4: 0] UOP_SRC_DUMMY = 5'hX_X;
-
-localparam [ 2: 0] UOP_DST_RX = 3'd0;
-localparam [ 2: 0] UOP_DST_RY = 3'd1;
-localparam [ 2: 0] UOP_DST_RZ = 3'd2;
-
-localparam [ 2: 0] UOP_DST_T1 = 3'd3;
-localparam [ 2: 0] UOP_DST_T2 = 3'd4;
-localparam [ 2: 0] UOP_DST_T3 = 3'd5;
-localparam [ 2: 0] UOP_DST_T4 = 3'd6;
-
-localparam [ 2: 0] UOP_DST_DUMMY = 3'dX;
-
-localparam UOP_EXEC_ALWAYS = 2'b11; // R
-localparam UOP_EXEC_PZT1T2_0XX = 2'b10; // G
-localparam UOP_EXEC_PZT1T2_100 = 2'b00; // H
-localparam UOP_EXEC_PZT1T2_101 = 2'b01; // O
-
diff --git a/rtl/ecdsa256.v b/rtl/ecdsa256.v
index 1e712bf..11276a2 100644
--- a/rtl/ecdsa256.v
+++ b/rtl/ecdsa256.v
@@ -34,115 +34,115 @@
module ecdsa256
(
- input wire clk,
- input wire rst_n,
+ input wire clk,
+ input wire rst_n,
- input wire next,
- output wire valid,
+ input wire next,
+ output wire valid,
- input wire bus_cs,
- input wire bus_we,
- input wire [ 4:0] bus_addr,
- input wire [31:0] bus_data_wr,
+ input wire bus_cs,
+ input wire bus_we,
+ input wire [ 4:0] bus_addr,
+ input wire [31:0] bus_data_wr,
output wire [31:0] bus_data_rd
);
-
- //
- // Memory Banks
- //
- localparam [1:0] BUS_ADDR_BANK_K = 2'b00;
- localparam [1:0] BUS_ADDR_BANK_X = 2'b01;
- localparam [1:0] BUS_ADDR_BANK_Y = 2'b10;
-
- wire [1:0] bus_addr_upper = bus_addr[4:3];
- wire [2:0] bus_addr_lower = bus_addr[2:0];
-
-
+
+ //
+ // Memory Banks
+ //
+ localparam [1:0] BUS_ADDR_BANK_K = 2'b00;
+ localparam [1:0] BUS_ADDR_BANK_X = 2'b01;
+ localparam [1:0] BUS_ADDR_BANK_Y = 2'b10;
+
+ wire [1:0] bus_addr_upper = bus_addr[4:3];
+ wire [2:0] bus_addr_lower = bus_addr[2:0];
+
+
//
// Memories
- //
-
- wire [31:0] user_rw_k_bram_out;
- wire [31:0] user_ro_x_bram_out;
- wire [31:0] user_ro_y_bram_out;
-
- wire [ 2:0] core_ro_k_bram_addr;
- wire [ 2:0] core_rw_x_bram_addr;
- wire [ 2:0] core_rw_y_bram_addr;
-
- wire core_rw_x_bram_wren;
- wire core_rw_y_bram_wren;
-
- wire [31:0] core_ro_k_bram_dout;
- wire [31:0] core_rw_x_bram_din;
- wire [31:0] core_rw_y_bram_din;
-
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ //
+
+ wire [31:0] user_rw_k_bram_out;
+ wire [31:0] user_ro_x_bram_out;
+ wire [31:0] user_ro_y_bram_out;
+
+ wire [ 2:0] core_ro_k_bram_addr;
+ wire [ 2:0] core_rw_x_bram_addr;
+ wire [ 2:0] core_rw_y_bram_addr;
+
+ wire core_rw_x_bram_wren;
+ wire core_rw_y_bram_wren;
+
+ wire [31:0] core_ro_k_bram_dout;
+ wire [31:0] core_rw_x_bram_din;
+ wire [31:0] core_rw_y_bram_din;
+
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_k
- ( .clk(clk),
- .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr),
- .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ bram_k
+ ( .clk(clk),
+ .a_addr(bus_addr_lower), .a_out(user_rw_k_bram_out), .a_wr(bus_cs && bus_we && (bus_addr_upper == BUS_ADDR_BANK_K)), .a_in(bus_data_wr),
+ .b_addr(core_ro_k_bram_addr), .b_out(core_ro_k_bram_dout)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_x
- ( .clk(clk),
- .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din),
- .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out)
- );
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
+ bram_x
+ ( .clk(clk),
+ .a_addr(core_rw_x_bram_addr), .a_out(), .a_wr(core_rw_x_bram_wren), .a_in(core_rw_x_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_x_bram_out)
+ );
+
+ bram_1rw_1ro_readfirst #
+ ( .MEM_WIDTH(32), .MEM_ADDR_BITS(3)
)
- bram_y
- ( .clk(clk),
- .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din),
- .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out)
- );
+ bram_y
+ ( .clk(clk),
+ .a_addr(core_rw_y_bram_addr), .a_out(), .a_wr(core_rw_y_bram_wren), .a_in(core_rw_y_bram_din),
+ .b_addr(bus_addr_lower), .b_out(user_ro_y_bram_out)
+ );
//
// Curve Base Point Multiplier
- //
- reg next_dly;
-
- always @(posedge clk) next_dly <= next;
-
- wire next_trig = next && !next_dly;
-
- curve_mul_256 base_point_multiplier_p256
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (next_trig),
- .rdy (valid),
-
- .k_addr (core_ro_k_bram_addr),
- .rx_addr (core_rw_x_bram_addr),
- .ry_addr (core_rw_y_bram_addr),
-
- .rx_wren (core_rw_x_bram_wren),
- .ry_wren (core_rw_y_bram_wren),
-
- .k_din (core_ro_k_bram_dout),
- .rx_dout (core_rw_x_bram_din),
- .ry_dout (core_rw_y_bram_din)
- );
+ //
+ reg next_dly;
+
+ always @(posedge clk) next_dly <= next;
- //
+ wire next_trig = next && !next_dly;
+
+ curve_mul_256 base_point_multiplier_p256
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (next_trig),
+ .rdy (valid),
+
+ .k_addr (core_ro_k_bram_addr),
+ .rx_addr (core_rw_x_bram_addr),
+ .ry_addr (core_rw_y_bram_addr),
+
+ .rx_wren (core_rw_x_bram_wren),
+ .ry_wren (core_rw_y_bram_wren),
+
+ .k_din (core_ro_k_bram_dout),
+ .rx_dout (core_rw_x_bram_din),
+ .ry_dout (core_rw_y_bram_din)
+ );
+
+ //
// Output Selector
//
- reg [1:0] bus_addr_upper_prev;
+ reg [1:0] bus_addr_upper_prev;
always @(posedge clk) bus_addr_upper_prev = bus_addr_upper;
- reg [31: 0] bus_data_rd_mux;
+ reg [31: 0] bus_data_rd_mux;
assign bus_data_rd = bus_data_rd_mux;
always @(*)
diff --git a/rtl/ecdsa256_wrapper.v b/rtl/ecdsa256_wrapper.v
index c6e93ea..db043d0 100644
--- a/rtl/ecdsa256_wrapper.v
+++ b/rtl/ecdsa256_wrapper.v
@@ -32,11 +32,11 @@
module ecdsa256_wrapper
(
- input wire clk,
- input wire rst_n,
+ input wire clk,
+ input wire reset_n,
- input wire cs,
- input wire we,
+ input wire cs,
+ input wire we,
input wire [5: 0] address,
input wire [31: 0] write_data,
@@ -47,18 +47,18 @@ module ecdsa256_wrapper
//
// Address Decoder
//
- localparam ADDR_MSB_REGS = 1'b0;
- localparam ADDR_MSB_CORE = 1'b1;
-
- wire [0:0] addr_msb = address[5];
- wire [4:0] addr_lsb = address[4:0];
+ localparam ADDR_MSB_REGS = 1'b0;
+ localparam ADDR_MSB_CORE = 1'b1;
+
+ wire [0:0] addr_msb = address[5];
+ wire [4:0] addr_lsb = address[4:0];
//
// Output Mux
//
- wire [31: 0] read_data_regs;
- wire [31: 0] read_data_core;
+ wire [31: 0] read_data_regs;
+ wire [31: 0] read_data_core;
//
@@ -69,13 +69,13 @@ module ecdsa256_wrapper
localparam ADDR_VERSION = 5'h02;
localparam ADDR_CONTROL = 5'h08; // {next, init}
- localparam ADDR_STATUS = 5'h09; // {valid, ready}
- localparam ADDR_DUMMY = 5'h0F; // don't care
+ localparam ADDR_STATUS = 5'h09; // {valid, ready}
+ localparam ADDR_DUMMY = 5'h0F; // don't care
-// localparam CONTROL_INIT_BIT = 0; -- not used
+ // localparam CONTROL_INIT_BIT = 0; -- not used
localparam CONTROL_NEXT_BIT = 1;
-// localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1
+ // localparam STATUS_READY_BIT = 0; -- hardcoded to always read 1
localparam STATUS_VALID_BIT = 1;
localparam CORE_NAME0 = 32'h65636473; // "ecds"
@@ -86,23 +86,23 @@ module ecdsa256_wrapper
//
// Registers
//
- reg reg_control;
- reg [31:0] reg_dummy;
+ reg reg_control;
+ reg [31:0] reg_dummy;
//
// Wires
//
- wire reg_status;
+ wire reg_status;
//
// ECDSA256
//
ecdsa256 ecdsa256_inst
- (
- .clk (clk),
- .rst_n (rst_n),
+ (
+ .clk (clk),
+ .rst_n (reset_n),
.next (reg_control),
.valid (reg_status),
@@ -112,13 +112,13 @@ module ecdsa256_wrapper
.bus_addr (addr_lsb),
.bus_data_wr (write_data),
.bus_data_rd (read_data_core)
- );
+ );
//
// Read Latch
//
- reg [31: 0] tmp_read_data;
+ reg [31: 0] tmp_read_data;
//
@@ -126,7 +126,7 @@ module ecdsa256_wrapper
//
always @(posedge clk)
//
- if (!rst_n) begin
+ if (!reset_n) begin
//
reg_control <= 1'b0;
//
@@ -138,8 +138,8 @@ module ecdsa256_wrapper
//
case (addr_lsb)
//
- ADDR_CONTROL: reg_control <= write_data[1];
- ADDR_DUMMY: reg_dummy <= write_data[31:0];
+ ADDR_CONTROL: reg_control <= write_data[1];
+ ADDR_DUMMY: reg_dummy <= write_data[31:0];
//
endcase
//
@@ -153,8 +153,8 @@ module ecdsa256_wrapper
ADDR_NAME1: tmp_read_data <= CORE_NAME1;
ADDR_VERSION: tmp_read_data <= CORE_VERSION;
ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control, 1'b0};
- ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1};
- ADDR_DUMMY: tmp_read_data <= reg_dummy;
+ ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status, 1'b1};
+ ADDR_DUMMY: tmp_read_data <= reg_dummy;
//
default: tmp_read_data <= 32'h00000000;
//
diff --git a/rtl/lowlevel/adder32_wrapper.v b/rtl/lowlevel/adder32_wrapper.v
deleted file mode 100644
index ebfd8ce..0000000
--- a/rtl/lowlevel/adder32_wrapper.v
+++ /dev/null
@@ -1,73 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_wrapper
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `ADDER32_PRIMITIVE adder32_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .s(s),
- .c_in(c_in),
- .c_out(c_out)
- );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/adder47_wrapper.v b/rtl/lowlevel/adder47_wrapper.v
deleted file mode 100644
index 1a0a18e..0000000
--- a/rtl/lowlevel/adder47_wrapper.v
+++ /dev/null
@@ -1,69 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_wrapper
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `ADDER47_PRIMITIVE adder47_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .s(s)
- );
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder32_artix7.v b/rtl/lowlevel/artix7/adder32_artix7.v
deleted file mode 100644
index 5f9ba79..0000000
--- a/rtl/lowlevel/artix7/adder32_artix7.v
+++ /dev/null
@@ -1,96 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_artix7
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [13: 0] bh = b[31:18];
-
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (c_in),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{16{1'b0}}, bh}),
- .b (bl),
- .c ({{16{1'b0}}, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[31: 0];
- assign c_out = p_int[32];
-
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/adder47_artix7.v b/rtl/lowlevel/artix7/adder47_artix7.v
deleted file mode 100644
index 00566e4..0000000
--- a/rtl/lowlevel/artix7/adder47_artix7.v
+++ /dev/null
@@ -1,91 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_artix7
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [28: 0] bh = b[46:18];
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (1'b0),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({1'b0, bh}),
- .b (bl),
- .c ({1'b0, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[46: 0];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/dsp48e1_wrapper.v b/rtl/lowlevel/artix7/dsp48e1_wrapper.v
deleted file mode 100644
index 11a21bc..0000000
--- a/rtl/lowlevel/artix7/dsp48e1_wrapper.v
+++ /dev/null
@@ -1,159 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// dsp48e1_wrapper.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) tile wrapper.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module dsp48e1_wrapper
- (
- input clk,
-
- input ce,
-
- input [ 6: 0] opmode,
- input [ 3: 0] alumode,
-
- input carry,
-
- input [29: 0] a,
- input [17: 0] b,
- input [47: 0] c,
-
- output [47: 0] p
- );
-
-
- //
- // Tile instantiation
- //
- DSP48E1 #
- (
- .AREG (0),
- .BREG (0),
- .CREG (0),
- .DREG (0),
- .MREG (0),
- .PREG (1),
- .ADREG (0),
-
- .ACASCREG (0),
- .BCASCREG (0),
- .ALUMODEREG (0),
- .INMODEREG (0),
- .OPMODEREG (0),
- .CARRYINREG (0),
- .CARRYINSELREG (0),
-
- .A_INPUT ("DIRECT"),
- .B_INPUT ("DIRECT"),
-
- .USE_DPORT ("FALSE"),
- .USE_MULT ("DYNAMIC"),
- .USE_SIMD ("ONE48"),
-
- .USE_PATTERN_DETECT ("NO_PATDET"),
- .SEL_PATTERN ("PATTERN"),
- .SEL_MASK ("MASK"),
- .PATTERN (48'h000000000000),
- .MASK (48'h3fffffffffff),
- .AUTORESET_PATDET ("NO_RESET")
- )
- DSP48E1_inst
- (
- .CLK (clk),
-
- .RSTA (1'b0),
- .RSTB (1'b0),
- .RSTC (1'b0),
- .RSTD (1'b0),
- .RSTM (1'b0),
- .RSTP (1'b0),
-
- .RSTCTRL (1'b0),
- .RSTINMODE (1'b0),
- .RSTALUMODE (1'b0),
- .RSTALLCARRYIN (1'b0),
-
- .CEA1 (1'b0),
- .CEA2 (1'b0),
- .CEB1 (1'b0),
- .CEB2 (1'b0),
- .CEC (1'b0),
- .CED (1'b0),
- .CEM (1'b0),
- .CEP (ce),
- .CEAD (1'b0),
- .CEALUMODE (1'b0),
- .CEINMODE (1'b0),
-
- .CECTRL (1'b0),
- .CECARRYIN (1'b0),
-
- .A (a),
- .B (b),
- .C (c),
- .D ({25{1'b1}}),
- .P (p),
-
- .CARRYIN (carry),
- .CARRYOUT (),
- .CARRYINSEL (3'b000),
-
- .CARRYCASCIN (1'b0),
- .CARRYCASCOUT (),
-
- .PATTERNDETECT (),
- .PATTERNBDETECT (),
-
- .OPMODE (opmode),
- .ALUMODE (alumode),
- .INMODE (5'b00000),
-
- .MULTSIGNIN (1'b0),
- .MULTSIGNOUT (),
-
- .UNDERFLOW (),
- .OVERFLOW (),
-
- .ACIN (30'd0),
- .BCIN (18'd0),
- .PCIN (48'd0),
-
- .ACOUT (),
- .BCOUT (),
- .PCOUT ()
- );
-
-endmodule
diff --git a/rtl/lowlevel/artix7/mac16_artix7.v b/rtl/lowlevel/artix7/mac16_artix7.v
deleted file mode 100644
index 63b74ab..0000000
--- a/rtl/lowlevel/artix7/mac16_artix7.v
+++ /dev/null
@@ -1,90 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_artix7
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0000;
- wire [ 6: 0] dsp48e1_opmode = {2'b01, clr, 4'b0101};
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_adder
- (
- .clk (clk),
-
- .ce (ce),
-
- .carry (1'b0),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{14{1'b0}}, a}),
- .b ({{ 2{1'b0}}, b}),
- .c ({48{1'b0}}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign s = p_int[46:0];
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/artix7/subtractor32_artix7.v b/rtl/lowlevel/artix7/subtractor32_artix7.v
deleted file mode 100644
index b46ac5c..0000000
--- a/rtl/lowlevel/artix7/subtractor32_artix7.v
+++ /dev/null
@@ -1,94 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_artix7.v
-// -----------------------------------------------------------------------------
-// Hardware (Artix-7 DSP48E1) 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_artix7
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Lower and higher parts of operand
- //
- wire [17: 0] bl = b[17: 0];
- wire [13: 0] bh = b[31:18];
-
- //
- // DSP48E1 Slice
- //
-
- /* Operation Mode */
- wire [ 3: 0] dsp48e1_alumode = 4'b0011;
- wire [ 6: 0] dsp48e1_opmode = 7'b0110011;
-
- /* Internal Product */
- wire [47: 0] p_int;
-
- dsp48e1_wrapper dsp_subtractor
- (
- .clk (clk),
-
- .ce (1'b1),
-
- .carry (b_in),
-
- .alumode (dsp48e1_alumode),
- .opmode (dsp48e1_opmode),
-
- .a ({{16{1'b0}}, bh}),
- .b (bl),
- .c ({{16{1'b0}}, a}),
-
- .p (p_int)
- );
-
- //
- // Output Mapping
- //
- assign d = p_int[31: 0];
- assign b_out = p_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/ecdsa_lowlevel_settings.v b/rtl/lowlevel/ecdsa_lowlevel_settings.v
deleted file mode 100644
index 8f95e2f..0000000
--- a/rtl/lowlevel/ecdsa_lowlevel_settings.v
+++ /dev/null
@@ -1,17 +0,0 @@
-`define USE_VENDOR_PRIMITIVES
-
-`ifdef USE_VENDOR_PRIMITIVES
-
-`define MAC16_PRIMITIVE mac16_artix7
-`define ADDER32_PRIMITIVE adder32_artix7
-`define ADDER47_PRIMITIVE adder47_artix7
-`define SUBTRACTOR32_PRIMITIVE subtractor32_artix7
-
-`else
-
-`define MAC16_PRIMITIVE mac16_generic
-`define ADDER32_PRIMITIVE adder32_generic
-`define ADDER47_PRIMITIVE adder47_generic
-`define SUBTRACTOR32_PRIMITIVE subtractor32_generic
-
-`endif
diff --git a/rtl/lowlevel/generic/adder32_generic.v b/rtl/lowlevel/generic/adder32_generic.v
deleted file mode 100644
index b9c94aa..0000000
--- a/rtl/lowlevel/generic/adder32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder32_generic
- (
- input clk, // clock
- input [31: 0] a, // operand input
- input [31: 0] b, // operand input
- output [31: 0] s, // sum output
- input c_in, // carry input
- output c_out // carry output
- );
-
- //
- // Sum
- //
- reg [32: 0] s_int;
-
- always @(posedge clk)
- s_int <= {1'b0, a} + {1'b0, b} + {{32{1'b0}}, c_in};
-
- //
- // Output
- //
- assign s = s_int[31:0];
- assign c_out = s_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/adder47_generic.v b/rtl/lowlevel/generic/adder47_generic.v
deleted file mode 100644
index f472061..0000000
--- a/rtl/lowlevel/generic/adder47_generic.v
+++ /dev/null
@@ -1,64 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// adder47_generic.v
-// -----------------------------------------------------------------------------
-// Generic 47-bit adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module adder47_generic
- (
- input clk, // clock
- input [46: 0] a, // operand input
- input [46: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Sum
- //
- reg [46: 0] s_int;
-
- always @(posedge clk)
- s_int <= a + b;
-
- //
- // Output
- //
- assign s = s_int;
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/mac16_generic.v b/rtl/lowlevel/generic/mac16_generic.v
deleted file mode 100644
index dc95645..0000000
--- a/rtl/lowlevel/generic/mac16_generic.v
+++ /dev/null
@@ -1,74 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_generic.v
-// -----------------------------------------------------------------------------
-// Generic 16-bit multiplier and 47-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_generic
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
- //
- // Multiplier
- //
- wire [31: 0] p = {{16{1'b0}}, a} * {{16{1'b0}}, b};
- wire [46: 0] p_ext = {{15{1'b0}}, p};
-
- //
- // Accumulator
- //
- reg [46: 0] s_int;
-
- always @(posedge clk)
- //
- if (ce) s_int <= clr ? p_ext : p_ext + s_int;
-
- //
- // Output
- //
- assign s = s_int;
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/generic/subtractor32_generic.v b/rtl/lowlevel/generic/subtractor32_generic.v
deleted file mode 100644
index 46aefe8..0000000
--- a/rtl/lowlevel/generic/subtractor32_generic.v
+++ /dev/null
@@ -1,67 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_generic.v
-// -----------------------------------------------------------------------------
-// Generic 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_generic
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Difference
- //
- reg [32: 0] d_int;
-
- always @(posedge clk)
- d_int <= {1'b0, a} - {1'b0, b} - {{32{1'b0}}, b_in};
-
- //
- // Output
- //
- assign d = d_int[31:0];
- assign b_out = d_int[32];
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/mac16_wrapper.v b/rtl/lowlevel/mac16_wrapper.v
deleted file mode 100644
index b91e518..0000000
--- a/rtl/lowlevel/mac16_wrapper.v
+++ /dev/null
@@ -1,75 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mac16_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 16-bit multiplier and 48-bit accumulator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mac16_wrapper
- (
- input clk, // clock
- input clr, // clear accumulator (active-high)
- input ce, // enable clock (active-high)
- input [15: 0] a, // operand input
- input [15: 0] b, // operand input
- output [46: 0] s // sum output
- );
-
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `MAC16_PRIMITIVE mac16_inst
- (
- .clk(clk),
- .clr(clr),
- .ce(ce),
- .a(a),
- .b(b),
- .s(s)
- );
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/lowlevel/subtractor32_wrapper.v b/rtl/lowlevel/subtractor32_wrapper.v
deleted file mode 100644
index 3c7e5e9..0000000
--- a/rtl/lowlevel/subtractor32_wrapper.v
+++ /dev/null
@@ -1,72 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// subtractor32_wrapper.v
-// -----------------------------------------------------------------------------
-// Wrapper for 32-bit subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module subtractor32_wrapper
- (
- input clk,
- input [31: 0] a,
- input [31: 0] b,
- output [31: 0] d,
- input b_in,
- output b_out
- );
-
- //
- // Include Primitive Selector
- //
-`include "ecdsa_lowlevel_settings.v"
-
-
- //
- // Instantiate Vendor/Generic Primitive
- //
- `SUBTRACTOR32_PRIMITIVE subtractor32_inst
- (
- .clk(clk),
- .a(a),
- .b(b),
- .d(d),
- .b_in(b_in),
- .b_out(b_out)
- );
-
-endmodule
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_adder.v b/rtl/modular/modular_adder.v
deleted file mode 100644
index 5641feb..0000000
--- a/rtl/modular/modular_adder.v
+++ /dev/null
@@ -1,298 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_adder.v
-// -----------------------------------------------------------------------------
-// Modular adder.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_adder
- (
- clk, rst_n,
- ena, rdy,
- ab_addr, n_addr, s_addr, s_wren,
- a_din, b_din, n_din, s_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] s_addr; // index of current S word
- output wire s_wren; // store current S word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N
- output wire [ 31:0] s_dout; // S = (A + B) mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_ab;
- reg [WORD_COUNTER_WIDTH-1:0] index_n;
- reg [WORD_COUNTER_WIDTH-1:0] index_s;
-
- /* map registers to output ports */
- assign ab_addr = index_ab;
- assign n_addr = index_n;
- assign s_addr = index_s;
-
-
- //
- // Adder
- //
- wire [31: 0] add32_s;
- wire add32_c_in;
- wire add32_c_out;
-
- adder32_wrapper adder32
- (
- .clk (clk),
- .a (a_din),
- .b (b_din),
- .s (add32_s),
- .c_in (add32_c_in),
- .c_out (add32_c_out)
- );
-
-
- //
- // Subtractor
- //
- wire [31: 0] sub32_d;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32
- (
- .clk (clk),
- .a (add32_s),
- .b (n_din),
- .d (sub32_d),
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // FSM
- //
-
- localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_sum_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_s = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-
- wire fsm_latch_msb_carry = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
-
- wire inc_index_ab = |fsm_shreg_inc_index_ab;
- wire inc_index_n = |fsm_shreg_inc_index_n;
- wire store_sum_ab = |fsm_shreg_store_sum_ab;
- wire store_sum_ab_n = |fsm_shreg_store_sum_ab_n;
- wire store_data_s = |fsm_shreg_store_data_s;
- wire inc_index_s = |fsm_shreg_inc_index_s;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
-
-
-
-
-
- //
- // Carry & Borrow Masking Logic
- //
- reg add32_c_mask;
- reg sub32_b_mask;
-
- always @(posedge clk) begin
- //
- add32_c_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- sub32_b_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- //
- end
-
- assign add32_c_in = add32_c_out & ~add32_c_mask;
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
-
- //
- // Carry & Borrow Latch Logic
- //
- reg add32_carry_latch;
- reg sub32_borrow_latch;
-
- always @(posedge clk) begin
- //
- if (fsm_latch_msb_carry) add32_carry_latch <= add32_c_out;
- if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
- //
- end
-
-
- //
- // Intermediate Results
- //
- reg [32*OPERAND_NUM_WORDS-1:0] s_ab;
- reg [32*OPERAND_NUM_WORDS-1:0] s_ab_n;
-
- always @(posedge clk)
- //
- if (store_data_s) begin
- //
- s_ab <= {{32{1'bX}}, s_ab[32*OPERAND_NUM_WORDS-1:32]};
- s_ab_n <= {{32{1'bX}}, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end else begin
- //
- if (store_sum_ab) s_ab <= {add32_s, s_ab[32*OPERAND_NUM_WORDS-1:32]};
- if (store_sum_ab_n) s_ab_n <= {sub32_d, s_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_ab <= WORD_INDEX_ZERO;
- index_n <= WORD_INDEX_ZERO;
- index_s <= WORD_INDEX_ZERO;
- //
- end else begin
- //
- if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
- if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
- if (inc_index_s) index_s <= WORD_INDEX_NEXT_OR_ZERO(index_s);
- //
- end
-
-
- //
- // Output Sum Selector
- //
- wire mux_select_ab = sub32_borrow_latch && !add32_carry_latch;
-
-
- //
- // Output Data and Write Enable Logic
- //
- reg s_wren_reg;
- reg [31: 0] s_dout_reg;
- wire [31: 0] s_dout_mux = mux_select_ab ? s_ab[31:0] : s_ab_n[31:0];
-
- assign s_wren = s_wren_reg;
- assign s_dout = s_dout_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- s_wren_reg <= 1'b0;
- s_dout_reg <= {32{1'bX}};
- //
- end else begin
- //
- s_wren_reg <= store_data_s;
- s_dout_reg <= store_data_s ? s_dout_mux : {32{1'bX}};
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v b/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
deleted file mode 100644
index 07c1b4f..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_copy.v
+++ /dev/null
@@ -1,148 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_copy
- (
- clk, rst_n,
- ena, rdy,
- s_addr, s_din,
- a1_addr, a1_wren, a1_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
-
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 2;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
-
- input wire ena;
- output wire rdy;
-
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
-
- output wire a1_wren;
-
- input wire [ 31:0] s_din;
-
- output wire [ 31:0] a1_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [OPERAND_ADDR_BITS-1:0] addr_s;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_s_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_s_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_s_next = (addr_s < addr_s_max) ?
- addr_s + 1'b1 : addr_s_zero;
-
- reg [OPERAND_ADDR_BITS-1:0] addr_a1;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_a1_next = (addr_a1 < addr_a1_max) ?
- addr_a1 + 1'b1 : addr_a1_zero;
-
- assign s_addr = {{(BUFFER_ADDR_BITS - OPERAND_ADDR_BITS){1'b0}}, addr_s};
- assign a1_addr = addr_a1;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_s;
- wire inc_addr_a1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_s_stop = OPERAND_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_a1_stop = OPERAND_NUM_WORDS + 1;
-
- assign inc_addr_s = (proc_cnt >= cnt_inc_addr_s_start) && (proc_cnt <= cnt_inc_addr_s_stop);
- assign inc_addr_a1 = (proc_cnt >= cnt_inc_addr_a1_start) && (proc_cnt <= cnt_inc_addr_a1_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_s) addr_s <= addr_s_next;
- else addr_s <= addr_s_zero;
- //
- if (inc_addr_a1) addr_a1 <= addr_a1_next;
- else addr_a1 <= addr_a1_zero;
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_a1;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_a1_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_a1_stop = OPERAND_NUM_WORDS + 1;
-
- assign wren_a1 = (proc_cnt >= cnt_wren_a1_start) && (proc_cnt <= cnt_wren_a1_stop);
-
- assign a1_wren = wren_a1;
-
-
- //
- // Data Logic
- //
- assign a1_dout = s_din;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_init.v b/rtl/modular/modular_invertor/helper/modinv_helper_init.v
deleted file mode 100644
index 0468134..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_init.v
+++ /dev/null
@@ -1,172 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_init
- (
- clk, rst_n,
- ena, rdy,
- a_addr, a_din,
- q_addr, q_din,
- r_addr, r_wren, r_dout,
- s_addr, s_wren, s_dout,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
-
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = OPERAND_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [OPERAND_ADDR_BITS-1:0] a_addr;
- output wire [OPERAND_ADDR_BITS-1:0] q_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire r_wren;
- output wire s_wren;
- output wire u_wren;
- output wire v_wren;
-
- input wire [ 31:0] a_din;
- input wire [ 31:0] q_din;
- output wire [ 31:0] r_dout;
- output wire [ 31:0] s_dout;
- output wire [ 31:0] u_dout;
- output wire [ 31:0] v_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [OPERAND_ADDR_BITS-1:0] addr_aq;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_max = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_aq_next = (addr_aq < addr_aq_max) ?
- addr_aq + 1'b1 : addr_aq_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_rsuv;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_rsuv_next = (addr_rsuv < addr_rsuv_max) ?
- addr_rsuv + 1'b1 : addr_rsuv_zero;
-
- assign a_addr = addr_aq;
- assign q_addr = addr_aq;
-
- assign r_addr = addr_rsuv;
- assign s_addr = addr_rsuv;
- assign u_addr = addr_rsuv;
- assign v_addr = addr_rsuv;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_aq;
- wire inc_addr_rsuv;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_aq_stop = OPERAND_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_rsuv_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_aq = (proc_cnt >= cnt_inc_addr_aq_start) && (proc_cnt <= cnt_inc_addr_aq_stop);
- assign inc_addr_rsuv = (proc_cnt >= cnt_inc_addr_rsuv_start) && (proc_cnt <= cnt_inc_addr_rsuv_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_aq) addr_aq <= addr_aq_next;
- else addr_aq <= addr_aq_zero;
- //
- if (inc_addr_rsuv) addr_rsuv <= addr_rsuv_next;
- else addr_rsuv <= addr_rsuv_zero;
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_rsuv;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_rsuv_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_rsuv = (proc_cnt >= cnt_wren_rsuv_start) && (proc_cnt <= cnt_wren_rsuv_stop);
-
- assign r_wren = wren_rsuv;
- assign s_wren = wren_rsuv;
- assign u_wren = wren_rsuv;
- assign v_wren = wren_rsuv;
-
-
- //
- // Data Logic
- //
- assign r_dout = 32'd0;
- assign s_dout = (proc_cnt == cnt_wren_rsuv_start) ? 32'd1 : 32'd0;
- assign u_dout = (proc_cnt != cnt_wren_rsuv_stop) ? q_din : 32'd0;
- assign v_dout = (proc_cnt != cnt_wren_rsuv_stop) ? a_din : 32'd0;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
deleted file mode 100644
index 6b65eb1..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_compare.v
+++ /dev/null
@@ -1,286 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_compare
- (
- clk, rst_n,
- ena, rdy,
-
- u_addr, u_din,
- v_addr, v_din,
-
- u_gt_v, v_eq_1,
- u_is_even, v_is_even
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 1 * BUFFER_NUM_WORDS + 10;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
- output wire u_gt_v;
- output wire v_eq_1;
- output wire u_is_even;
- output wire v_is_even;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
- addr_in - 1'b1 : addr_in_last;
-
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Decrement Logic
- //
- wire dec_addr_in;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 1 * BUFFER_NUM_WORDS + 0;
-
- assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
-
- always @(posedge clk)
- //
- if (rdy) addr_in <= addr_in_last;
- else if (dec_addr_in) addr_in <= addr_in_prev;
-
-
- //
- // Comparison Stage Flags
- //
- wire calc_leg;
- wire calc_leg_final;
- wire calc_parity;
-
- wire [PROC_CNT_BITS-1:0] cnt_calc_leg_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_calc_leg_stop = 1 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_calc_parity = 1 * BUFFER_NUM_WORDS + 1;
-
- assign calc_leg = (proc_cnt >= cnt_calc_leg_start) && (proc_cnt <= cnt_calc_leg_stop);
- assign calc_leg_final = (proc_cnt == cnt_calc_leg_stop);
- assign calc_parity = (proc_cnt == cnt_calc_parity);
-
-
- //
- // Dummy Input
- //
- reg sub32_din_1_lsb;
- wire [31: 0] sub32_din_1 = {{31{1'b0}}, sub32_din_1_lsb};
-
- always @(posedge clk)
- //
- sub32_din_1_lsb <= (addr_in == addr_in_zero) ? 1'b1 : 1'b0;
-
-
- //
- // Subtractor (u - v)
- //
- wire [31: 0] sub32_u_minus_v_difference_out;
- wire sub32_u_minus_v_borrow_in;
- wire sub32_u_minus_v_borrow_out;
-
- subtractor32_wrapper sub32_u_minus_v
- (
- .clk (clk),
- .a (u_din),
- .b (v_din),
- .d (sub32_u_minus_v_difference_out),
- .b_in (sub32_u_minus_v_borrow_in),
- .b_out (sub32_u_minus_v_borrow_out)
- );
-
-
- //
- // Subtractor (v - 1)
- //
- wire [31: 0] sub32_v_minus_1_difference_out;
- wire sub32_v_minus_1_borrow_in;
- wire sub32_v_minus_1_borrow_out;
-
- subtractor32_wrapper sub32_v_minus_1
- (
- .clk (clk),
- .a (v_din),
- .b (sub32_din_1),
- .d (sub32_v_minus_1_difference_out),
- .b_in (sub32_v_minus_1_borrow_in),
- .b_out (sub32_v_minus_1_borrow_out)
- );
-
-
-
- //
- // Borrow Masking Logic
- //
- reg mask_borrow;
-
- always @(posedge clk)
- //
- mask_borrow <= ((proc_cnt > cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop)) ?
- 1'b0 : 1'b1;
-
- assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_borrow;
- assign sub32_v_minus_1_borrow_in = sub32_v_minus_1_borrow_out & ~mask_borrow;
-
-
- //
- // Comparison Logic
- //
- reg cmp_u_v_l;
- reg cmp_u_v_e;
- reg cmp_u_v_g;
-
- reg cmp_v_1_l;
- reg cmp_v_1_e;
- reg cmp_v_1_g;
-
- wire cmp_unresolved_u_v = !(cmp_u_v_l || cmp_u_v_g);
- wire cmp_unresolved_v_1 = !(cmp_v_1_l || cmp_v_1_g);
-
- wire cmp_u_v_borrow_is_set = (sub32_u_minus_v_borrow_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_u_v_difference_is_nonzero = (sub32_u_minus_v_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
- wire cmp_v_1_borrow_is_set = (sub32_v_minus_1_borrow_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_v_1_difference_is_nonzero = (sub32_v_minus_1_difference_out != 32'd0) ? 1'b1 : 1'b0;
-
- reg u_is_even_reg;
- reg v_is_even_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- if (ena) begin
- //
- cmp_u_v_l <= 1'b0;
- cmp_u_v_e <= 1'b0;
- cmp_u_v_g <= 1'b0;
- //
- cmp_v_1_l <= 1'b0;
- cmp_v_1_e <= 1'b0;
- cmp_v_1_g <= 1'b0;
- //
- u_is_even_reg <= 1'bX;
- v_is_even_reg <= 1'bX;
- //
- end
- //
- end else begin
- //
- // parity
- //
- if (calc_parity) begin
- u_is_even_reg <= ~u_din[0];
- v_is_even_reg <= ~v_din[0];
- end
- //
- // u <> v
- //
- if (cmp_unresolved_u_v && calc_leg) begin
- //
- if (cmp_u_v_borrow_is_set)
- cmp_u_v_l <= 1'b1;
- //
- if (!cmp_u_v_borrow_is_set && cmp_u_v_difference_is_nonzero)
- cmp_u_v_g <= 1'b1;
- //
- if (!cmp_u_v_borrow_is_set && !cmp_u_v_difference_is_nonzero && calc_leg_final)
- cmp_u_v_e <= 1'b1;
- //
- end
- //
- // v <> 1
- //
- if (cmp_unresolved_v_1 && calc_leg) begin
- //
- if (cmp_v_1_borrow_is_set)
- cmp_v_1_l <= 1'b1;
- //
- if (!cmp_v_1_borrow_is_set && cmp_v_1_difference_is_nonzero)
- cmp_v_1_g <= 1'b1;
- //
- if (!cmp_v_1_borrow_is_set && !cmp_v_1_difference_is_nonzero && calc_leg_final)
- cmp_v_1_e <= 1'b1;
- //
- end
- //
- end
-
-
- //
- // Output Flags
- //
- assign u_gt_v = !cmp_u_v_l && !cmp_u_v_e && cmp_u_v_g;
- assign v_eq_1 = !cmp_v_1_l && cmp_v_1_e && !cmp_v_1_g;
-
- assign u_is_even = u_is_even_reg;
- assign v_is_even = v_is_even_reg;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
deleted file mode 100644
index ab15563..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_precalc.v
+++ /dev/null
@@ -1,408 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_precalc
- (
- clk, rst_n,
- ena, rdy,
-
- r_addr, r_din,
- s_addr, s_din,
- u_addr, u_din,
- v_addr, v_din,
-
- r_dbl_addr, r_dbl_wren, r_dbl_dout,
- s_dbl_addr, s_dbl_wren, s_dbl_dout,
- r_plus_s_addr, r_plus_s_wren, r_plus_s_dout,
- u_half_addr, u_half_wren, u_half_dout,
- v_half_addr, v_half_wren, v_half_dout,
- u_minus_v_addr, u_minus_v_wren, u_minus_v_dout, u_minus_v_din,
- v_minus_u_addr, v_minus_u_wren, v_minus_u_dout, v_minus_u_din,
- u_minus_v_half_addr, u_minus_v_half_wren, u_minus_v_half_dout,
- v_minus_u_half_addr, v_minus_u_half_wren, v_minus_u_half_dout
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- input wire [ 32-1:0] r_din;
- input wire [ 32-1:0] s_din;
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
-
- output wire [ 32-1:0] r_dbl_dout;
- output wire [ 32-1:0] s_dbl_dout;
- output wire [ 32-1:0] r_plus_s_dout;
- output wire [ 32-1:0] u_half_dout;
- output wire [ 32-1:0] v_half_dout;
- output wire [ 32-1:0] u_minus_v_dout;
- output wire [ 32-1:0] v_minus_u_dout;
- output wire [ 32-1:0] u_minus_v_half_dout;
- output wire [ 32-1:0] v_minus_u_half_dout;
-
- output wire r_dbl_wren;
- output wire s_dbl_wren;
- output wire r_plus_s_wren;
- output wire u_half_wren;
- output wire v_half_wren;
- output wire u_minus_v_wren;
- output wire v_minus_u_wren;
- output wire u_minus_v_half_wren;
- output wire v_minus_u_half_wren;
-
- input wire [ 32-1:0] u_minus_v_din;
- input wire [ 32-1:0] v_minus_u_din;
-
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_last) ?
- addr_in + 1'b1 : addr_in_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_prev = (addr_in > addr_in_zero) ?
- addr_in - 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out1;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
- addr_out1 + 1'b1 : addr_out1_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out2;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_next = (addr_out2 < addr_out2_last) ?
- addr_out2 + 1'b1 : addr_out2_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
- addr_out2 - 1'b1 : addr_out2_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out3;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
- addr_out3 - 1'b1 : addr_out3_last;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out4;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out4_prev = (addr_out4 > addr_out4_zero) ?
- addr_out4 - 1'b1 : addr_out4_last;
-
-
- assign r_addr = addr_in;
- assign s_addr = addr_in;
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
- assign r_dbl_addr = addr_out1;
- assign s_dbl_addr = addr_out1;
- assign r_plus_s_addr = addr_out2;
- assign u_half_addr = addr_out3;
- assign v_half_addr = addr_out3;
- assign u_minus_v_addr = addr_out2;
- assign v_minus_u_addr = addr_out2;
- assign u_minus_v_half_addr = addr_out4;
- assign v_minus_u_half_addr = addr_out4;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment/Decrement Logic
- //
- wire inc_addr_in;
- wire dec_addr_in;
- wire inc_addr_out1;
- wire inc_addr_out2;
- wire dec_addr_out2;
- wire dec_addr_out3;
- wire dec_addr_out4;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = 1 * BUFFER_NUM_WORDS - 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out2_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_start = 1 * BUFFER_NUM_WORDS + 0;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_in_stop = 2 * BUFFER_NUM_WORDS - 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign dec_addr_in = (proc_cnt >= cnt_dec_addr_in_start) && (proc_cnt <= cnt_dec_addr_in_stop);
- assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
- assign inc_addr_out2 = (proc_cnt >= cnt_inc_addr_out2_start) && (proc_cnt <= cnt_inc_addr_out2_stop);
- assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
- assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
- assign dec_addr_out4 = (proc_cnt >= cnt_dec_addr_out4_start) && (proc_cnt <= cnt_dec_addr_out4_stop);
-
-
- always @(posedge clk) begin
- //
- if (rdy) begin
- //
- addr_in <= addr_in_zero;
- addr_out1 <= addr_out1_zero;
- addr_out2 <= addr_out2_zero;
- addr_out3 <= addr_out3_last;
- addr_out4 <= addr_out4_last;
- //
- end else begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else if (dec_addr_in) addr_in <= addr_in_prev;
- //
- if (inc_addr_out1) addr_out1 <= addr_out1_next;
- else addr_out1 <= addr_out1_zero;
- //
- if (inc_addr_out2) addr_out2 <= addr_out2_next;
- else if (dec_addr_out2) addr_out2 <= addr_out2_prev;
- //
- if (dec_addr_out3) addr_out3 <= addr_out3_prev;
- else addr_out3 <= addr_out3_last;
- //
- if (dec_addr_out4) addr_out4 <= addr_out4_prev;
- else addr_out4 <= addr_out4_last;
- //
- end
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_out1;
- wire wren_out2;
- wire wren_out3;
- wire wren_out4;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 1 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out4_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out4_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
- assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
- assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
- assign wren_out4 = (proc_cnt >= cnt_wren_out4_start) && (proc_cnt <= cnt_wren_out4_stop);
-
- assign r_dbl_wren = wren_out1;
- assign s_dbl_wren = wren_out1;
- assign r_plus_s_wren = wren_out2;
- assign u_half_wren = wren_out3;
- assign v_half_wren = wren_out3;
- assign u_minus_v_wren = wren_out2;
- assign v_minus_u_wren = wren_out2;
- assign u_minus_v_half_wren = wren_out4;
- assign v_minus_u_half_wren = wren_out4;
-
-
- //
- // Adder (r + s)
- //
- wire [31: 0] add32_r_plus_s_sum_out;
- wire add32_r_plus_s_carry_in;
- wire add32_r_plus_s_carry_out;
-
- adder32_wrapper add32_r_plus_s
- (
- .clk (clk),
- .a (r_din),
- .b (s_din),
- .s (add32_r_plus_s_sum_out),
- .c_in (add32_r_plus_s_carry_in),
- .c_out (add32_r_plus_s_carry_out)
- );
-
- //
- // Subtractor (u - v)
- //
- wire [31: 0] sub32_u_minus_v_difference_out;
- wire sub32_u_minus_v_borrow_in;
- wire sub32_u_minus_v_borrow_out;
-
- subtractor32_wrapper sub32_u_minus_v
- (
- .clk (clk),
- .a (u_din),
- .b (v_din),
- .d (sub32_u_minus_v_difference_out),
- .b_in (sub32_u_minus_v_borrow_in),
- .b_out (sub32_u_minus_v_borrow_out)
- );
-
- //
- // Subtractor (v - u)
- //
- wire [31: 0] sub32_v_minus_u_difference_out;
- wire sub32_v_minus_u_borrow_in;
- wire sub32_v_minus_u_borrow_out;
-
- subtractor32_wrapper sub32_v_minus_u
- (
- .clk (clk),
- .a (v_din),
- .b (u_din),
- .d (sub32_v_minus_u_difference_out),
- .b_in (sub32_v_minus_u_borrow_in),
- .b_out (sub32_v_minus_u_borrow_out)
- );
-
-
- //
- // Carry & Borrow Masking Logic
- //
- reg mask_carry_borrow;
-
- always @(posedge clk)
- //
- mask_carry_borrow <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- 1'b0 : 1'b1;
-
- assign add32_r_plus_s_carry_in = add32_r_plus_s_carry_out & ~mask_carry_borrow;
- assign sub32_u_minus_v_borrow_in = sub32_u_minus_v_borrow_out & ~mask_carry_borrow;
- assign sub32_v_minus_u_borrow_in = sub32_v_minus_u_borrow_out & ~mask_carry_borrow;
-
-
- //
- // Carry Bits
- //
- reg r_dbl_carry;
- reg s_dbl_carry;
- reg u_half_carry;
- reg v_half_carry;
- reg u_minus_v_half_carry;
- reg v_minus_u_half_carry;
-
- always @(posedge clk) begin
-
- r_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- r_din[31] : 1'b0;
-
- s_dbl_carry <= ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ?
- s_din[31] : 1'b0;
-
- u_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- u_din[0] : 1'b0;
-
- v_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- v_din[0] : 1'b0;
-
- u_minus_v_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
- u_minus_v_din[0] : 1'b0;
-
- v_minus_u_half_carry <= ((proc_cnt >= cnt_wren_out4_start) && (proc_cnt < cnt_wren_out4_stop)) ?
- v_minus_u_din[0] : 1'b0;
-
- end
-
-
- //
- // Data Mapper
- //
- assign r_dbl_dout = {r_din[30:0], r_dbl_carry};
- assign s_dbl_dout = {s_din[30:0], s_dbl_carry};
- assign r_plus_s_dout = add32_r_plus_s_sum_out;
- assign u_half_dout = {u_half_carry, u_din[31:1]};
- assign v_half_dout = {v_half_carry, v_din[31:1]};
- assign u_minus_v_dout = sub32_u_minus_v_difference_out;
- assign v_minus_u_dout = sub32_v_minus_u_difference_out;
- assign u_minus_v_half_dout = {u_minus_v_half_carry, u_minus_v_din[31:1]};
- assign v_minus_u_half_dout = {v_minus_u_half_carry, v_minus_u_din[31:1]};
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
deleted file mode 100644
index 0cd6ac5..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_invert_update.v
+++ /dev/null
@@ -1,257 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_invert_update
- (
- clk, rst_n,
- ena, rdy,
-
- u_gt_v, v_eq_1,
- u_is_even, v_is_even,
-
- r_addr, r_wren, r_dout,
- s_addr, s_wren, s_dout,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout,
-
- r_dbl_addr, r_dbl_din,
- s_dbl_addr, s_dbl_din,
- r_plus_s_addr, r_plus_s_din,
- u_half_addr, u_half_din,
- v_half_addr, v_half_din,
- u_minus_v_half_addr, u_minus_v_half_din,
- v_minus_u_half_addr, v_minus_u_half_din
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire u_gt_v;
- input wire v_eq_1;
- input wire u_is_even;
- input wire v_is_even;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire r_wren;
- output wire s_wren;
- output wire u_wren;
- output wire v_wren;
-
- output wire [ 32-1:0] r_dout;
- output wire [ 32-1:0] s_dout;
- output wire [ 32-1:0] u_dout;
- output wire [ 32-1:0] v_dout;
-
- output wire [BUFFER_ADDR_BITS-1:0] r_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] s_dbl_addr;
- output wire [BUFFER_ADDR_BITS-1:0] r_plus_s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_minus_v_half_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_minus_u_half_addr;
-
- input wire [ 32-1:0] r_dbl_din;
- input wire [ 32-1:0] s_dbl_din;
- input wire [ 32-1:0] r_plus_s_din;
- input wire [ 32-1:0] u_half_din;
- input wire [ 32-1:0] v_half_din;
- input wire [ 32-1:0] u_minus_v_half_din;
- input wire [ 32-1:0] v_minus_u_half_din;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
- addr_in + 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
- addr_out + 1'b1 : addr_out_zero;
-
- assign r_addr = addr_out;
- assign s_addr = addr_out;
- assign u_addr = addr_out;
- assign v_addr = addr_out;
-
- assign r_dbl_addr = addr_in;
- assign s_dbl_addr = addr_in;
- assign r_plus_s_addr = addr_in;
- assign u_half_addr = addr_in;
- assign v_half_addr = addr_in;
- assign u_minus_v_half_addr = addr_in;
- assign v_minus_u_half_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_in;
- wire inc_addr_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else addr_in <= addr_in_zero;
- //
- if (inc_addr_out) addr_out <= addr_out_next;
- else addr_out <= addr_out_zero;
- //
- end
-
- //
- // Write Enable Logic
- //
- wire wren_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
- reg r_wren_allow;
- reg s_wren_allow;
- reg u_wren_allow;
- reg v_wren_allow;
-
- assign r_wren = wren_out && r_wren_allow && !v_eq_1 && !rdy;
- assign s_wren = wren_out && s_wren_allow && !v_eq_1 && !rdy;
- assign u_wren = wren_out && u_wren_allow && !v_eq_1 && !rdy;
- assign v_wren = wren_out && v_wren_allow && !v_eq_1 && !rdy;
-
-
- //
- // Data Logic
- //
- reg [31: 0] r_dout_mux;
- reg [31: 0] s_dout_mux;
- reg [31: 0] u_dout_mux;
- reg [31: 0] v_dout_mux;
-
- assign r_dout = r_dout_mux;
- assign s_dout = s_dout_mux;
- assign u_dout = u_dout_mux;
- assign v_dout = v_dout_mux;
-
- always @(*) begin
- //
- // r, s, u, v
- //
- if (u_is_even) begin
- //
- u_dout_mux = u_half_din;
- v_dout_mux = {32{1'bX}};
- r_dout_mux = {32{1'bX}};
- s_dout_mux = s_dbl_din;
- //
- u_wren_allow = 1'b1;
- v_wren_allow = 1'b0;
- r_wren_allow = 1'b0;
- s_wren_allow = 1'b1;
- //
- end else begin
- //
- if (v_is_even) begin
- //
- u_dout_mux = {32{1'bX}};
- v_dout_mux = v_half_din;
- r_dout_mux = r_dbl_din;
- s_dout_mux = {32{1'bX}};
- //
- u_wren_allow = 1'b0;
- v_wren_allow = 1'b1;
- r_wren_allow = 1'b1;
- s_wren_allow = 1'b0;
- //
- end else begin
- //
- u_dout_mux = u_gt_v ? u_minus_v_half_din : {32{1'bX}};
- v_dout_mux = u_gt_v ? {32{1'bX}} : v_minus_u_half_din;
- r_dout_mux = u_gt_v ? r_plus_s_din : r_dbl_din;
- s_dout_mux = u_gt_v ? s_dbl_din : r_plus_s_din;
- //
- u_wren_allow = u_gt_v;
- v_wren_allow = !u_gt_v;
- r_wren_allow = 1'b1;
- s_wren_allow = 1'b1;
- //
- end
- //
- end
- //
- end
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
deleted file mode 100644
index fb858a6..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_precalc.v
+++ /dev/null
@@ -1,328 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_reduce_precalc
- (
- clk, rst_n,
- ena, rdy,
-
- k,
-
- s_is_odd, k_is_nul,
-
- r_addr, r_din, r_wren, r_dout,
- s_addr, s_din,
- u_addr, u_wren, u_dout,
- v_addr, v_wren, v_dout,
- q_addr, q_din
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter OPERAND_ADDR_BITS = 3;
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
- parameter K_NUM_BITS = 10;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = 2 * BUFFER_NUM_WORDS + 4;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire [ K_NUM_BITS-1:0] k;
-
- output wire s_is_odd;
- output wire k_is_nul;
-
- output wire [ BUFFER_ADDR_BITS-1:0] r_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [ BUFFER_ADDR_BITS-1:0] v_addr;
- output wire [OPERAND_ADDR_BITS-1:0] q_addr;
-
- input wire [ 32-1:0] r_din;
- input wire [ 32-1:0] s_din;
- input wire [ 32-1:0] q_din;
-
- output wire r_wren;
- output wire u_wren;
- output wire v_wren;
-
- output wire [ 32-1:0] r_dout;
- output wire [ 32-1:0] u_dout;
- output wire [ 32-1:0] v_dout;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [ BUFFER_ADDR_BITS-1:0] addr_in_buf;
- reg [OPERAND_ADDR_BITS-1:0] addr_in_op;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out1;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out2;
- reg [ BUFFER_ADDR_BITS-1:0] addr_out3;
-
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_last = BUFFER_NUM_WORDS - 1;
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_next = (addr_in_buf < addr_in_buf_last) ?
- addr_in_buf + 1'b1 : addr_in_buf_zero;
- wire [ BUFFER_ADDR_BITS-1:0] addr_in_buf_prev = (addr_in_buf > addr_in_buf_zero) ?
- addr_in_buf - 1'b1 : addr_in_buf_zero;
-
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_last = OPERAND_NUM_WORDS - 1;
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_zero = {OPERAND_ADDR_BITS{1'b0}};
- wire [OPERAND_ADDR_BITS-1:0] addr_in_op_next = (addr_in_op < addr_in_op_last) ?
- addr_in_op + 1'b1 : addr_in_op_zero;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_next = (addr_out1 < addr_out1_last) ?
- addr_out1 + 1'b1 : addr_out1_zero;
- wire [BUFFER_ADDR_BITS-1:0] addr_out1_prev = (addr_out1 > addr_out1_zero) ?
- addr_out1 - 1'b1 : addr_out1_zero;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out2_prev = (addr_out2 > addr_out2_zero) ?
- addr_out2 - 1'b1 : addr_out2_last;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_last = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out3_prev = (addr_out3 > addr_out3_zero) ?
- addr_out3 - 1'b1 : addr_out3_last;
-
-
- assign s_addr = addr_in_buf;
- assign q_addr = addr_in_op;
- assign r_addr = addr_out1;
- assign u_addr = addr_out2;
- assign v_addr = addr_out3;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment/Decrement Logic
- //
- wire inc_addr_buf_in;
- wire dec_addr_buf_in;
- wire inc_addr_op_in;
- wire inc_addr_out1;
- wire dec_addr_out1;
- wire dec_addr_out2;
- wire dec_addr_out3;
-
- wire [PROC_CNT_BITS-1:0] cnt_calc_flags = 0 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_start = 0 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_buf_in_stop = 1 * BUFFER_NUM_WORDS - 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_start = 1 * BUFFER_NUM_WORDS + 0;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_buf_in_stop = 2 * BUFFER_NUM_WORDS - 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_start = 0 * OPERAND_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_op_in_stop = 1 * OPERAND_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out1_stop = 1 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_start = 1 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out1_stop = 2 * BUFFER_NUM_WORDS + 1;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_dec_addr_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign inc_addr_buf_in = (proc_cnt >= cnt_inc_addr_buf_in_start) && (proc_cnt <= cnt_inc_addr_buf_in_stop);
- assign dec_addr_buf_in = (proc_cnt >= cnt_dec_addr_buf_in_start) && (proc_cnt <= cnt_dec_addr_buf_in_stop);
- assign inc_addr_op_in = (proc_cnt >= cnt_inc_addr_op_in_start) && (proc_cnt <= cnt_inc_addr_op_in_stop);
- assign inc_addr_out1 = (proc_cnt >= cnt_inc_addr_out1_start) && (proc_cnt <= cnt_inc_addr_out1_stop);
- assign dec_addr_out1 = (proc_cnt >= cnt_dec_addr_out1_start) && (proc_cnt <= cnt_dec_addr_out1_stop);
- assign dec_addr_out2 = (proc_cnt >= cnt_dec_addr_out2_start) && (proc_cnt <= cnt_dec_addr_out2_stop);
- assign dec_addr_out3 = (proc_cnt >= cnt_dec_addr_out3_start) && (proc_cnt <= cnt_dec_addr_out3_stop);
-
- always @(posedge clk) begin
- //
- if (rdy) begin
- //
- addr_in_buf <= addr_in_buf_zero;
- addr_in_op <= addr_in_op_zero;
- addr_out1 <= addr_out1_zero;
- addr_out2 <= addr_out2_last;
- addr_out3 <= addr_out3_last;
- //
- end else begin
- //
- if (inc_addr_buf_in) addr_in_buf <= addr_in_buf_next;
- else if (dec_addr_buf_in) addr_in_buf <= addr_in_buf_prev;
- //
- if (inc_addr_op_in) addr_in_op <= addr_in_op_next;
- else addr_in_op <= addr_in_op_zero;
- //
- if (inc_addr_out1) addr_out1 <= addr_out1_next;
- else if (dec_addr_out1) addr_out1 <= addr_out1_prev;
- //
- if (dec_addr_out2) addr_out2 <= addr_out2_prev;
- else addr_out2 <= addr_out2_last;
- //
- if (dec_addr_out3) addr_out3 <= addr_out3_prev;
- else addr_out3 <= addr_out3_last;
- //
- end
- //
- end
-
-
- //
- // Write Enable Logic
- //
- wire wren_out1;
- wire wren_out2;
- wire wren_out3;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_start = 0 * BUFFER_NUM_WORDS + 3;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out1_stop = 1 * BUFFER_NUM_WORDS + 2;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_start = 1 * BUFFER_NUM_WORDS + 1;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out2_stop = 2 * BUFFER_NUM_WORDS + 0;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_start = 1 * BUFFER_NUM_WORDS + 4;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out3_stop = 2 * BUFFER_NUM_WORDS + 3;
-
- assign wren_out1 = (proc_cnt >= cnt_wren_out1_start) && (proc_cnt <= cnt_wren_out1_stop);
- assign wren_out2 = (proc_cnt >= cnt_wren_out2_start) && (proc_cnt <= cnt_wren_out2_stop);
- assign wren_out3 = (proc_cnt >= cnt_wren_out3_start) && (proc_cnt <= cnt_wren_out3_stop);
-
- assign r_wren = wren_out1;
- assign u_wren = wren_out2;
- assign v_wren = wren_out3;
-
- //
- // Adder (s + q)
- //
- wire [31: 0] q_din_masked;
- wire [31: 0] add32_s_plus_q_sum_out;
- wire add32_s_plus_q_carry_in;
- wire add32_s_plus_q_carry_out;
-
- adder32_wrapper add32_r_plus_s
- (
- .clk (clk),
- .a (s_din),
- .b (q_din_masked),
- .s (add32_s_plus_q_sum_out),
- .c_in (add32_s_plus_q_carry_in),
- .c_out (add32_s_plus_q_carry_out)
- );
-
-
- //
- // Carry Masking Logic
- //
- wire mask_carry;
-
- assign mask_carry = ((proc_cnt >= cnt_wren_out1_start) && (proc_cnt < cnt_wren_out1_stop)) ? 1'b0 : 1'b1;
-
-
- //
- // Addend Masking Logic
- //
- reg q_din_mask;
-
- always @(posedge clk)
- q_din_mask <= (addr_in_buf == addr_in_buf_last) ? 1'b1 : 1'b0;
-
- assign q_din_masked = q_din_mask ? {32{1'b0}} : q_din;
-
- assign add32_s_plus_q_carry_in = add32_s_plus_q_carry_out & ~mask_carry;
-
-
- //
- // Carry Bits
- //
- reg s_half_carry;
- reg s_plus_q_half_carry;
-
- always @(posedge clk) begin
- //
- s_half_carry <= ((proc_cnt >= cnt_wren_out2_start) && (proc_cnt < cnt_wren_out2_stop)) ?
- s_din[0] : 1'b0;
- //
- s_plus_q_half_carry <= ((proc_cnt >= cnt_wren_out3_start) && (proc_cnt < cnt_wren_out3_stop)) ?
- r_din[0] : 1'b0;
- //
- end
-
- //
- // Data Mapper
- //
- assign r_dout = add32_s_plus_q_sum_out;
- assign u_dout = {s_half_carry, s_din[31:1]};
- assign v_dout = {s_plus_q_half_carry, r_din[31:1]};
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
- //
- // Output Flags
- //
- reg s_is_odd_reg;
- reg k_is_nul_reg;
-
- assign s_is_odd = s_is_odd_reg;
- assign k_is_nul = k_is_nul_reg;
-
- always @(posedge clk)
- //
- if (proc_cnt == cnt_calc_flags) begin
- s_is_odd_reg <= s_din[0];
- k_is_nul_reg <= (k == {K_NUM_BITS{1'b0}}) ? 1'b1 : 1'b0;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v b/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
deleted file mode 100644
index ea5b854..0000000
--- a/rtl/modular/modular_invertor/helper/modinv_helper_reduce_update.v
+++ /dev/null
@@ -1,153 +0,0 @@
-`timescale 1ns / 1ps
-
-module modinv_helper_reduce_update
- (
- clk, rst_n,
- ena, rdy,
-
- s_is_odd, k_is_nul,
-
- s_addr, s_wren, s_dout,
- u_addr, u_din,
- v_addr, v_din
- );
-
-
- //
- // Parameters
- //
- parameter BUFFER_NUM_WORDS = 9;
- parameter BUFFER_ADDR_BITS = 4;
-
-
- //
- // clog2
- //
-`include "..\modinv_clog2.v"
-
-
- //
- // Constants
- //
- localparam PROC_NUM_CYCLES = BUFFER_NUM_WORDS + 3;
- localparam PROC_CNT_BITS = clog2(PROC_NUM_CYCLES);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
- input wire ena;
- output wire rdy;
-
- input wire s_is_odd;
- input wire k_is_nul;
-
- output wire [BUFFER_ADDR_BITS-1:0] s_addr;
- output wire [BUFFER_ADDR_BITS-1:0] u_addr;
- output wire [BUFFER_ADDR_BITS-1:0] v_addr;
-
- output wire s_wren;
-
- output wire [ 32-1:0] s_dout;
-
- input wire [ 32-1:0] u_din;
- input wire [ 32-1:0] v_din;
-
-
- //
- // Counter
- //
- reg [PROC_CNT_BITS-1:0] proc_cnt;
-
- wire [PROC_CNT_BITS-1:0] proc_cnt_max = PROC_NUM_CYCLES - 1;
- wire [PROC_CNT_BITS-1:0] proc_cnt_zero = {PROC_CNT_BITS{1'b0}};
- wire [PROC_CNT_BITS-1:0] proc_cnt_next = (proc_cnt < proc_cnt_max) ?
- proc_cnt + 1'b1 : proc_cnt_zero;
-
- //
- // Addresses
- //
- reg [BUFFER_ADDR_BITS-1:0] addr_in;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_in_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_in_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_in_next = (addr_in < addr_in_max) ?
- addr_in + 1'b1 : addr_in_zero;
-
- reg [BUFFER_ADDR_BITS-1:0] addr_out;
-
- wire [BUFFER_ADDR_BITS-1:0] addr_out_max = BUFFER_NUM_WORDS - 1;
- wire [BUFFER_ADDR_BITS-1:0] addr_out_zero = {BUFFER_ADDR_BITS{1'b0}};
- wire [BUFFER_ADDR_BITS-1:0] addr_out_next = (addr_out < addr_out_max) ?
- addr_out + 1'b1 : addr_out_zero;
-
- assign s_addr = addr_out;
- assign u_addr = addr_in;
- assign v_addr = addr_in;
-
-
- //
- // Ready Flag
- //
- assign rdy = (proc_cnt == proc_cnt_zero);
-
-
- //
- // Address Increment Logic
- //
- wire inc_addr_in;
- wire inc_addr_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_start = 1;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_in_stop = BUFFER_NUM_WORDS;
-
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_inc_addr_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign inc_addr_in = (proc_cnt >= cnt_inc_addr_in_start) && (proc_cnt <= cnt_inc_addr_in_stop);
- assign inc_addr_out = (proc_cnt >= cnt_inc_addr_out_start) && (proc_cnt <= cnt_inc_addr_out_stop);
-
- always @(posedge clk) begin
- //
- if (inc_addr_in) addr_in <= addr_in_next;
- else addr_in <= addr_in_zero;
- //
- if (inc_addr_out) addr_out <= addr_out_next;
- else addr_out <= addr_out_zero;
- //
- end
-
- //
- // Write Enable Logic
- //
- wire wren_out;
-
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_start = 2;
- wire [PROC_CNT_BITS-1:0] cnt_wren_out_stop = BUFFER_NUM_WORDS + 1;
-
- assign wren_out = (proc_cnt >= cnt_wren_out_start) && (proc_cnt <= cnt_wren_out_stop);
-
- assign s_wren = wren_out && !k_is_nul; //s_wren_allow && !v_eq_1 && !rdy;
-
-
- //
- // Data Logic
- //
- assign s_dout = s_is_odd ? v_din : u_din;
-
-
- //
- // Primary Counter Logic
- //
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) proc_cnt <= proc_cnt_zero;
- else begin
- if (!rdy) proc_cnt <= proc_cnt_next;
- else if (ena) proc_cnt <= proc_cnt_next;
- end
-
-
-endmodule
diff --git a/rtl/modular/modular_invertor/modinv_clog2.v b/rtl/modular/modular_invertor/modinv_clog2.v
deleted file mode 100644
index 2f7b64d..0000000
--- a/rtl/modular/modular_invertor/modinv_clog2.v
+++ /dev/null
@@ -1,10 +0,0 @@
-function integer clog2;
- input integer value;
- integer result;
- begin
- value = value - 1;
- for (result = 0; value > 0; result = result + 1)
- value = value >> 1;
- clog2 = result;
- end
-endfunction
diff --git a/rtl/modular/modular_invertor/modular_invertor.v b/rtl/modular/modular_invertor/modular_invertor.v
deleted file mode 100644
index e9f2460..0000000
--- a/rtl/modular/modular_invertor/modular_invertor.v
+++ /dev/null
@@ -1,981 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_invertor.v
-// -----------------------------------------------------------------------------
-// Modular invertor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_invertor
- (
- clk, rst_n,
- ena, rdy,
- a_addr, q_addr, a1_addr, a1_wren,
- a_din, q_din, a1_dout
- );
-
-
- //
- // Parameters
- //
- parameter MAX_OPERAND_WIDTH = 256;
-
-
- //
- // clog2
- //
-`include "modinv_clog2.v"
-
-
- //
- // More Parameters
- //
- localparam OPERAND_NUM_WORDS = MAX_OPERAND_WIDTH / 32;
- localparam OPERAND_ADDR_BITS = clog2(OPERAND_NUM_WORDS);
-
- localparam BUFFER_NUM_WORDS = OPERAND_NUM_WORDS + 1;
- localparam BUFFER_ADDR_BITS = clog2(BUFFER_NUM_WORDS);
-
- localparam LOOP_NUM_ROUNDS = 2 * MAX_OPERAND_WIDTH;
- localparam ROUND_COUNTER_BITS = clog2(LOOP_NUM_ROUNDS);
-
- localparam K_NUM_BITS = clog2(LOOP_NUM_ROUNDS + 1);
-
-
- //
- // Ports
- //
- input wire clk;
- input wire rst_n;
-
- input wire ena;
- output wire rdy;
-
- output wire [OPERAND_ADDR_BITS-1:0] a_addr;
- output reg [OPERAND_ADDR_BITS-1:0] q_addr;
- output wire [OPERAND_ADDR_BITS-1:0] a1_addr;
- output wire a1_wren;
-
- input wire [32-1:0] a_din;
- input wire [32-1:0] q_din;
- output wire [32-1:0] a1_dout;
-
-
- //
- // "Redundant" Power of 2 (K)
- //
- reg [K_NUM_BITS-1:0] k;
-
-
- //
- // Buffers
- //
- reg [BUFFER_ADDR_BITS-1:0] buf_r_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_r_rd_addr;
- reg buf_r_wr_en;
- reg [ 32-1:0] buf_r_wr_din;
- wire [ 32-1:0] buf_r_wr_dout;
- wire [ 32-1:0] buf_r_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r
- ( .clk(clk),
- .a_addr(buf_r_wr_addr), .a_out(buf_r_wr_dout), .a_wr(buf_r_wr_en), .a_in(buf_r_wr_din),
- .b_addr(buf_r_rd_addr), .b_out(buf_r_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_s_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_s_rd_addr;
- reg buf_s_wr_en;
- reg [ 32-1:0] buf_s_wr_din;
- wire [ 32-1:0] buf_s_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_s
- ( .clk(clk),
- .a_addr(buf_s_wr_addr), .a_out(), .a_wr(buf_s_wr_en), .a_in(buf_s_wr_din),
- .b_addr(buf_s_rd_addr), .b_out(buf_s_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_u_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_u_rd_addr;
- reg buf_u_wr_en;
- reg [ 32-1:0] buf_u_wr_din;
- wire [ 32-1:0] buf_u_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u
- ( .clk(clk),
- .a_addr(buf_u_wr_addr), .a_out(), .a_wr(buf_u_wr_en), .a_in(buf_u_wr_din),
- .b_addr(buf_u_rd_addr), .b_out(buf_u_rd_dout)
- );
-
- reg [BUFFER_ADDR_BITS-1:0] buf_v_wr_addr;
- reg [BUFFER_ADDR_BITS-1:0] buf_v_rd_addr;
- reg buf_v_wr_en;
- reg [ 32-1:0] buf_v_wr_din;
- wire [ 32-1:0] buf_v_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v
- ( .clk(clk),
- .a_addr(buf_v_wr_addr), .a_out(), .a_wr(buf_v_wr_en), .a_in(buf_v_wr_din),
- .b_addr(buf_v_rd_addr), .b_out(buf_v_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_r_dbl_rd_addr;
- wire buf_r_dbl_wr_en;
- wire [ 32-1:0] buf_r_dbl_wr_din;
- wire [ 32-1:0] buf_r_dbl_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r_dbl
- ( .clk(clk),
- .a_addr(buf_r_dbl_wr_addr), .a_out(), .a_wr(buf_r_dbl_wr_en), .a_in(buf_r_dbl_wr_din),
- .b_addr(buf_r_dbl_rd_addr), .b_out(buf_r_dbl_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_s_dbl_rd_addr;
- wire buf_s_dbl_wr_en;
- wire [ 32-1:0] buf_s_dbl_wr_din;
- wire [ 32-1:0] buf_s_dbl_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_s_dbl
- ( .clk(clk),
- .a_addr(buf_s_dbl_wr_addr), .a_out(), .a_wr(buf_s_dbl_wr_en), .a_in(buf_s_dbl_wr_din),
- .b_addr(buf_s_dbl_rd_addr), .b_out(buf_s_dbl_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_r_plus_s_rd_addr;
- wire buf_r_plus_s_wr_en;
- wire [ 32-1:0] buf_r_plus_s_wr_din;
- wire [ 32-1:0] buf_r_plus_s_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_r_plus_s
- ( .clk(clk),
- .a_addr(buf_r_plus_s_wr_addr), .a_out(), .a_wr(buf_r_plus_s_wr_en), .a_in(buf_r_plus_s_wr_din),
- .b_addr(buf_r_plus_s_rd_addr), .b_out(buf_r_plus_s_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_rd_addr;
- wire buf_u_minus_v_wr_en;
- wire [ 32-1:0] buf_u_minus_v_wr_din;
- wire [ 32-1:0] buf_u_minus_v_wr_dout;
-
- assign buf_u_minus_v_rd_addr = ~buf_u_minus_v_wr_addr;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_minus_v
- ( .clk(clk),
- .a_addr(buf_u_minus_v_wr_addr), .a_out(buf_u_minus_v_wr_dout), .a_wr(buf_u_minus_v_wr_en), .a_in(buf_u_minus_v_wr_din),
- .b_addr(buf_u_minus_v_rd_addr), .b_out()
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_rd_addr;
- wire buf_v_minus_u_wr_en;
- wire [ 32-1:0] buf_v_minus_u_wr_din;
- wire [ 32-1:0] buf_v_minus_u_wr_dout;
-
- assign buf_v_minus_u_rd_addr = ~buf_v_minus_u_wr_addr;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_minus_u
- ( .clk(clk),
- .a_addr(buf_v_minus_u_wr_addr), .a_out(buf_v_minus_u_wr_dout), .a_wr(buf_v_minus_u_wr_en), .a_in(buf_v_minus_u_wr_din),
- .b_addr(buf_v_minus_u_rd_addr), .b_out()
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_half_rd_addr;
- wire buf_u_half_wr_en;
- wire [ 32-1:0] buf_u_half_wr_din;
- wire [ 32-1:0] buf_u_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_half
- ( .clk(clk),
- .a_addr(buf_u_half_wr_addr), .a_out(), .a_wr(buf_u_half_wr_en), .a_in(buf_u_half_wr_din),
- .b_addr(buf_u_half_rd_addr), .b_out(buf_u_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_half_rd_addr;
- wire buf_v_half_wr_en;
- wire [ 32-1:0] buf_v_half_wr_din;
- wire [ 32-1:0] buf_v_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_half
- ( .clk(clk),
- .a_addr(buf_v_half_wr_addr), .a_out(), .a_wr(buf_v_half_wr_en), .a_in(buf_v_half_wr_din),
- .b_addr(buf_v_half_rd_addr), .b_out(buf_v_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_u_minus_v_half_rd_addr;
- wire buf_u_minus_v_half_wr_en;
- wire [ 32-1:0] buf_u_minus_v_half_wr_din;
- wire [ 32-1:0] buf_u_minus_v_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_u_minus_v_half
- ( .clk(clk),
- .a_addr(buf_u_minus_v_half_wr_addr), .a_out(), .a_wr(buf_u_minus_v_half_wr_en), .a_in(buf_u_minus_v_half_wr_din),
- .b_addr(buf_u_minus_v_half_rd_addr), .b_out(buf_u_minus_v_half_rd_dout)
- );
-
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_wr_addr;
- wire [BUFFER_ADDR_BITS-1:0] buf_v_minus_u_half_rd_addr;
- wire buf_v_minus_u_half_wr_en;
- wire [ 32-1:0] buf_v_minus_u_half_wr_din;
- wire [ 32-1:0] buf_v_minus_u_half_rd_dout;
-
- bram_1rw_1ro_readfirst #
- ( .MEM_WIDTH(32), .MEM_ADDR_BITS(BUFFER_ADDR_BITS)
- )
- buf_v_minus_u_half
- ( .clk(clk),
- .a_addr(buf_v_minus_u_half_wr_addr), .a_out(), .a_wr(buf_v_minus_u_half_wr_en), .a_in(buf_v_minus_u_half_wr_din),
- .b_addr(buf_v_minus_u_half_rd_addr), .b_out(buf_v_minus_u_half_rd_dout)
- );
-
-
- //
- // Helper Modules
- //
- wire helper_init_ena;
- wire helper_invert_precalc_ena;
- wire helper_invert_compare_ena;
- wire helper_invert_update_ena;
- wire helper_reduce_precalc_ena;
- wire helper_reduce_update_ena;
- wire helper_copy_ena;
-
- wire helper_init_rdy;
- wire helper_invert_precalc_rdy;
- wire helper_invert_compare_rdy;
- wire helper_invert_update_rdy;
- wire helper_reduce_precalc_rdy;
- wire helper_reduce_update_rdy;
- wire helper_copy_rdy;
-
- wire helper_init_done = helper_init_rdy && !helper_init_ena;
- wire helper_invert_precalc_done = helper_invert_precalc_rdy && !helper_invert_precalc_ena;
- wire helper_invert_compare_done = helper_invert_compare_rdy && !helper_invert_compare_ena;
- wire helper_invert_update_done = helper_invert_update_rdy && !helper_invert_update_ena;
- wire helper_reduce_precalc_done = helper_reduce_precalc_rdy && !helper_reduce_precalc_ena;
- wire helper_reduce_update_done = helper_reduce_update_rdy && !helper_reduce_update_ena;
- wire helper_copy_done = helper_copy_rdy && !helper_copy_ena;
-
-
- //
- // Helper Module - Initialization
- //
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_r_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_s_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_u_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_init_v_addr;
- wire [OPERAND_ADDR_BITS-1:0] helper_init_q_addr;
-
- wire helper_init_r_wren;
- wire helper_init_s_wren;
- wire helper_init_u_wren;
- wire helper_init_v_wren;
-
- wire [ 32-1:0] helper_init_r_data;
- wire [ 32-1:0] helper_init_s_data;
- wire [ 32-1:0] helper_init_u_data;
- wire [ 32-1:0] helper_init_v_data;
-
- modinv_helper_init #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
-
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_init
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_init_ena),
- .rdy (helper_init_rdy),
-
- .a_addr (a_addr),
- .q_addr (helper_init_q_addr),
-
- .r_addr (helper_init_r_addr),
- .s_addr (helper_init_s_addr),
- .u_addr (helper_init_u_addr),
- .v_addr (helper_init_v_addr),
-
- .q_din (q_din),
- .a_din (a_din),
-
- .r_dout (helper_init_r_data),
- .s_dout (helper_init_s_data),
- .u_dout (helper_init_u_data),
- .v_dout (helper_init_v_data),
-
- .r_wren (helper_init_r_wren),
- .s_wren (helper_init_s_wren),
- .u_wren (helper_init_u_wren),
- .v_wren (helper_init_v_wren)
- );
-
-
- //
- // Helper Module - Inversion Pre-Calculation
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_r_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_precalc_v_addr;
-
- modinv_helper_invert_precalc #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_precalc
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_precalc_ena),
- .rdy (helper_invert_precalc_rdy),
-
- .r_addr (helper_invert_precalc_r_addr),
- .s_addr (helper_invert_precalc_s_addr),
- .u_addr (helper_invert_precalc_u_addr),
- .v_addr (helper_invert_precalc_v_addr),
-
- .r_din (buf_r_rd_dout),
- .s_din (buf_s_rd_dout),
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout),
-
- .r_dbl_addr (buf_r_dbl_wr_addr),
- .s_dbl_addr (buf_s_dbl_wr_addr),
- .r_plus_s_addr (buf_r_plus_s_wr_addr),
-
- .u_half_addr (buf_u_half_wr_addr),
- .v_half_addr (buf_v_half_wr_addr),
- .u_minus_v_addr (buf_u_minus_v_wr_addr),
- .v_minus_u_addr (buf_v_minus_u_wr_addr),
- .u_minus_v_half_addr (buf_u_minus_v_half_wr_addr),
- .v_minus_u_half_addr (buf_v_minus_u_half_wr_addr),
-
- .r_dbl_dout (buf_r_dbl_wr_din),
- .s_dbl_dout (buf_s_dbl_wr_din),
- .r_plus_s_dout (buf_r_plus_s_wr_din),
-
- .u_half_dout (buf_u_half_wr_din),
- .v_half_dout (buf_v_half_wr_din),
- .u_minus_v_dout (buf_u_minus_v_wr_din),
- .v_minus_u_dout (buf_v_minus_u_wr_din),
- .u_minus_v_half_dout (buf_u_minus_v_half_wr_din),
- .v_minus_u_half_dout (buf_v_minus_u_half_wr_din),
-
- .r_dbl_wren (buf_r_dbl_wr_en),
- .s_dbl_wren (buf_s_dbl_wr_en),
- .r_plus_s_wren (buf_r_plus_s_wr_en),
-
- .u_half_wren (buf_u_half_wr_en),
- .v_half_wren (buf_v_half_wr_en),
- .u_minus_v_wren (buf_u_minus_v_wr_en),
- .v_minus_u_wren (buf_v_minus_u_wr_en),
- .u_minus_v_half_wren (buf_u_minus_v_half_wr_en),
- .v_minus_u_half_wren (buf_v_minus_u_half_wr_en),
-
- .u_minus_v_din (buf_u_minus_v_wr_dout),
- .v_minus_u_din (buf_v_minus_u_wr_dout)
- );
-
-
- //
- // Helper Module - Inversion Comparison
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_compare_v_addr;
-
- wire flag_invert_u_gt_v;
- wire flag_invert_v_eq_1;
- wire flag_invert_u_is_even;
- wire flag_invert_v_is_even;
-
- modinv_helper_invert_compare #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_compare
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_compare_ena),
- .rdy (helper_invert_compare_rdy),
-
- .u_addr (helper_invert_compare_u_addr),
- .v_addr (helper_invert_compare_v_addr),
-
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout),
-
- .u_gt_v (flag_invert_u_gt_v),
- .v_eq_1 (flag_invert_v_eq_1),
- .u_is_even (flag_invert_u_is_even),
- .v_is_even (flag_invert_v_is_even)
- );
-
-
- //
- // Helper Module - Inversion Update
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_r_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_invert_update_v_addr;
-
- wire helper_invert_update_r_wren;
- wire helper_invert_update_s_wren;
- wire helper_invert_update_u_wren;
- wire helper_invert_update_v_wren;
-
- wire [ 32-1:0] helper_invert_update_r_data;
- wire [ 32-1:0] helper_invert_update_s_data;
- wire [ 32-1:0] helper_invert_update_u_data;
- wire [ 32-1:0] helper_invert_update_v_data;
-
- modinv_helper_invert_update #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_invert_update
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_invert_update_ena),
- .rdy (helper_invert_update_rdy),
-
- .u_gt_v (flag_invert_u_gt_v),
- .v_eq_1 (flag_invert_v_eq_1),
- .u_is_even (flag_invert_u_is_even),
- .v_is_even (flag_invert_v_is_even),
-
- .r_addr (helper_invert_update_r_addr),
- .s_addr (helper_invert_update_s_addr),
- .u_addr (helper_invert_update_u_addr),
- .v_addr (helper_invert_update_v_addr),
-
- .r_wren (helper_invert_update_r_wren),
- .s_wren (helper_invert_update_s_wren),
- .u_wren (helper_invert_update_u_wren),
- .v_wren (helper_invert_update_v_wren),
-
- .r_dout (helper_invert_update_r_data),
- .s_dout (helper_invert_update_s_data),
- .u_dout (helper_invert_update_u_data),
- .v_dout (helper_invert_update_v_data),
-
- .r_dbl_addr (buf_r_dbl_rd_addr),
- .s_dbl_addr (buf_s_dbl_rd_addr),
- .r_plus_s_addr (buf_r_plus_s_rd_addr),
- .u_half_addr (buf_u_half_rd_addr),
- .v_half_addr (buf_v_half_rd_addr),
- .u_minus_v_half_addr (buf_u_minus_v_half_rd_addr),
- .v_minus_u_half_addr (buf_v_minus_u_half_rd_addr),
-
- .r_dbl_din (buf_r_dbl_rd_dout),
- .s_dbl_din (buf_s_dbl_rd_dout),
- .r_plus_s_din (buf_r_plus_s_rd_dout),
- .u_half_din (buf_u_half_rd_dout),
- .v_half_din (buf_v_half_rd_dout),
- .u_minus_v_half_din (buf_u_minus_v_half_rd_dout),
- .v_minus_u_half_din (buf_v_minus_u_half_rd_dout)
- );
-
-
- //
- // Helper Module - Reduction Pre-Calculation
- //
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_r_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_s_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_u_addr;
- wire [ BUFFER_ADDR_BITS-1:0] helper_reduce_precalc_v_addr;
- wire [OPERAND_ADDR_BITS-1:0] helper_reduce_precalc_q_addr;
-
- wire helper_reduce_precalc_r_wren;
- wire helper_reduce_precalc_u_wren;
- wire helper_reduce_precalc_v_wren;
-
- wire [ 32-1:0] helper_reduce_precalc_r_data;
- wire [ 32-1:0] helper_reduce_precalc_u_data;
- wire [ 32-1:0] helper_reduce_precalc_v_data;
-
- wire flag_reduce_s_is_odd;
- wire flag_invert_k_is_nul;
-
- modinv_helper_reduce_precalc #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS),
- .K_NUM_BITS (K_NUM_BITS)
- )
- helper_reduce_precalc
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_reduce_precalc_ena),
- .rdy (helper_reduce_precalc_rdy),
-
- .r_addr (helper_reduce_precalc_r_addr),
- .s_addr (helper_reduce_precalc_s_addr),
- .u_addr (helper_reduce_precalc_u_addr),
- .v_addr (helper_reduce_precalc_v_addr),
- .q_addr (helper_reduce_precalc_q_addr),
-
- .k (k),
-
- .s_is_odd (flag_reduce_s_is_odd),
- .k_is_nul (flag_reduce_k_is_nul),
-
- .r_din (buf_r_wr_dout),
- .s_din (buf_s_rd_dout),
- .q_din (q_din),
-
- .r_wren (helper_reduce_precalc_r_wren),
- .u_wren (helper_reduce_precalc_u_wren),
- .v_wren (helper_reduce_precalc_v_wren),
-
- .r_dout (helper_reduce_precalc_r_data),
- .u_dout (helper_reduce_precalc_u_data),
- .v_dout (helper_reduce_precalc_v_data)
- );
-
- //
- // Helper Module - Reduction Update
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_s_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_u_addr;
- wire [BUFFER_ADDR_BITS-1:0] helper_reduce_update_v_addr;
-
- wire helper_reduce_update_s_wren;
-
- wire [ 32-1:0] helper_reduce_update_s_data;
-
- modinv_helper_reduce_update #
- (
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_reduce_update
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_reduce_update_ena),
- .rdy (helper_reduce_update_rdy),
-
- .s_is_odd (flag_reduce_s_is_odd),
- .k_is_nul (flag_reduce_k_is_nul),
-
- .s_addr (helper_reduce_update_s_addr),
- .u_addr (helper_reduce_update_u_addr),
- .v_addr (helper_reduce_update_v_addr),
-
- .s_wren (helper_reduce_update_s_wren),
-
- .s_dout (helper_reduce_update_s_data),
-
- .u_din (buf_u_rd_dout),
- .v_din (buf_v_rd_dout)
- );
-
-
- //
- // Helper Module - Copying
- //
- wire [BUFFER_ADDR_BITS-1:0] helper_copy_s_addr;
-
- modinv_helper_copy #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .OPERAND_ADDR_BITS (OPERAND_ADDR_BITS),
-
- .BUFFER_NUM_WORDS (BUFFER_NUM_WORDS),
- .BUFFER_ADDR_BITS (BUFFER_ADDR_BITS)
- )
- helper_copy
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (helper_copy_ena),
- .rdy (helper_copy_rdy),
-
- .s_addr (helper_copy_s_addr),
- .a1_addr (a1_addr),
-
- .s_din (buf_s_rd_dout),
-
- .a1_dout (a1_dout),
-
- .a1_wren (a1_wren)
- );
-
-
- //
- // Round Counter
- //
- reg [ROUND_COUNTER_BITS-1:0] round_counter;
- wire [ROUND_COUNTER_BITS-1:0] round_counter_max = LOOP_NUM_ROUNDS - 1;
- wire [ROUND_COUNTER_BITS-1:0] round_counter_zero = {ROUND_COUNTER_BITS{1'b0}};
- wire [ROUND_COUNTER_BITS-1:0] round_counter_next =
- (round_counter < round_counter_max) ? round_counter + 1'b1 : round_counter_zero;
-
-
- //
- // FSM
- //
- localparam FSM_STATE_IDLE = 4'd0;
-
- localparam FSM_STATE_INIT = 4'd1;
-
- localparam FSM_STATE_INVERT_PRECALC = 4'd11;
- localparam FSM_STATE_INVERT_COMPARE = 4'd12;
- localparam FSM_STATE_INVERT_UPDATE = 4'd13;
-
- localparam FSM_STATE_REDUCE_PRECALC = 4'd14;
- localparam FSM_STATE_REDUCE_UPDATE = 4'd15;
-
- localparam FSM_STATE_COPY = 4'd2;
-
- localparam FSM_STATE_DONE = 4'd3;
-
- reg [3:0] fsm_state = FSM_STATE_IDLE;
- reg [3:0] fsm_state_dly = FSM_STATE_IDLE;
-
- wire fsm_state_new = (fsm_state != fsm_state_dly);
-
- wire [3:0] fsm_state_invert_next = (round_counter < round_counter_max) ?
- FSM_STATE_INVERT_PRECALC : FSM_STATE_REDUCE_PRECALC;
-
- wire [3:0] fsm_state_reduce_next = (round_counter < round_counter_max) ?
- FSM_STATE_REDUCE_PRECALC : FSM_STATE_COPY;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
- else case (fsm_state)
- FSM_STATE_IDLE: fsm_state <= ena ? FSM_STATE_INIT : FSM_STATE_IDLE;
- FSM_STATE_INIT: fsm_state <= helper_init_done ? FSM_STATE_INVERT_PRECALC : FSM_STATE_INIT;
- FSM_STATE_INVERT_PRECALC: fsm_state <= helper_invert_precalc_done ? FSM_STATE_INVERT_COMPARE : FSM_STATE_INVERT_PRECALC;
- FSM_STATE_INVERT_COMPARE: fsm_state <= helper_invert_compare_done ? FSM_STATE_INVERT_UPDATE : FSM_STATE_INVERT_COMPARE;
- FSM_STATE_INVERT_UPDATE: fsm_state <= helper_invert_update_done ? fsm_state_invert_next : FSM_STATE_INVERT_UPDATE;
- FSM_STATE_REDUCE_PRECALC: fsm_state <= helper_reduce_precalc_done ? FSM_STATE_REDUCE_UPDATE : FSM_STATE_REDUCE_PRECALC;
- FSM_STATE_REDUCE_UPDATE: fsm_state <= helper_reduce_update_done ? fsm_state_reduce_next : FSM_STATE_REDUCE_UPDATE;
- FSM_STATE_COPY: fsm_state <= helper_copy_done ? FSM_STATE_DONE : FSM_STATE_COPY;
- FSM_STATE_DONE: fsm_state <= FSM_STATE_IDLE;
- default: fsm_state <= FSM_STATE_IDLE;
- endcase
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) fsm_state_dly <= FSM_STATE_IDLE;
- else fsm_state_dly <= fsm_state;
-
-
- assign helper_init_ena = (fsm_state == FSM_STATE_INIT) && fsm_state_new;
- assign helper_invert_precalc_ena = (fsm_state == FSM_STATE_INVERT_PRECALC) && fsm_state_new;
- assign helper_invert_compare_ena = (fsm_state == FSM_STATE_INVERT_COMPARE) && fsm_state_new;
- assign helper_invert_update_ena = (fsm_state == FSM_STATE_INVERT_UPDATE) && fsm_state_new;
- assign helper_reduce_precalc_ena = (fsm_state == FSM_STATE_REDUCE_PRECALC) && fsm_state_new;
- assign helper_reduce_update_ena = (fsm_state == FSM_STATE_REDUCE_UPDATE) && fsm_state_new;
- assign helper_copy_ena = (fsm_state == FSM_STATE_COPY) && fsm_state_new;
-
-
- //
- // Counter Increment
- //
- always @(posedge clk) begin
- //
- if ((fsm_state == FSM_STATE_INIT) && helper_init_done)
- round_counter <= round_counter_zero;
- //
- if ((fsm_state == FSM_STATE_INVERT_UPDATE) && helper_invert_update_done)
- round_counter <= round_counter_next;
- //
- if ((fsm_state == FSM_STATE_REDUCE_UPDATE) && helper_reduce_update_done)
- round_counter <= round_counter_next;
- //
- end
-
-
- //
- // Q Address Selector
- //
- always @(*) begin
- //
- case (fsm_state)
- FSM_STATE_INIT: q_addr = helper_init_q_addr;
- FSM_STATE_REDUCE_PRECALC: q_addr = helper_reduce_precalc_q_addr;
- default: q_addr = {OPERAND_ADDR_BITS{1'bX}};
- endcase
- //
- end
-
-
- //
- // Buffer Address Selector
- //
- always @(*) begin
- //
- // Write Ports
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_addr = helper_init_r_addr;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_addr = helper_invert_update_r_addr;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_addr = helper_reduce_precalc_r_addr;
- default: buf_r_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_addr = helper_init_s_addr;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_addr = helper_invert_update_s_addr;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_addr = helper_reduce_update_s_addr;
- default: buf_s_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_addr = helper_init_u_addr;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_addr = helper_invert_update_u_addr;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_addr = helper_reduce_precalc_u_addr;
- default: buf_u_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_addr = helper_init_v_addr;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_addr = helper_invert_update_v_addr;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_addr = helper_reduce_precalc_v_addr;
- default: buf_v_wr_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- // Read Ports
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_r_rd_addr = helper_invert_precalc_r_addr;
- default: buf_r_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_s_rd_addr = helper_invert_precalc_s_addr;
- FSM_STATE_REDUCE_PRECALC: buf_s_rd_addr = helper_reduce_precalc_s_addr;
- FSM_STATE_COPY: buf_s_rd_addr = helper_copy_s_addr;
- default: buf_s_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_u_rd_addr = helper_invert_precalc_u_addr;
- FSM_STATE_INVERT_COMPARE: buf_u_rd_addr = helper_invert_compare_u_addr;
- FSM_STATE_REDUCE_UPDATE: buf_u_rd_addr = helper_reduce_update_u_addr;
- default: buf_u_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INVERT_PRECALC: buf_v_rd_addr = helper_invert_precalc_v_addr;
- FSM_STATE_INVERT_COMPARE: buf_v_rd_addr = helper_invert_compare_v_addr;
- FSM_STATE_REDUCE_UPDATE: buf_v_rd_addr = helper_reduce_update_v_addr;
- default: buf_v_rd_addr = {BUFFER_ADDR_BITS{1'bX}};
- endcase
- //
- end
-
-
- //
- // Buffer Write Enable Logic
- //
- always @(*) begin
- //
- // Write Ports
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_en = helper_init_r_wren;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_en = helper_invert_update_r_wren;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_en = helper_reduce_precalc_r_wren;
- default: buf_r_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_en = helper_init_s_wren;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_en = helper_invert_update_s_wren;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_en = helper_reduce_update_s_wren;
- default: buf_s_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_en = helper_init_u_wren;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_en = helper_invert_update_u_wren;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_en = helper_reduce_precalc_u_wren;
- default: buf_u_wr_en = 1'b0;
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_en = helper_init_v_wren;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_en = helper_invert_update_v_wren;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_en = helper_reduce_precalc_v_wren;
- default: buf_v_wr_en = 1'b0;
- endcase
- //
- end
-
-
- //
- // Buffer Write Data Selector
- //
- always @(*) begin
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_r_wr_din = helper_init_r_data;
- FSM_STATE_INVERT_UPDATE: buf_r_wr_din = helper_invert_update_r_data;
- FSM_STATE_REDUCE_PRECALC: buf_r_wr_din = helper_reduce_precalc_r_data;
- default: buf_r_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_s_wr_din = helper_init_s_data;
- FSM_STATE_INVERT_UPDATE: buf_s_wr_din = helper_invert_update_s_data;
- FSM_STATE_REDUCE_UPDATE: buf_s_wr_din = helper_reduce_update_s_data;
- default: buf_s_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_u_wr_din = helper_init_u_data;
- FSM_STATE_INVERT_UPDATE: buf_u_wr_din = helper_invert_update_u_data;
- FSM_STATE_REDUCE_PRECALC: buf_u_wr_din = helper_reduce_precalc_u_data;
- default: buf_u_wr_din = {32{1'bX}};
- endcase
- //
- case (fsm_state)
- FSM_STATE_INIT: buf_v_wr_din = helper_init_v_data;
- FSM_STATE_INVERT_UPDATE: buf_v_wr_din = helper_invert_update_v_data;
- FSM_STATE_REDUCE_PRECALC: buf_v_wr_din = helper_reduce_precalc_v_data;
- default: buf_v_wr_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // Ready Logic
- //
- reg rdy_reg = 1'b1;
-
- assign rdy = rdy_reg;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0) rdy_reg <= 1'b1;
- else begin
-
- /* clear */
- if (rdy && ena) rdy_reg <= 1'b0;
-
- /* set */
- if (!rdy && (fsm_state == FSM_STATE_DONE)) rdy_reg <= 1'b1;
-
- end
-
-
- //
- // Store Redundant Power of 2 (K)
- //
- always @(posedge clk)
- //
- if (helper_init_ena)
- k <= {K_NUM_BITS{1'b0}};
- else begin
-
- if (helper_invert_update_ena && !flag_invert_v_eq_1)
- k <= k + 1'b1;
-
- if (helper_reduce_update_ena && (k != {K_NUM_BITS{1'b0}}))
- k <= k - 1'b1;
-
- end
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v
index c2f2661..2b35233 100644
--- a/rtl/modular/modular_multiplier_256.v
+++ b/rtl/modular/modular_multiplier_256.v
@@ -1,402 +1,402 @@
-//------------------------------------------------------------------------------
-//
-// modular_multiplier_256.v
-// -----------------------------------------------------------------------------
-// Modular multiplier.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_multiplier_256
- (
- clk, rst_n,
- ena, rdy,
- a_addr, b_addr, n_addr, p_addr, p_wren,
- a_din, b_din, n_din, p_dout
- );
-
-
- //
- // Constants
- //
- localparam OPERAND_NUM_WORDS = 8;
- localparam WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
- output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
- output wire p_wren; // store current P word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N (must be P-256!)
- output wire [ 31:0] p_dout; // P = A * B mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_a;
- reg [WORD_COUNTER_WIDTH-1:0] index_b;
-
- /* map registers to output ports */
- assign a_addr = index_a;
- assign b_addr = index_b;
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
- wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
- wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
- wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
- wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
-
- wire inc_index_a = |fsm_shreg_inc_index_a;
- wire store_word_a = |fsm_shreg_store_word_a;
- wire inc_index_b = |fsm_shreg_inc_index_b;
- wire clear_mac_ab = |fsm_shreg_inc_index_b;
- wire shift_wide_a = |fsm_shreg_inc_index_b;
- wire enable_mac_ab = |fsm_shreg_inc_index_b;
- wire store_si_msb = |fsm_shreg_store_si_msb;
- wire store_si_lsb = fsm_shreg_store_si_lsb;
- wire shift_si = |fsm_shreg_shift_si;
- wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
- wire store_c_word = |fsm_shreg_store_c_word;
- wire reduce_start = fsm_shreg_reduce_start;
- wire reduce_stop = fsm_shreg_reduce_stop;
-
-
- //
- // FSM Logic
- //
- wire reduce_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy)
- fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else if (!reduce_stop || reduce_done)
- fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- reg index_b_ff;
-
- always @(posedge clk)
- //
- if (inc_index_b) index_b_ff <= ~index_b_ff;
- else index_b_ff <= 1'b0;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_a <= WORD_INDEX_ZERO;
- index_b <= WORD_INDEX_LAST;
- //
- end else begin
- //
- if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
- if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
- //
- end
-
-
- //
- // Wide Operand Buffer
- //
- reg [255:0] buf_a_wide;
-
- always @(posedge clk)
- //
- if (store_word_a)
- buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
- else if (shift_wide_a)
- buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
-
-
- //
- // Multiplier Array
- //
- wire mac_inhibit; // control signal to pause all accumulators
-
- wire [46: 0] mac[0:15]; // outputs of all accumulators
- reg [15: 0] mac_clear; // individual per-accumulator clear flag
-
- assign mac_inhibit = ~enable_mac_ab;
-
- always @(posedge clk)
- //
- if (!clear_mac_ab)
- mac_clear <= {16{1'b1}};
- else begin
-
- if (mac_clear == {16{1'b1}})
- mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
- else
- mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
-
-
- end
-
- //
- // Array of parallel multipliers
- //
- genvar i;
- generate for (i=0; i<16; i=i+1)
- begin : gen_mac_array
- //
- mac16_wrapper mac16_inst
- (
- .clk (clk),
- .ce (~mac_inhibit),
-
- .clr (mac_clear[i]),
-
- .a (buf_a_wide[16*i+:16]),
- .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
- .s (mac[i])
- );
- //
- end
- endgenerate
-
- //
- // Intermediate Words
- //
- reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
- reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
-
-
- wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
- wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
-
- generate for (i=0; i<16; i=i+1)
- begin : gen_si_lsb_new
- assign si_lsb_new[47*i+:47] = mac[15-i];
- end
- endgenerate
-
- generate for (i=1; i<16; i=i+1)
- begin : gen_si_msb_new
- assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
- end
- endgenerate
-
- always @(posedge clk) begin
- //
- if (shift_si) begin
- si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
- si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
- end else begin
-
- if (store_si_msb)
- si_msb <= si_msb_new;
-
- if (store_si_lsb)
- si_lsb <= si_lsb_new;
- end
-
- end
-
-
- //
- // Accumulators
- //
- wire [46: 0] add47_cw0_s;
- wire [46: 0] add47_cw1_s;
-
-
- //
- // cw0, b, cw1, b
- //
- reg [30: 0] si_prev_dly;
- reg [15: 0] si_next_dly;
-
- always @(posedge clk)
- //
- if (shift_si)
- si_prev_dly <= si_lsb[93:63];
- else
- si_prev_dly <= {31{1'b0}};
-
- always @(posedge clk)
- //
- si_next_dly <= si_lsb[62:47];
-
- wire [46: 0] add47_cw0_a = si_lsb[46:0];
- wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
-
- wire [46: 0] add47_cw1_a = add47_cw0_s;
- wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
-
- adder47_wrapper add47_cw0_inst
- (
- .clk (clk),
- .a (add47_cw0_a),
- .b (add47_cw0_b),
- .s (add47_cw0_s)
- );
-
- adder47_wrapper add47_cw1_inst
- (
- .clk (clk),
- .a (add47_cw1_a),
- .b (add47_cw1_b),
- .s (add47_cw1_s)
- );
-
-
-
- //
- // Full-Size Product
- //
- reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
-
- wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
- wire [ 31:0] reduce_c_word;
-
- always @(posedge clk)
- //
- if (store_c_word)
- bram_c_addr <= bram_c_addr + 1'b1;
- else
- bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
-
- bram_1rw_1ro_readfirst #
- (
- .MEM_WIDTH (32),
- .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
- )
- bram_c_inst
- (
- .clk (clk),
-
- .a_addr (bram_c_addr),
- .a_wr (store_c_word),
- .a_in (add47_cw1_s[31:0]),
- .a_out (),
-
- .b_addr (reduce_c_addr),
- .b_out (reduce_c_word)
- );
-
-
- //
- // Reduction Stage
- //
- modular_reductor_256 reduce_256_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_done),
-
- .x_addr (reduce_c_addr),
- .n_addr (n_addr),
- .p_addr (p_addr),
- .p_wren (p_wren),
-
- .x_din (reduce_c_word),
- .n_din (n_din),
- .p_dout (p_dout)
- );
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// modular_multiplier_256.v
+// -----------------------------------------------------------------------------
+// Modular multiplier.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_multiplier_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ a_addr, b_addr, n_addr, p_addr, p_wren,
+ a_din, b_din, n_din, p_dout
+ );
+
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
+ WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
+ end
+ endfunction
+
+ function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word
+ output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] a_din; // A
+ input wire [ 31:0] b_din; // B
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = A * B mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] index_a;
+ reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+ /* map registers to output ports */
+ assign a_addr = index_a;
+ assign b_addr = index_b;
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
+ wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+ wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+ wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+ wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+ wire inc_index_a = |fsm_shreg_inc_index_a;
+ wire store_word_a = |fsm_shreg_store_word_a;
+ wire inc_index_b = |fsm_shreg_inc_index_b;
+ wire clear_mac_ab = |fsm_shreg_inc_index_b;
+ wire shift_wide_a = |fsm_shreg_inc_index_b;
+ wire enable_mac_ab = |fsm_shreg_inc_index_b;
+ wire store_si_msb = |fsm_shreg_store_si_msb;
+ wire store_si_lsb = fsm_shreg_store_si_lsb;
+ wire shift_si = |fsm_shreg_shift_si;
+ wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
+ wire store_c_word = |fsm_shreg_store_c_word;
+ wire reduce_start = fsm_shreg_reduce_start;
+ wire reduce_stop = fsm_shreg_reduce_stop;
+
+
+ //
+ // FSM Logic
+ //
+ wire reduce_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done)
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ reg index_b_ff;
+
+ always @(posedge clk)
+ //
+ if (inc_index_b) index_b_ff <= ~index_b_ff;
+ else index_b_ff <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (rdy) begin
+ //
+ index_a <= WORD_INDEX_ZERO;
+ index_b <= WORD_INDEX_LAST;
+ //
+ end else begin
+ //
+ if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+ if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+ //
+ end
+
+
+ //
+ // Wide Operand Buffer
+ //
+ reg [255:0] buf_a_wide;
+
+ always @(posedge clk)
+ //
+ if (store_word_a)
+ buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+ else if (shift_wide_a)
+ buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+ //
+ // Multiplier Array
+ //
+ wire mac_inhibit; // control signal to pause all accumulators
+
+ wire [46: 0] mac[0:15]; // outputs of all accumulators
+ reg [15: 0] mac_clear; // individual per-accumulator clear flag
+
+ assign mac_inhibit = ~enable_mac_ab;
+
+ always @(posedge clk)
+ //
+ if (!clear_mac_ab)
+ mac_clear <= {16{1'b1}};
+ else begin
+
+ if (mac_clear == {16{1'b1}})
+ mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+ else
+ mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+ end
+
+ //
+ // Array of parallel multipliers
+ //
+ genvar i;
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_mac_array
+ //
+ mac16_wrapper mac16_inst
+ (
+ .clk (clk),
+ .ce (~mac_inhibit),
+
+ .clr (mac_clear[i]),
+
+ .a (buf_a_wide[16*i+:16]),
+ .b (index_b_ff ? b_din[15:0] : b_din[31:16]),
+ .s (mac[i])
+ );
+ //
+ end
+ endgenerate
+
+ //
+ // Intermediate Words
+ //
+ reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+ reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+ wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+ wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+ generate for (i=0; i<16; i=i+1)
+ begin : gen_si_lsb_new
+ assign si_lsb_new[47*i+:47] = mac[15-i];
+ end
+ endgenerate
+
+ generate for (i=1; i<16; i=i+1)
+ begin : gen_si_msb_new
+ assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+ end
+ endgenerate
+
+ always @(posedge clk) begin
+ //
+ if (shift_si) begin
+ si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+ si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+ end else begin
+
+ if (store_si_msb)
+ si_msb <= si_msb_new;
+
+ if (store_si_lsb)
+ si_lsb <= si_lsb_new;
+ end
+
+ end
+
+
+ //
+ // Accumulators
+ //
+ wire [46: 0] add47_cw0_s;
+ wire [46: 0] add47_cw1_s;
+
+
+ //
+ // cw0, b, cw1, b
+ //
+ reg [30: 0] si_prev_dly;
+ reg [15: 0] si_next_dly;
+
+ always @(posedge clk)
+ //
+ if (shift_si)
+ si_prev_dly <= si_lsb[93:63];
+ else
+ si_prev_dly <= {31{1'b0}};
+
+ always @(posedge clk)
+ //
+ si_next_dly <= si_lsb[62:47];
+
+ wire [46: 0] add47_cw0_a = si_lsb[46:0];
+ wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+ wire [46: 0] add47_cw1_a = add47_cw0_s;
+ wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+ adder47_wrapper add47_cw0_inst
+ (
+ .clk (clk),
+ .a (add47_cw0_a),
+ .b (add47_cw0_b),
+ .s (add47_cw0_s)
+ );
+
+ adder47_wrapper add47_cw1_inst
+ (
+ .clk (clk),
+ .a (add47_cw1_a),
+ .b (add47_cw1_b),
+ .s (add47_cw1_s)
+ );
+
+
+
+ //
+ // Full-Size Product
+ //
+ reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+ wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+ wire [ 31:0] reduce_c_word;
+
+ always @(posedge clk)
+ //
+ if (store_c_word)
+ bram_c_addr <= bram_c_addr + 1'b1;
+ else
+ bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+ bram_1rw_1ro_readfirst #
+ (
+ .MEM_WIDTH (32),
+ .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+ )
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_c_addr),
+ .a_wr (store_c_word),
+ .a_in (add47_cw1_s[31:0]),
+ .a_out (),
+
+ .b_addr (reduce_c_addr),
+ .b_out (reduce_c_word)
+ );
+
+
+ //
+ // Reduction Stage
+ //
+ modular_reductor_256 reduce_256_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_done),
+
+ .x_addr (reduce_c_addr),
+ .n_addr (n_addr),
+ .p_addr (p_addr),
+ .p_wren (p_wren),
+
+ .x_din (reduce_c_word),
+ .n_din (n_din),
+ .p_dout (p_dout)
+ );
+
+
+ endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v
index e4b346a..6f31570 100644
--- a/rtl/modular/modular_reductor_256.v
+++ b/rtl/modular/modular_reductor_256.v
@@ -1,692 +1,692 @@
-//------------------------------------------------------------------------------
-//
-// modular_reductor_256.v
-// -----------------------------------------------------------------------------
-// Modular reductor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_reductor_256
- (
- clk, rst_n,
- ena, rdy,
- x_addr, n_addr, p_addr, p_wren,
- x_din, n_din, p_dout
- );
-
- //
- // Constants
- //
- localparam OPERAND_NUM_WORDS = 8;
- localparam WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
- input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
- output wire p_wren; // store current P word now
-
- input wire [ 31:0] x_din; // X
- input wire [ 31:0] n_din; // N (must be P-256!)
- output wire [ 31:0] p_dout; // P = X mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH:0] index_x;
-
-
- /* map registers to output ports */
- assign x_addr = index_x;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
-
- wire [5-1:0] fsm_shreg_reduce_stage_start;
- wire [5-1:0] fsm_shreg_reduce_stage_stop;
-
- genvar s;
- generate for (s=0; s<5; s=s+1)
- begin : gen_fsm_shreg_reduce_stages
- assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
- assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
- end
- endgenerate
-
- wire inc_index_x = |fsm_shreg_inc_index_x;
- wire store_word_z = |fsm_shreg_store_word_z;
- wire reduce_start = |fsm_shreg_reduce_stage_start;
- wire reduce_stop = |fsm_shreg_reduce_stage_stop;
- wire store_p = fsm_shreg_reduce_stage_stop[0];
-
-
- wire reduce_adder0_done;
- wire reduce_adder1_done;
- wire reduce_subtractor_done;
-
- wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy)
- //
- fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else if (!reduce_stop || reduce_done_all)
- //
- fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy)
- //
- index_x <= WORD_INDEX_LAST;
- //
- else if (inc_index_x)
- //
- index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
-
-
- //
- // Look-up Table
- //
-
- //
- // Take a look at the corresponding C model for more information
- // on how exactly the math behind reduction works. The first step
- // is to assemble nine 256-bit values ("z-words") from 32-bit parts
- // of the full 512-bit product ("c-word"). The problem with z5 is
- // that it contains c13 two times. This implementation scans from
- // c15 to c0 and writes current part of c-word into corresponding
- // parts of z-words. Since those 32-bit parts are stored in block
- // memories, one source word can only be written to one location in
- // every z-word at a time. The trick is to delay c13 and then write
- // the delayed value at the corresponding location in z5 instead of
- // the next c12. "z_save" flag is used to indicate that the current
- // word should be delayed and written once again during the next cycle.
- //
-
- reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
- reg [9 -1:0] z_wren; //
- reg [9 -1:0] z_mask; // mask input to store zero word
- reg [9 -1:0] z_save; // save previous word once again
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // s9 s8 s7 s6 s5 s4 s3 s2 s1
- // || || || || || || || || ||
- 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
- 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
- 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
- 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
- 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
- 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
- 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
- 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
- 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
- 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
- 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
- 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
- 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
- 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
- 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
- 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
- //
- default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- //
- default: z_wren <= {9{1'b0}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
- 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_mask <= {9{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_save <= {9{1'bX}};
- //
- endcase
-
-
- //
- // Intermediate Numbers
- //
- reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
- wire [ 32-1:0] reduce_z_dout[1:9];
-
- reg [31: 0] x_din_dly;
- always @(posedge clk)
- //
- x_din_dly <= x_din;
-
-
- genvar z;
- generate for (z=1; z<=9; z=z+1)
- //
- begin : gen_z_bram
- //
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_c_inst
- (
- .clk (clk),
-
- .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
- .a_wr (z_wren[z-1] & store_word_z),
- .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
- .a_out (),
-
- .b_addr (reduce_z_addr[z]),
- .b_out (reduce_z_dout[z])
- );
- //
- end
- //
- endgenerate
-
-
-
-
- wire [ 32-1:0] bram_sum0_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
- wire bram_sum0_wr_wren;
-
- wire [ 32-1:0] bram_sum1_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
- wire bram_sum1_wr_wren;
-
- wire [ 32-1:0] bram_diff_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
- wire bram_diff_wr_wren;
-
- wire [ 32-1:0] bram_sum0_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
-
- wire [ 32-1:0] bram_sum1_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
-
- wire [ 32-1:0] bram_diff_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
-
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum0_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum0_wr_addr),
- .a_wr (bram_sum0_wr_wren),
- .a_in (bram_sum0_wr_din),
- .a_out (),
-
- .b_addr (bram_sum0_rd_addr),
- .b_out (bram_sum0_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum1_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum1_wr_addr),
- .a_wr (bram_sum1_wr_wren),
- .a_in (bram_sum1_wr_din),
- .a_out (),
-
- .b_addr (bram_sum1_rd_addr),
- .b_out (bram_sum1_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_diff_inst
- (
- .clk (clk),
-
- .a_addr (bram_diff_wr_addr),
- .a_wr (bram_diff_wr_wren),
- .a_in (bram_diff_wr_din),
- .a_out (),
-
- .b_addr (bram_diff_rd_addr),
- .b_out (bram_diff_rd_dout)
- );
-
-
- wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
-
- reg [ 32-1:0] adder0_a_din;
- reg [ 32-1:0] adder0_b_din;
-
- reg [ 32-1:0] adder1_a_din;
- reg [ 32-1:0] adder1_b_din;
-
- reg [ 32-1:0] subtractor_a_din;
- reg [ 32-1:0] subtractor_b_din;
-
- // n_addr - only 1 output, because all modules are in sync
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst0
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder0_done),
-
- .ab_addr (adder0_ab_addr),
- .n_addr (),
- .s_addr (bram_sum0_wr_addr),
- .s_wren (bram_sum0_wr_wren),
-
- .a_din (adder0_a_din),
- .b_din (adder0_b_din),
- .n_din (n_din),
- .s_dout (bram_sum0_wr_din)
- );
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst1
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder1_done),
-
- .ab_addr (adder1_ab_addr),
- .n_addr (),
- .s_addr (bram_sum1_wr_addr),
- .s_wren (bram_sum1_wr_wren),
-
- .a_din (adder1_a_din),
- .b_din (adder1_b_din),
- .n_din (n_din),
- .s_dout (bram_sum1_wr_din)
- );
-
- modular_subtractor #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- subtractor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_subtractor_done),
-
- .ab_addr (subtractor_ab_addr),
- .n_addr (n_addr),
- .d_addr (bram_diff_wr_addr),
- .d_wren (bram_diff_wr_wren),
-
- .a_din (subtractor_a_din),
- .b_din (subtractor_b_din),
- .n_din (n_din),
- .d_dout (bram_diff_wr_din)
- );
-
-
- //
- // Address (Operand) Selector
- //
- always @(*)
- //
- case (fsm_shreg_reduce_stage_stop)
- //
- 5'b10000: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = adder0_ab_addr;
- reduce_z_addr[3] = adder1_ab_addr;
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = subtractor_ab_addr;
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- 5'b01000: begin
- reduce_z_addr[1] = adder0_ab_addr;
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = adder1_ab_addr;
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = subtractor_ab_addr;
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00100: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = adder0_ab_addr;
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = subtractor_ab_addr;
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00010: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = subtractor_ab_addr;
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder0_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00001: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = adder0_ab_addr;
- end
- //
- default: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- endcase
-
-
- //
- // adder 0
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_a_din = reduce_z_dout[2];
- 5'b01000: adder0_a_din = bram_sum0_rd_dout;
- 5'b00100: adder0_a_din = bram_sum0_rd_dout;
- 5'b00010: adder0_a_din = bram_sum0_rd_dout;
- 5'b00001: adder0_a_din = bram_sum0_rd_dout;
- default: adder0_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_b_din = reduce_z_dout[2];
- 5'b01000: adder0_b_din = reduce_z_dout[1];
- 5'b00100: adder0_b_din = reduce_z_dout[5];
- 5'b00010: adder0_b_din = bram_sum1_rd_dout;
- 5'b00001: adder0_b_din = bram_diff_rd_dout;
- default: adder0_b_din = {32{1'bX}};
- endcase
- //
- end
-
- //
- // adder 1
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_a_din = reduce_z_dout[3];
- 5'b01000: adder1_a_din = bram_sum1_rd_dout;
- 5'b00100: adder1_a_din = bram_sum1_rd_dout;
- 5'b00010: adder1_a_din = {32{1'bX}};
- 5'b00001: adder1_a_din = {32{1'bX}};
- default: adder1_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_b_din = reduce_z_dout[3];
- 5'b01000: adder1_b_din = reduce_z_dout[4];
- 5'b00100: adder1_b_din = {32{1'b0}};
- 5'b00010: adder1_b_din = {32{1'bX}};
- 5'b00001: adder1_b_din = {32{1'bX}};
- default: adder1_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // subtractor
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_a_din = {32{1'b0}};
- 5'b01000: subtractor_a_din = bram_diff_rd_dout;
- 5'b00100: subtractor_a_din = bram_diff_rd_dout;
- 5'b00010: subtractor_a_din = bram_diff_rd_dout;
- 5'b00001: subtractor_a_din = {32{1'bX}};
- default: subtractor_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_b_din = reduce_z_dout[6];
- 5'b01000: subtractor_b_din = reduce_z_dout[7];
- 5'b00100: subtractor_b_din = reduce_z_dout[8];
- 5'b00010: subtractor_b_din = reduce_z_dout[9];
- 5'b00001: subtractor_b_din = {32{1'bX}};
- default: subtractor_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // Address Mapping
- //
- assign p_addr = bram_sum0_wr_addr;
- assign p_wren = bram_sum0_wr_wren & store_p;
- assign p_dout = bram_sum0_wr_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
+//------------------------------------------------------------------------------
+//
+// modular_reductor_256.v
+// -----------------------------------------------------------------------------
+// Modular reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2015-2016, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module modular_reductor_256
+ (
+ clk, rst_n,
+ ena, rdy,
+ x_addr, n_addr, p_addr, p_wren,
+ x_din, n_din, p_dout
+ );
+
+ //
+ // Constants
+ //
+ localparam OPERAND_NUM_WORDS = 8;
+ localparam WORD_COUNTER_WIDTH = 3;
+
+
+ //
+ // Handy Numbers
+ //
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
+ localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+
+
+ //
+ // Handy Functions
+ //
+ function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
+ input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
+ begin
+ WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+ WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+ end
+ endfunction
+
+
+ //
+ // Ports
+ //
+ input wire clk; // system clock
+ input wire rst_n; // active-low async reset
+
+ input wire ena; // enable input
+ output wire rdy; // ready output
+
+ output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
+ output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
+ output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
+ output wire p_wren; // store current P word now
+
+ input wire [ 31:0] x_din; // X
+ input wire [ 31:0] n_din; // N (must be P-256!)
+ output wire [ 31:0] p_dout; // P = X mod N
+
+
+ //
+ // Word Indices
+ //
+ reg [WORD_COUNTER_WIDTH:0] index_x;
+
+
+ /* map registers to output ports */
+ assign x_addr = index_x;
+
+
+ //
+ // FSM
+ //
+ localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+ reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
+
+ assign rdy = fsm_shreg[0];
+
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+ wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
+
+ wire [5-1:0] fsm_shreg_reduce_stage_start;
+ wire [5-1:0] fsm_shreg_reduce_stage_stop;
+
+ genvar s;
+ generate for (s=0; s<5; s=s+1)
+ begin : gen_fsm_shreg_reduce_stages
+ assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+ assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+ end
+ endgenerate
+
+ wire inc_index_x = |fsm_shreg_inc_index_x;
+ wire store_word_z = |fsm_shreg_store_word_z;
+ wire reduce_start = |fsm_shreg_reduce_stage_start;
+ wire reduce_stop = |fsm_shreg_reduce_stage_stop;
+ wire store_p = fsm_shreg_reduce_stage_stop[0];
+
+
+ wire reduce_adder0_done;
+ wire reduce_adder1_done;
+ wire reduce_subtractor_done;
+
+ wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+ always @(posedge clk or negedge rst_n)
+ //
+ if (rst_n == 1'b0)
+ //
+ fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
+ //
+ else begin
+ //
+ if (rdy)
+ //
+ fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+ //
+ else if (!reduce_stop || reduce_done_all)
+ //
+ fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+ //
+ end
+
+
+ //
+ // Word Index Increment Logic
+ //
+ always @(posedge clk)
+ //
+ if (rdy)
+ //
+ index_x <= WORD_INDEX_LAST;
+ //
+ else if (inc_index_x)
+ //
+ index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+ //
+ // Look-up Table
+ //
+
+ //
+ // Take a look at the corresponding C model for more information
+ // on how exactly the math behind reduction works. The first step
+ // is to assemble nine 256-bit values ("z-words") from 32-bit parts
+ // of the full 512-bit product ("c-word"). The problem with z5 is
+ // that it contains c13 two times. This implementation scans from
+ // c15 to c0 and writes current part of c-word into corresponding
+ // parts of z-words. Since those 32-bit parts are stored in block
+ // memories, one source word can only be written to one location in
+ // every z-word at a time. The trick is to delay c13 and then write
+ // the delayed value at the corresponding location in z5 instead of
+ // the next c12. "z_save" flag is used to indicate that the current
+ // word should be delayed and written once again during the next cycle.
+ //
+
+ reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
+ reg [9 -1:0] z_wren; //
+ reg [9 -1:0] z_mask; // mask input to store zero word
+ reg [9 -1:0] z_save; // save previous word once again
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // s9 s8 s7 s6 s5 s4 s3 s2 s1
+ // || || || || || || || || ||
+ 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+ 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+ 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+ 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+ 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+ 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+ 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+ 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+ 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+ 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+ 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+ 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+ 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+ 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+ 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+ 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+ //
+ default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+ 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+ //
+ default: z_wren <= {9{1'b0}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+ 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+ 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_mask <= {9{1'bX}};
+ //
+ endcase
+
+ always @(posedge clk)
+ //
+ if (inc_index_x)
+ //
+ case (index_x)
+ //
+ // 9 8 7 6 5 4 3 2 1
+ // | | | | | | | | |
+ 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+ //
+ default: z_save <= {9{1'bX}};
+ //
+ endcase
+
+
+ //
+ // Intermediate Numbers
+ //
+ reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
+ wire [ 32-1:0] reduce_z_dout[1:9];
+
+ reg [31: 0] x_din_dly;
+ always @(posedge clk)
+ //
+ x_din_dly <= x_din;
+
+
+ genvar z;
+ generate for (z=1; z<=9; z=z+1)
+ //
+ begin : gen_z_bram
+ //
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_c_inst
+ (
+ .clk (clk),
+
+ .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+ .a_wr (z_wren[z-1] & store_word_z),
+ .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+ .a_out (),
+
+ .b_addr (reduce_z_addr[z]),
+ .b_out (reduce_z_dout[z])
+ );
+ //
+ end
+ //
+ endgenerate
+
+
+
+
+ wire [ 32-1:0] bram_sum0_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
+ wire bram_sum0_wr_wren;
+
+ wire [ 32-1:0] bram_sum1_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
+ wire bram_sum1_wr_wren;
+
+ wire [ 32-1:0] bram_diff_wr_din;
+ wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
+ wire bram_diff_wr_wren;
+
+ wire [ 32-1:0] bram_sum0_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
+
+ wire [ 32-1:0] bram_sum1_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
+
+ wire [ 32-1:0] bram_diff_rd_dout;
+ reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
+
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum0_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum0_wr_addr),
+ .a_wr (bram_sum0_wr_wren),
+ .a_in (bram_sum0_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum0_rd_addr),
+ .b_out (bram_sum0_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_sum1_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_sum1_wr_addr),
+ .a_wr (bram_sum1_wr_wren),
+ .a_in (bram_sum1_wr_din),
+ .a_out (),
+
+ .b_addr (bram_sum1_rd_addr),
+ .b_out (bram_sum1_rd_dout)
+ );
+
+ bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+ bram_diff_inst
+ (
+ .clk (clk),
+
+ .a_addr (bram_diff_wr_addr),
+ .a_wr (bram_diff_wr_wren),
+ .a_in (bram_diff_wr_din),
+ .a_out (),
+
+ .b_addr (bram_diff_rd_addr),
+ .b_out (bram_diff_rd_dout)
+ );
+
+
+ wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
+ wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
+
+ reg [ 32-1:0] adder0_a_din;
+ reg [ 32-1:0] adder0_b_din;
+
+ reg [ 32-1:0] adder1_a_din;
+ reg [ 32-1:0] adder1_b_din;
+
+ reg [ 32-1:0] subtractor_a_din;
+ reg [ 32-1:0] subtractor_b_din;
+
+ // n_addr - only 1 output, because all modules are in sync
+
+ modular_adder #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ adder_inst0
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder0_done),
+
+ .ab_addr (adder0_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum0_wr_addr),
+ .s_wren (bram_sum0_wr_wren),
+
+ .a_din (adder0_a_din),
+ .b_din (adder0_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum0_wr_din)
+ );
+
+ modular_adder #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ adder_inst1
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_adder1_done),
+
+ .ab_addr (adder1_ab_addr),
+ .n_addr (),
+ .s_addr (bram_sum1_wr_addr),
+ .s_wren (bram_sum1_wr_wren),
+
+ .a_din (adder1_a_din),
+ .b_din (adder1_b_din),
+ .n_din (n_din),
+ .s_dout (bram_sum1_wr_din)
+ );
+
+ modular_subtractor #
+ (
+ .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+ .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+ )
+ subtractor_inst
+ (
+ .clk (clk),
+ .rst_n (rst_n),
+
+ .ena (reduce_start),
+ .rdy (reduce_subtractor_done),
+
+ .ab_addr (subtractor_ab_addr),
+ .n_addr (n_addr),
+ .d_addr (bram_diff_wr_addr),
+ .d_wren (bram_diff_wr_wren),
+
+ .a_din (subtractor_a_din),
+ .b_din (subtractor_b_din),
+ .n_din (n_din),
+ .d_dout (bram_diff_wr_din)
+ );
+
+
+ //
+ // Address (Operand) Selector
+ //
+ always @(*)
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ //
+ 5'b10000: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = adder0_ab_addr;
+ reduce_z_addr[3] = adder1_ab_addr;
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = subtractor_ab_addr;
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ 5'b01000: begin
+ reduce_z_addr[1] = adder0_ab_addr;
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = adder1_ab_addr;
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = subtractor_ab_addr;
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00100: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = adder0_ab_addr;
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = subtractor_ab_addr;
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder1_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00010: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = subtractor_ab_addr;
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = adder0_ab_addr;
+ bram_diff_rd_addr = subtractor_ab_addr;
+ end
+ //
+ 5'b00001: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = adder0_ab_addr;
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = adder0_ab_addr;
+ end
+ //
+ default: begin
+ reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+ reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+ end
+ //
+ endcase
+
+
+ //
+ // adder 0
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_a_din = reduce_z_dout[2];
+ 5'b01000: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00100: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00010: adder0_a_din = bram_sum0_rd_dout;
+ 5'b00001: adder0_a_din = bram_sum0_rd_dout;
+ default: adder0_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder0_b_din = reduce_z_dout[2];
+ 5'b01000: adder0_b_din = reduce_z_dout[1];
+ 5'b00100: adder0_b_din = reduce_z_dout[5];
+ 5'b00010: adder0_b_din = bram_sum1_rd_dout;
+ 5'b00001: adder0_b_din = bram_diff_rd_dout;
+ default: adder0_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+ //
+ // adder 1
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_a_din = reduce_z_dout[3];
+ 5'b01000: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00100: adder1_a_din = bram_sum1_rd_dout;
+ 5'b00010: adder1_a_din = {32{1'bX}};
+ 5'b00001: adder1_a_din = {32{1'bX}};
+ default: adder1_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: adder1_b_din = reduce_z_dout[3];
+ 5'b01000: adder1_b_din = reduce_z_dout[4];
+ 5'b00100: adder1_b_din = {32{1'b0}};
+ 5'b00010: adder1_b_din = {32{1'bX}};
+ 5'b00001: adder1_b_din = {32{1'bX}};
+ default: adder1_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // subtractor
+ //
+ always @(*) begin
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_a_din = {32{1'b0}};
+ 5'b01000: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00100: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00010: subtractor_a_din = bram_diff_rd_dout;
+ 5'b00001: subtractor_a_din = {32{1'bX}};
+ default: subtractor_a_din = {32{1'bX}};
+ endcase
+ //
+ case (fsm_shreg_reduce_stage_stop)
+ 5'b10000: subtractor_b_din = reduce_z_dout[6];
+ 5'b01000: subtractor_b_din = reduce_z_dout[7];
+ 5'b00100: subtractor_b_din = reduce_z_dout[8];
+ 5'b00010: subtractor_b_din = reduce_z_dout[9];
+ 5'b00001: subtractor_b_din = {32{1'bX}};
+ default: subtractor_b_din = {32{1'bX}};
+ endcase
+ //
+ end
+
+
+ //
+ // Address Mapping
+ //
+ assign p_addr = bram_sum0_wr_addr;
+ assign p_wren = bram_sum0_wr_wren & store_p;
+ assign p_dout = bram_sum0_wr_din;
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular/modular_subtractor.v b/rtl/modular/modular_subtractor.v
deleted file mode 100644
index 322aec4..0000000
--- a/rtl/modular/modular_subtractor.v
+++ /dev/null
@@ -1,292 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_subtractor.v
-// -----------------------------------------------------------------------------
-// Modular subtractor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_subtractor
- (
- clk, rst_n,
- ena, rdy,
- ab_addr, n_addr, d_addr, d_wren,
- a_din, b_din, n_din, d_dout
- );
-
-
- //
- // Parameters
- //
- parameter OPERAND_NUM_WORDS = 8;
- parameter WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] ab_addr; // index of current A and B words
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] d_addr; // index of current D word
- output wire d_wren; // store current D word now
-
- input wire [ 31:0] a_din; // A
- input wire [ 31:0] b_din; // B
- input wire [ 31:0] n_din; // N
- output wire [ 31:0] d_dout; // D = (A - B) mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_ab;
- reg [WORD_COUNTER_WIDTH-1:0] index_n;
- reg [WORD_COUNTER_WIDTH-1:0] index_d;
-
- /* map registers to output ports */
- assign ab_addr = index_ab;
- assign n_addr = index_n;
- assign d_addr = index_d;
-
-
- //
- // Subtractor
- //
- wire [31: 0] sub32_d;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32
- (
- .clk (clk),
- .a (a_din),
- .b (b_din),
- .d (sub32_d),
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // Adder
- //
- wire [31: 0] add32_s;
- wire add32_c_in;
- wire add32_c_out;
-
- adder32_wrapper adder32
- (
- .clk (clk),
- .a (sub32_d),
- .b (n_din),
- .s (add32_s),
- .c_in (add32_c_in),
- .c_out (add32_c_out)
- );
-
-
- //
- // FSM
- //
-
- localparam FSM_SHREG_WIDTH = 2*OPERAND_NUM_WORDS + 5;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_dif_ab_n = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_store_data_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 3)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_d = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (2 * OPERAND_NUM_WORDS + 4)];
-
- wire fsm_latch_msb_borrow = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
- wire inc_index_ab = |fsm_shreg_inc_index_ab;
- wire inc_index_n = |fsm_shreg_inc_index_n;
- wire store_dif_ab = |fsm_shreg_store_dif_ab;
- wire store_dif_ab_n = |fsm_shreg_store_dif_ab_n;
- wire store_data_d = |fsm_shreg_store_data_d;
- wire inc_index_d = |fsm_shreg_inc_index_d;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Borrow & Carry Masking Logic
- //
- reg sub32_b_mask;
- reg add32_c_mask;
-
-
- always @(posedge clk) begin
- //
- sub32_b_mask <= (index_ab == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- add32_c_mask <= (index_n == WORD_INDEX_ZERO) ? 1'b1 : 1'b0;
- //
- end
-
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
- assign add32_c_in = add32_c_out & ~add32_c_mask;
-
-
-
- //
- // Borrow & Carry Latch Logic
- //
- reg sub32_borrow_latch;
-
- always @(posedge clk) begin
- //
- if (fsm_latch_msb_borrow) sub32_borrow_latch <= sub32_b_out;
- //
- end
-
-
- //
- // Intermediate Results
- //
- reg [32*OPERAND_NUM_WORDS-1:0] d_ab;
- reg [32*OPERAND_NUM_WORDS-1:0] d_ab_n;
-
- always @(posedge clk)
- //
- if (store_data_d) begin
- //
- d_ab <= {{32{1'bX}}, d_ab[32*OPERAND_NUM_WORDS-1:32]};
- d_ab_n <= {{32{1'bX}}, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end else begin
- //
- if (store_dif_ab) d_ab <= {sub32_d, d_ab[32*OPERAND_NUM_WORDS-1:32]};
- if (store_dif_ab_n) d_ab_n <= {add32_s, d_ab_n[32*OPERAND_NUM_WORDS-1:32]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- //
- index_ab <= WORD_INDEX_ZERO;
- index_n <= WORD_INDEX_ZERO;
- index_d <= WORD_INDEX_ZERO;
- //
- end else begin
- //
- if (inc_index_ab) index_ab <= WORD_INDEX_NEXT_OR_ZERO(index_ab);
- if (inc_index_n) index_n <= WORD_INDEX_NEXT_OR_ZERO(index_n);
- if (inc_index_d) index_d <= WORD_INDEX_NEXT_OR_ZERO(index_d);
- //
- end
-
-
- //
- // Output Sum Selector
- //
- wire mux_select_ab_n = sub32_borrow_latch;
-
-
- //
- // Output Data and Write Enable Logic
- //
- reg d_wren_reg;
- reg [31: 0] d_dout_reg;
- wire [31: 0] d_dout_mux = mux_select_ab_n ? d_ab_n[31:0] : d_ab[31:0];
-
- assign d_wren = d_wren_reg;
- assign d_dout = d_dout_reg;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- d_wren_reg <= 1'b0;
- d_dout_reg <= {32{1'bX}};
- //
- end else begin
- //
- d_wren_reg <= store_data_d;
- d_dout_reg <= store_data_d ? d_dout_mux : {32{1'bX}};
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_comparator.v b/rtl/multiword/mw_comparator.v
deleted file mode 100644
index b97a6cf..0000000
--- a/rtl/multiword/mw_comparator.v
+++ /dev/null
@@ -1,220 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_comparator.v
-// -----------------------------------------------------------------------------
-// Multi-word comparator.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_comparator
- (
- clk, rst_n,
- ena, rdy,
- xy_addr, x_din, y_din,
- cmp_l, cmp_e, cmp_g
- );
-
-
- //
- // Parameters
- //
- parameter WORD_COUNTER_WIDTH = 3;
- parameter OPERAND_NUM_WORDS = 8;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREV_OR_LAST;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREV_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] xy_addr; // address of current X and Y words
- input wire [ 32-1:0] x_din; // current X word
- input wire [ 32-1:0] y_din; // current Y word
-
- output wire cmp_l; // X < Y ?
- output wire cmp_e; // X = Y ?
- output wire cmp_g; // X > Y ?
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_xy;
-
- reg reg_cmp_l;
- reg reg_cmp_e;
- reg reg_cmp_g;
-
-
- //
- // Output Mapping
- //
- assign xy_addr = index_xy;
-
- assign cmp_l = reg_cmp_l;
- assign cmp_e = reg_cmp_e;
- assign cmp_g = reg_cmp_g;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 3;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_dec_index_xy = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_calc_leg = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
- wire fsm_shreg_calc_leg_last = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2)];
-
- wire dec_index_xy = |fsm_shreg_dec_index_xy;
- wire calc_leg = |fsm_shreg_calc_leg;
- wire calc_leg_last = fsm_shreg_calc_leg_last;
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) index_xy <= WORD_INDEX_LAST;
- else if (dec_index_xy) index_xy <= WORD_INDEX_PREV_OR_LAST(index_xy);
-
-
- //
- // 32-bit Subtractor
- //
- wire [31: 0] sub32_d_out;
- wire sub32_b_in;
- wire sub32_b_out;
-
- subtractor32_wrapper subtractor32_inst
- (
- .clk (clk),
-
- .a (x_din),
- .b (y_din),
-
- .d (sub32_d_out),
-
- .b_in (sub32_b_in),
- .b_out (sub32_b_out)
- );
-
-
- //
- // Borrow Masking Logic
- //
- reg sub32_b_mask;
-
- always @(posedge clk)
- //
- sub32_b_mask <= (index_xy == WORD_INDEX_LAST) ? 1'b1 : 1'b0;
-
- assign sub32_b_in = sub32_b_out & ~sub32_b_mask;
-
- //
- // Output Logic
- //
- wire cmp_unresolved = !(cmp_l || cmp_g);
-
- wire cmp_borrow_is_set = (sub32_b_out == 1'b1) ? 1'b1 : 1'b0;
- wire cmp_difference_is_nonzero = (sub32_d_out != 32'd0) ? 1'b1 : 1'b0;
-
- always @(posedge clk)
- //
- if (rdy) begin
- //
- if (ena) begin
- //
- reg_cmp_l <= 1'b0;
- reg_cmp_e <= 1'b0;
- reg_cmp_g <= 1'b0;
- //
- end
- //
- end else if (cmp_unresolved && calc_leg) begin
- //
- if ( cmp_borrow_is_set) reg_cmp_l <= 1'b1;
- if (!cmp_borrow_is_set && cmp_difference_is_nonzero) reg_cmp_g <= 1'b1;
- if (!cmp_borrow_is_set && !cmp_difference_is_nonzero && calc_leg_last) reg_cmp_e <= 1'b1;
- //
- end
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/multiword/mw_mover.v b/rtl/multiword/mw_mover.v
deleted file mode 100644
index 5db95a7..0000000
--- a/rtl/multiword/mw_mover.v
+++ /dev/null
@@ -1,175 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// mw_mover.v
-// -----------------------------------------------------------------------------
-// Multi-word data mover.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module mw_mover
- (
- clk, rst_n,
- ena, rdy,
- x_addr, y_addr, y_wren,
- x_din, y_dout
- );
-
-
- //
- // Parameters
- //
- parameter WORD_COUNTER_WIDTH = 3;
- parameter OPERAND_NUM_WORDS = 8;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
- input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ?
- WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-1:0] x_addr; // address of current X word
- output wire [WORD_COUNTER_WIDTH-1:0] y_addr; // address of current Y word
- output wire y_wren; // store current Y word
-
- input wire [ 32-1:0] x_din; // current X word
- output wire [ 32-1:0] y_dout; // current Y word
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH-1:0] index_x;
- reg [WORD_COUNTER_WIDTH-1:0] index_y;
-
-
- //
- // Output Mapping
- //
- assign x_addr = index_x;
- assign y_addr = index_y;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = 1 * OPERAND_NUM_WORDS + 2;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)];
- wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_y = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
-
- wire inc_index_x = |fsm_shreg_inc_index_x;
- wire inc_index_y = |fsm_shreg_inc_index_y;
- wire store_word_y = |fsm_shreg_inc_index_x;
-
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy) begin
- index_x <= WORD_INDEX_ZERO;
- index_y <= WORD_INDEX_ZERO;
- end else begin
- if (inc_index_x) index_x <= WORD_INDEX_NEXT_OR_ZERO(index_x);
- if (inc_index_y) index_y <= WORD_INDEX_NEXT_OR_ZERO(index_y);
- end
-
-
- //
- // Write Enable Logic
- //
- reg y_wren_reg;
-
- assign y_wren = y_wren_reg;
-
- always @(posedge clk)
- //
- if (rdy) y_wren_reg <= 1'b0;
- else y_wren_reg <= store_word_y;
-
-
- //
- // Output Logic
- //
- assign y_dout = x_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------
diff --git a/rtl/util/bram_1rw_1ro_readfirst.v b/rtl/util/bram_1rw_1ro_readfirst.v
deleted file mode 100644
index 28782c2..0000000
--- a/rtl/util/bram_1rw_1ro_readfirst.v
+++ /dev/null
@@ -1,101 +0,0 @@
-//======================================================================
-//
-// Copyright (c) 2015, NORDUnet A/S All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// - Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may
-// be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-//======================================================================
-
-`timescale 1ns / 1ps
-
-module bram_1rw_1ro_readfirst
- #(parameter MEM_WIDTH = 32,
- parameter MEM_ADDR_BITS = 8)
- (
- input wire clk,
-
- input wire [MEM_ADDR_BITS-1:0] a_addr,
- input wire a_wr,
- input wire [MEM_WIDTH-1:0] a_in,
- output wire [MEM_WIDTH-1:0] a_out,
-
- input wire [MEM_ADDR_BITS-1:0] b_addr,
- output wire [MEM_WIDTH-1:0] b_out
- );
-
-
- //
- // BRAM
- //
- (* RAM_STYLE="BLOCK" *)
- reg [MEM_WIDTH-1:0] bram[0:(2**MEM_ADDR_BITS)-1];
-
-
- //
- // Initialization
- //
- /**
- integer c;
- initial begin
- for (c=0; c<(2**MEM_ADDR_BITS); c=c+1)
- bram[c] = {MEM_WIDTH{1'b0}};
- end
- **/
-
-
-
- //
- // Output Registers
- //
- reg [MEM_WIDTH-1:0] bram_reg_a;
- reg [MEM_WIDTH-1:0] bram_reg_b;
-
- assign a_out = bram_reg_a;
- assign b_out = bram_reg_b;
-
-
- //
- // Read-Write Port A
- //
- always @(posedge clk) begin
- //
- bram_reg_a <= bram[a_addr];
- //
- if (a_wr) bram[a_addr] <= a_in;
- //
- end
-
-
- //
- // Read-Only Port B
- //
- always @(posedge clk)
- //
- bram_reg_b <= bram[b_addr];
-
-
-endmodule