From 89f913c3aa2a6dad35630f3882a06b99e0978105 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Tue, 7 Mar 2017 19:52:36 -0500 Subject: Promote to a repository in the core tree. Change name of reset signal from rst_n to reset_n for consistancy with other Cryptech cores. Code common between this core and the ecdsa384 core split out into a separate library repository. Minor cleanup (Windows-isms, indentation). --- rtl/modular/modular_multiplier_256.v | 804 +++++++++++++++++------------------ 1 file changed, 402 insertions(+), 402 deletions(-) (limited to 'rtl/modular/modular_multiplier_256.v') diff --git a/rtl/modular/modular_multiplier_256.v b/rtl/modular/modular_multiplier_256.v index c2f2661..2b35233 100644 --- a/rtl/modular/modular_multiplier_256.v +++ b/rtl/modular/modular_multiplier_256.v @@ -1,402 +1,402 @@ -//------------------------------------------------------------------------------ -// -// modular_multiplier_256.v -// ----------------------------------------------------------------------------- -// Modular multiplier. -// -// Authors: Pavel Shatov -// -// Copyright (c) 2015-2016, NORDUnet A/S -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// - Neither the name of the NORDUnet nor the names of its contributors may be -// used to endorse or promote products derived from this software without -// specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. -// -//------------------------------------------------------------------------------ - -module modular_multiplier_256 - ( - clk, rst_n, - ena, rdy, - a_addr, b_addr, n_addr, p_addr, p_wren, - a_din, b_din, n_din, p_dout - ); - - - // - // Constants - // - localparam OPERAND_NUM_WORDS = 8; - localparam WORD_COUNTER_WIDTH = 3; - - - // - // Handy Numbers - // - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; - localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; - - - // - // Handy Functions - // - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? - WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; - end - endfunction - - function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; - input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; - begin - WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? - WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; - end - endfunction - - - // - // Ports - // - input wire clk; // system clock - input wire rst_n; // active-low async reset - - input wire ena; // enable input - output wire rdy; // ready output - - output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word - output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word - output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word - output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word - output wire p_wren; // store current P word now - - input wire [ 31:0] a_din; // A - input wire [ 31:0] b_din; // B - input wire [ 31:0] n_din; // N (must be P-256!) - output wire [ 31:0] p_dout; // P = A * B mod N - - - // - // Word Indices - // - reg [WORD_COUNTER_WIDTH-1:0] index_a; - reg [WORD_COUNTER_WIDTH-1:0] index_b; - - /* map registers to output ports */ - assign a_addr = index_a; - assign b_addr = index_b; - - // - // FSM - // - localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; - - reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; - - assign rdy = fsm_shreg[0]; - - wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; - wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; - wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; - wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; - wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; - wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; - - wire inc_index_a = |fsm_shreg_inc_index_a; - wire store_word_a = |fsm_shreg_store_word_a; - wire inc_index_b = |fsm_shreg_inc_index_b; - wire clear_mac_ab = |fsm_shreg_inc_index_b; - wire shift_wide_a = |fsm_shreg_inc_index_b; - wire enable_mac_ab = |fsm_shreg_inc_index_b; - wire store_si_msb = |fsm_shreg_store_si_msb; - wire store_si_lsb = fsm_shreg_store_si_lsb; - wire shift_si = |fsm_shreg_shift_si; - wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; - wire store_c_word = |fsm_shreg_store_c_word; - wire reduce_start = fsm_shreg_reduce_start; - wire reduce_stop = fsm_shreg_reduce_stop; - - - // - // FSM Logic - // - wire reduce_done; - - always @(posedge clk or negedge rst_n) - // - if (rst_n == 1'b0) - // - fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; - // - else begin - // - if (rdy) - fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; - // - else if (!reduce_stop || reduce_done) - fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; - // - end - - - // - // Word Index Increment Logic - // - reg index_b_ff; - - always @(posedge clk) - // - if (inc_index_b) index_b_ff <= ~index_b_ff; - else index_b_ff <= 1'b0; - - always @(posedge clk) - // - if (rdy) begin - // - index_a <= WORD_INDEX_ZERO; - index_b <= WORD_INDEX_LAST; - // - end else begin - // - if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); - if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); - // - end - - - // - // Wide Operand Buffer - // - reg [255:0] buf_a_wide; - - always @(posedge clk) - // - if (store_word_a) - buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]}; - else if (shift_wide_a) - buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; - - - // - // Multiplier Array - // - wire mac_inhibit; // control signal to pause all accumulators - - wire [46: 0] mac[0:15]; // outputs of all accumulators - reg [15: 0] mac_clear; // individual per-accumulator clear flag - - assign mac_inhibit = ~enable_mac_ab; - - always @(posedge clk) - // - if (!clear_mac_ab) - mac_clear <= {16{1'b1}}; - else begin - - if (mac_clear == {16{1'b1}}) - mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}}; - else - mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}}; - - - end - - // - // Array of parallel multipliers - // - genvar i; - generate for (i=0; i<16; i=i+1) - begin : gen_mac_array - // - mac16_wrapper mac16_inst - ( - .clk (clk), - .ce (~mac_inhibit), - - .clr (mac_clear[i]), - - .a (buf_a_wide[16*i+:16]), - .b (index_b_ff ? b_din[15:0] : b_din[31:16]), - .s (mac[i]) - ); - // - end - endgenerate - - // - // Intermediate Words - // - reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; - reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; - - - wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; - wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; - - generate for (i=0; i<16; i=i+1) - begin : gen_si_lsb_new - assign si_lsb_new[47*i+:47] = mac[15-i]; - end - endgenerate - - generate for (i=1; i<16; i=i+1) - begin : gen_si_msb_new - assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47]; - end - endgenerate - - always @(posedge clk) begin - // - if (shift_si) begin - si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; - si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; - end else begin - - if (store_si_msb) - si_msb <= si_msb_new; - - if (store_si_lsb) - si_lsb <= si_lsb_new; - end - - end - - - // - // Accumulators - // - wire [46: 0] add47_cw0_s; - wire [46: 0] add47_cw1_s; - - - // - // cw0, b, cw1, b - // - reg [30: 0] si_prev_dly; - reg [15: 0] si_next_dly; - - always @(posedge clk) - // - if (shift_si) - si_prev_dly <= si_lsb[93:63]; - else - si_prev_dly <= {31{1'b0}}; - - always @(posedge clk) - // - si_next_dly <= si_lsb[62:47]; - - wire [46: 0] add47_cw0_a = si_lsb[46:0]; - wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; - - wire [46: 0] add47_cw1_a = add47_cw0_s; - wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; - - adder47_wrapper add47_cw0_inst - ( - .clk (clk), - .a (add47_cw0_a), - .b (add47_cw0_b), - .s (add47_cw0_s) - ); - - adder47_wrapper add47_cw1_inst - ( - .clk (clk), - .a (add47_cw1_a), - .b (add47_cw1_b), - .s (add47_cw1_s) - ); - - - - // - // Full-Size Product - // - reg [WORD_COUNTER_WIDTH:0] bram_c_addr; - - wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; - wire [ 31:0] reduce_c_word; - - always @(posedge clk) - // - if (store_c_word) - bram_c_addr <= bram_c_addr + 1'b1; - else - bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; - - bram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) - ) - bram_c_inst - ( - .clk (clk), - - .a_addr (bram_c_addr), - .a_wr (store_c_word), - .a_in (add47_cw1_s[31:0]), - .a_out (), - - .b_addr (reduce_c_addr), - .b_out (reduce_c_word) - ); - - - // - // Reduction Stage - // - modular_reductor_256 reduce_256_inst - ( - .clk (clk), - .rst_n (rst_n), - - .ena (reduce_start), - .rdy (reduce_done), - - .x_addr (reduce_c_addr), - .n_addr (n_addr), - .p_addr (p_addr), - .p_wren (p_wren), - - .x_din (reduce_c_word), - .n_din (n_din), - .p_dout (p_dout) - ); - - -endmodule - - -//------------------------------------------------------------------------------ -// End-of-File -//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ +// +// modular_multiplier_256.v +// ----------------------------------------------------------------------------- +// Modular multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +//------------------------------------------------------------------------------ + +module modular_multiplier_256 + ( + clk, rst_n, + ena, rdy, + a_addr, b_addr, n_addr, p_addr, p_wren, + a_din, b_din, n_din, p_dout + ); + + + // + // Constants + // + localparam OPERAND_NUM_WORDS = 8; + localparam WORD_COUNTER_WIDTH = 3; + + + // + // Handy Numbers + // + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; + localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; + + + // + // Handy Functions + // + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? + WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; + end + endfunction + + function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; + input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; + begin + WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? + WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; + end + endfunction + + + // + // Ports + // + input wire clk; // system clock + input wire rst_n; // active-low async reset + + input wire ena; // enable input + output wire rdy; // ready output + + output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word + output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word + output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word + output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word + output wire p_wren; // store current P word now + + input wire [ 31:0] a_din; // A + input wire [ 31:0] b_din; // B + input wire [ 31:0] n_din; // N (must be P-256!) + output wire [ 31:0] p_dout; // P = A * B mod N + + + // + // Word Indices + // + reg [WORD_COUNTER_WIDTH-1:0] index_a; + reg [WORD_COUNTER_WIDTH-1:0] index_b; + + /* map registers to output ports */ + assign a_addr = index_a; + assign b_addr = index_b; + + // + // FSM + // + localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; + + reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; + + assign rdy = fsm_shreg[0]; + + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; + wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; + wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; + wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; + wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; + + wire inc_index_a = |fsm_shreg_inc_index_a; + wire store_word_a = |fsm_shreg_store_word_a; + wire inc_index_b = |fsm_shreg_inc_index_b; + wire clear_mac_ab = |fsm_shreg_inc_index_b; + wire shift_wide_a = |fsm_shreg_inc_index_b; + wire enable_mac_ab = |fsm_shreg_inc_index_b; + wire store_si_msb = |fsm_shreg_store_si_msb; + wire store_si_lsb = fsm_shreg_store_si_lsb; + wire shift_si = |fsm_shreg_shift_si; + wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; + wire store_c_word = |fsm_shreg_store_c_word; + wire reduce_start = fsm_shreg_reduce_start; + wire reduce_stop = fsm_shreg_reduce_stop; + + + // + // FSM Logic + // + wire reduce_done; + + always @(posedge clk or negedge rst_n) + // + if (rst_n == 1'b0) + // + fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; + // + else begin + // + if (rdy) + fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; + // + else if (!reduce_stop || reduce_done) + fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; + // + end + + + // + // Word Index Increment Logic + // + reg index_b_ff; + + always @(posedge clk) + // + if (inc_index_b) index_b_ff <= ~index_b_ff; + else index_b_ff <= 1'b0; + + always @(posedge clk) + // + if (rdy) begin + // + index_a <= WORD_INDEX_ZERO; + index_b <= WORD_INDEX_LAST; + // + end else begin + // + if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); + if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); + // + end + + + // + // Wide Operand Buffer + // + reg [255:0] buf_a_wide; + + always @(posedge clk) + // + if (store_word_a) + buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]}; + else if (shift_wide_a) + buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; + + + // + // Multiplier Array + // + wire mac_inhibit; // control signal to pause all accumulators + + wire [46: 0] mac[0:15]; // outputs of all accumulators + reg [15: 0] mac_clear; // individual per-accumulator clear flag + + assign mac_inhibit = ~enable_mac_ab; + + always @(posedge clk) + // + if (!clear_mac_ab) + mac_clear <= {16{1'b1}}; + else begin + + if (mac_clear == {16{1'b1}}) + mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}}; + else + mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}}; + + + end + + // + // Array of parallel multipliers + // + genvar i; + generate for (i=0; i<16; i=i+1) + begin : gen_mac_array + // + mac16_wrapper mac16_inst + ( + .clk (clk), + .ce (~mac_inhibit), + + .clr (mac_clear[i]), + + .a (buf_a_wide[16*i+:16]), + .b (index_b_ff ? b_din[15:0] : b_din[31:16]), + .s (mac[i]) + ); + // + end + endgenerate + + // + // Intermediate Words + // + reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; + reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; + + + wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; + wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; + + generate for (i=0; i<16; i=i+1) + begin : gen_si_lsb_new + assign si_lsb_new[47*i+:47] = mac[15-i]; + end + endgenerate + + generate for (i=1; i<16; i=i+1) + begin : gen_si_msb_new + assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47]; + end + endgenerate + + always @(posedge clk) begin + // + if (shift_si) begin + si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; + si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; + end else begin + + if (store_si_msb) + si_msb <= si_msb_new; + + if (store_si_lsb) + si_lsb <= si_lsb_new; + end + + end + + + // + // Accumulators + // + wire [46: 0] add47_cw0_s; + wire [46: 0] add47_cw1_s; + + + // + // cw0, b, cw1, b + // + reg [30: 0] si_prev_dly; + reg [15: 0] si_next_dly; + + always @(posedge clk) + // + if (shift_si) + si_prev_dly <= si_lsb[93:63]; + else + si_prev_dly <= {31{1'b0}}; + + always @(posedge clk) + // + si_next_dly <= si_lsb[62:47]; + + wire [46: 0] add47_cw0_a = si_lsb[46:0]; + wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; + + wire [46: 0] add47_cw1_a = add47_cw0_s; + wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; + + adder47_wrapper add47_cw0_inst + ( + .clk (clk), + .a (add47_cw0_a), + .b (add47_cw0_b), + .s (add47_cw0_s) + ); + + adder47_wrapper add47_cw1_inst + ( + .clk (clk), + .a (add47_cw1_a), + .b (add47_cw1_b), + .s (add47_cw1_s) + ); + + + + // + // Full-Size Product + // + reg [WORD_COUNTER_WIDTH:0] bram_c_addr; + + wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; + wire [ 31:0] reduce_c_word; + + always @(posedge clk) + // + if (store_c_word) + bram_c_addr <= bram_c_addr + 1'b1; + else + bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; + + bram_1rw_1ro_readfirst # + ( + .MEM_WIDTH (32), + .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) + ) + bram_c_inst + ( + .clk (clk), + + .a_addr (bram_c_addr), + .a_wr (store_c_word), + .a_in (add47_cw1_s[31:0]), + .a_out (), + + .b_addr (reduce_c_addr), + .b_out (reduce_c_word) + ); + + + // + // Reduction Stage + // + modular_reductor_256 reduce_256_inst + ( + .clk (clk), + .rst_n (rst_n), + + .ena (reduce_start), + .rdy (reduce_done), + + .x_addr (reduce_c_addr), + .n_addr (n_addr), + .p_addr (p_addr), + .p_wren (p_wren), + + .x_din (reduce_c_word), + .n_din (n_din), + .p_dout (p_dout) + ); + + + endmodule + + +//------------------------------------------------------------------------------ +// End-of-File +//------------------------------------------------------------------------------ -- cgit v1.2.3