//------------------------------------------------------------------------------ // // modular_multiplier_256.v // ----------------------------------------------------------------------------- // Modular multiplier. // // Authors: Pavel Shatov // // Copyright (c) 2015-2016, NORDUnet A/S // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // - Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // - Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // - Neither the name of the NORDUnet nor the names of its contributors may be // used to endorse or promote products derived from this software without // specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // //------------------------------------------------------------------------------ module modular_multiplier_256 ( clk, rst_n, ena, rdy, a_addr, b_addr, n_addr, p_addr, p_wren, a_din, b_din, n_din, p_dout ); // // Constants // localparam OPERAND_NUM_WORDS = 8; localparam WORD_COUNTER_WIDTH = 3; // // Handy Numbers // localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0; localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1; // // Handy Functions // function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO; input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; begin WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_CURRENT < WORD_INDEX_LAST) ? WORD_INDEX_CURRENT + 1'b1 : WORD_INDEX_ZERO; end endfunction function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST; input [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_CURRENT; begin WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ? WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST; end endfunction // // Ports // input wire clk; // system clock input wire rst_n; // active-low async reset input wire ena; // enable input output wire rdy; // ready output output wire [WORD_COUNTER_WIDTH-1:0] a_addr; // index of current A word output wire [WORD_COUNTER_WIDTH-1:0] b_addr; // index of current B word output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word output wire p_wren; // store current P word now input wire [ 31:0] a_din; // A input wire [ 31:0] b_din; // B input wire [ 31:0] n_din; // N (must be P-256!) output wire [ 31:0] p_dout; // P = A * B mod N // // Word Indices // reg [WORD_COUNTER_WIDTH-1:0] index_a; reg [WORD_COUNTER_WIDTH-1:0] index_b; /* map registers to output ports */ assign a_addr = index_a; assign b_addr = index_b; // // FSM // localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + 1; reg [FSM_SHREG_WIDTH-1:0] fsm_shreg; assign rdy = fsm_shreg[0]; wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 0)]; wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)]; wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)]; wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)]; wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)]; wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)]; wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)]; wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)]; wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)]; wire inc_index_a = |fsm_shreg_inc_index_a; wire store_word_a = |fsm_shreg_store_word_a; wire inc_index_b = |fsm_shreg_inc_index_b; wire clear_mac_ab = |fsm_shreg_inc_index_b; wire shift_wide_a = |fsm_shreg_inc_index_b; wire enable_mac_ab = |fsm_shreg_inc_index_b; wire store_si_msb = |fsm_shreg_store_si_msb; wire store_si_lsb = fsm_shreg_store_si_lsb; wire shift_si = |fsm_shreg_shift_si; wire mask_cw1_sum = fsm_shreg_mask_cw1_sum; wire store_c_word = |fsm_shreg_store_c_word; wire reduce_start = fsm_shreg_reduce_start; wire reduce_stop = fsm_shreg_reduce_stop; // // FSM Logic // wire reduce_done; always @(posedge clk or negedge rst_n) // if (rst_n == 1'b0) // fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1}; // else begin // if (rdy) fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena}; // else if (!reduce_stop || reduce_done) fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]}; // end // // Word Index Increment Logic // reg index_b_ff; always @(posedge clk) // if (inc_index_b) index_b_ff <= ~index_b_ff; else index_b_ff <= 1'b0; always @(posedge clk) // if (rdy) begin // index_a <= WORD_INDEX_ZERO; index_b <= WORD_INDEX_LAST; // end else begin // if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a); if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b); // end // // Wide Operand Buffer // reg [255:0] buf_a_wide; always @(posedge clk) // if (store_word_a) buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]}; else if (shift_wide_a) buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]}; // // Multiplier Array // wire mac_inhibit; // control signal to pause all accumulators wire [46: 0] mac[0:15]; // outputs of all accumulators reg [15: 0] mac_clear; // individual per-accumulator clear flag assign mac_inhibit = ~enable_mac_ab; always @(posedge clk) // if (!clear_mac_ab) mac_clear <= {16{1'b1}}; else begin if (mac_clear == {16{1'b1}}) mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}}; else mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}}; end // // Array of parallel multipliers // genvar i; generate for (i=0; i<16; i=i+1) begin : gen_mac_array // mac16_wrapper mac16_inst ( .clk (clk), .ce (~mac_inhibit), .clr (mac_clear[i]), .a (buf_a_wide[16*i+:16]), .b (index_b_ff ? b_din[15:0] : b_din[31:16]), .s (mac[i]) ); // end endgenerate // // Intermediate Words // reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb; reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb; wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new; wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new; generate for (i=0; i<16; i=i+1) begin : gen_si_lsb_new assign si_lsb_new[47*i+:47] = mac[15-i]; end endgenerate generate for (i=1; i<16; i=i+1) begin : gen_si_msb_new assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47]; end endgenerate always @(posedge clk) begin // if (shift_si) begin si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]}; si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]}; end else begin if (store_si_msb) si_msb <= si_msb_new; if (store_si_lsb) si_lsb <= si_lsb_new; end end // // Accumulators // wire [46: 0] add47_cw0_s; wire [46: 0] add47_cw1_s; // // cw0, b, cw1, b // reg [30: 0] si_prev_dly; reg [15: 0] si_next_dly; always @(posedge clk) // if (shift_si) si_prev_dly <= si_lsb[93:63]; else si_prev_dly <= {31{1'b0}}; always @(posedge clk) // si_next_dly <= si_lsb[62:47]; wire [46: 0] add47_cw0_a = si_lsb[46:0]; wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly}; wire [46: 0] add47_cw1_a = add47_cw0_s; wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}}; adder47_wrapper add47_cw0_inst ( .clk (clk), .a (add47_cw0_a), .b (add47_cw0_b), .s (add47_cw0_s) ); adder47_wrapper add47_cw1_inst ( .clk (clk), .a (add47_cw1_a), .b (add47_cw1_b), .s (add47_cw1_s) ); // // Full-Size Product // reg [WORD_COUNTER_WIDTH:0] bram_c_addr; wire [WORD_COUNTER_WIDTH:0] reduce_c_addr; wire [ 31:0] reduce_c_word; always @(posedge clk) // if (store_c_word) bram_c_addr <= bram_c_addr + 1'b1; else bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}}; bram_1rw_1ro_readfirst # ( .MEM_WIDTH (32), .MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1) ) bram_c_inst ( .clk (clk), .a_addr (bram_c_addr), .a_wr (store_c_word), .a_in (add47_cw1_s[31:0]), .a_out (), .b_addr (reduce_c_addr), .b_out (reduce_c_word) ); // // Reduction Stage // modular_reductor_256 reduce_256_inst ( .clk (clk), .rst_n (rst_n), .ena (reduce_start), .rdy (reduce_done), .x_addr (reduce_c_addr), .n_addr (n_addr), .p_addr (p_addr), .p_wren (p_wren), .x_din (reduce_c_word), .n_din (n_din), .p_dout (p_dout) ); endmodule //------------------------------------------------------------------------------ // End-of-File //------------------------------------------------------------------------------