From f2239775210e3cd6a373987e7a872328a30501a9 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov (Meister)" Date: Tue, 25 Sep 2018 15:04:27 +0300 Subject: Started porting modular multiplier from ECDSA. --- .../ed25519_modular_multiplier.v | 410 ++++++++++++ rtl/modular_multiplier/ed25519_modular_reductor.v | 699 +++++++++++++++++++++ 2 files changed, 1109 insertions(+) create mode 100644 rtl/modular_multiplier/ed25519_modular_multiplier.v create mode 100644 rtl/modular_multiplier/ed25519_modular_reductor.v (limited to 'rtl/modular_multiplier') diff --git a/rtl/modular_multiplier/ed25519_modular_multiplier.v b/rtl/modular_multiplier/ed25519_modular_multiplier.v new file mode 100644 index 0000000..9f8ead7 --- /dev/null +++ b/rtl/modular_multiplier/ed25519_modular_multiplier.v @@ -0,0 +1,410 @@ +//------------------------------------------------------------------------------ +// +// ed25519_modular_multiplier.v +// ----------------------------------------------------------------------------- +// Curve25519 Modular Multiplier. +// +// Authors: Pavel Shatov +// +// Copyright (c) 2015-2016, 2018 NORDUnet A/S +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. 
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ed25519_modular_multiplier
+(
+    clk, rst_n,
+    ena, rdy,
+    a_addr, b_addr, p_addr, p_wren,
+    a_din, b_din, p_dout
+);
+
+
+    //
+    // Constants: an operand is eight 32-bit words, indexed by a 3-bit counter
+    //
+    localparam integer OPERAND_NUM_WORDS  = 8;
+    localparam integer WORD_COUNTER_WIDTH = 3;
+
+
+    //
+    // Handy Numbers: first and last word index of an operand
+    //
+    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_ZERO = 0;
+    localparam [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_LAST = OPERAND_NUM_WORDS - 1;
+
+
+    //
+    // Handy Functions: word index stepping with wrap-around
+    //
+    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_NEXT_OR_ZERO;
+        input [WORD_COUNTER_WIDTH-1:0] index_now;
+        begin
+            WORD_INDEX_NEXT_OR_ZERO = (WORD_INDEX_LAST > index_now) ?
+                index_now + 1'b1 : WORD_INDEX_ZERO;     // wrap after the last word
+        end
+    endfunction
+
+    function [WORD_COUNTER_WIDTH-1:0] WORD_INDEX_PREVIOUS_OR_LAST;
+        input [WORD_COUNTER_WIDTH-1:0] index_now;
+        begin
+            WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_ZERO < index_now) ?
+                index_now - 1'b1 : WORD_INDEX_LAST;     // wrap below the first word
+        end
+    endfunction
+
+
+    //
+    // Ports (p_addr/p_wren/p_dout have no driver while the code below is commented out)
+    //
+    input                           clk;        // clock for all registers
+    input                           rst_n;      // asynchronous reset, active low
+
+    input                           ena;        // sampled while rdy is high to start the FSM
+    output                          rdy;        // high while the FSM sits in its idle state
+
+    output [WORD_COUNTER_WIDTH-1:0] a_addr;     // word index presented for operand A
+    output [WORD_COUNTER_WIDTH-1:0] b_addr;     // word index presented for operand B
+    output [WORD_COUNTER_WIDTH-1:0] p_addr;     // word index presented for result P
+
+    output                          p_wren;     // write strobe for the current P word
+
+    input  [31:0]                   a_din;      // word of A selected by a_addr
+    input  [31:0]                   b_din;      // word of B selected by b_addr
+    output [31:0]                   p_dout;     // word of P to be stored at p_addr
+
+
+    //
+    // Word Indices (never assigned yet: the increment logic below is commented out)
+    //
+    reg [WORD_COUNTER_WIDTH-1:0] index_a;
+    reg [WORD_COUNTER_WIDTH-1:0] index_b;
+
+    /* the address outputs simply mirror the index registers */
+    assign a_addr = index_a;
+    assign b_addr = index_b;
+
+
+    //
+    // FSM: one-hot shift register, a single set bit travels from the MSB down to bit 0
+    //
+    localparam FSM_SHREG_WIDTH = (1 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 1) + (2 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 2) + (0 * OPERAND_NUM_WORDS + 1);
+
+    localparam [FSM_SHREG_WIDTH-1:0] FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};
+
+    reg [FSM_SHREG_WIDTH-1:0] fsm_shreg = FSM_SHREG_INIT;
+
+    assign rdy = fsm_shreg[0];
+
+    wire [OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_a = fsm_shreg[FSM_SHREG_WIDTH-1 -: OPERAND_NUM_WORDS];
+    //wire [1 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_word_a = fsm_shreg[FSM_SHREG_WIDTH - (0 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1)];
+    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_inc_index_b = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 1) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_store_si_msb = fsm_shreg[FSM_SHREG_WIDTH - (1 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 1)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_store_si_lsb = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 2)];
+    //wire [2 * OPERAND_NUM_WORDS-2:0] fsm_shreg_shift_si = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 3) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 1)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_mask_cw1_sum = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4) : FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 4)];
+    //wire [2 * OPERAND_NUM_WORDS-1:0] fsm_shreg_store_c_word = fsm_shreg[FSM_SHREG_WIDTH - (3 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 4)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_start = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 5)];
+    //wire [0 * OPERAND_NUM_WORDS-0:0] fsm_shreg_reduce_stop = fsm_shreg[FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6) : FSM_SHREG_WIDTH - (5 * OPERAND_NUM_WORDS + 6)];
+
+    wire inc_index_a = |fsm_shreg_inc_index_a;     // high while the set bit is in the top OPERAND_NUM_WORDS slots
+// wire store_word_a = |fsm_shreg_store_word_a;
+// wire inc_index_b = |fsm_shreg_inc_index_b;
+// wire clear_mac_ab = |fsm_shreg_inc_index_b;
+// wire shift_wide_a = |fsm_shreg_inc_index_b;
+// wire enable_mac_ab = |fsm_shreg_inc_index_b;
+// wire store_si_msb = |fsm_shreg_store_si_msb;
+// wire store_si_lsb = fsm_shreg_store_si_lsb;
+// wire shift_si = |fsm_shreg_shift_si;
+// wire mask_cw1_sum = fsm_shreg_mask_cw1_sum;
+// wire store_c_word = |fsm_shreg_store_c_word;
+// wire reduce_start = fsm_shreg_reduce_start;
+// wire reduce_stop = fsm_shreg_reduce_stop;
+
+
+    //
+    // FSM Logic
+    //
+// wire reduce_done;
+
+    always @(posedge clk or negedge rst_n)
+    begin : fsm_shreg_step
+        // asynchronous reset: only bit 0 is set, so rdy is high
+        if (!rst_n)
+            fsm_shreg <= FSM_SHREG_INIT;
+        // idle: ena places the set bit in the MSB, otherwise it stays in bit 0
+        else if (rdy)
+            fsm_shreg <= {ena, {(FSM_SHREG_WIDTH-2){1'b0}}, ~ena};
+        // busy: move the set bit one position towards bit 0 on every clock
+        else
+            /*if (!reduce_stop || reduce_done)*/
+            fsm_shreg <= fsm_shreg >> 1;
+        //
+    end
+
+
+//
+// Word Index Increment Logic
+//
+
+/*
+reg index_b_ff;
+
+always @(posedge clk)
+//
+if (inc_index_b) index_b_ff <= ~index_b_ff;
+else index_b_ff <= 1'b0;
+
+always @(posedge clk)
+//
+if (rdy) begin
+//
+index_a <= WORD_INDEX_ZERO;
+index_b <= WORD_INDEX_LAST;
+//
+end else begin
+//
+if (inc_index_a) index_a <= WORD_INDEX_NEXT_OR_ZERO(index_a);
+if (inc_index_b && !index_b_ff) index_b <= WORD_INDEX_PREVIOUS_OR_LAST(index_b);
+//
+end
+
+
+//
+// Wide Operand Buffer
+//
+reg [255:0] buf_a_wide;
+
+always @(posedge clk)
+//
+if (store_word_a)
+buf_a_wide <= {buf_a_wide[16 +: 256 - 3 * 16], {a_din[15:0], a_din[31:16]}, buf_a_wide[256 - 2 * 16 +: 16]};
+else if (shift_wide_a)
+buf_a_wide <= {buf_a_wide[256-(16+1):0], buf_a_wide[256-16+:16]};
+
+
+//
+// Multiplier Array
+//
+wire mac_inhibit; // control signal to pause all accumulators
+
+wire [46: 0] mac[0:15]; // outputs of all accumulators
+reg [15: 0] mac_clear; // individual per-accumulator clear flag
+
+assign mac_inhibit = ~enable_mac_ab;
+
+always @(posedge clk)
+//
+if (!clear_mac_ab)
+mac_clear <= {16{1'b1}};
+else begin
+
+if (mac_clear == {16{1'b1}})
+mac_clear <= {{14{1'b0}}, 1'b1, {1{1'b0}}};
+else
+mac_clear <= (mac_clear[15] == 1'b0) ? {mac_clear[14:0], 1'b0} : {16{1'b1}};
+
+
+end
+
+//
+// Array of parallel multipliers
+//
+genvar i;
+generate for (i=0; i<16; i=i+1)
+begin : gen_mac_array
+//
+mac16_wrapper mac16_inst
+(
+.clk (clk),
+.ce (~mac_inhibit),
+
+.clr (mac_clear[i]),
+
+.a (buf_a_wide[16*i+:16]),
+.b (index_b_ff ? b_din[15:0] : b_din[31:16]),
+.s (mac[i])
+);
+//
+end
+endgenerate
+
+//
+// Intermediate Words
+//
+reg [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb;
+reg [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb;
+
+
+wire [47*(2*OPERAND_NUM_WORDS-1)-1:0] si_msb_new;
+wire [47*(2*OPERAND_NUM_WORDS-0)-1:0] si_lsb_new;
+
+generate for (i=0; i<16; i=i+1)
+begin : gen_si_lsb_new
+assign si_lsb_new[47*i+:47] = mac[15-i];
+end
+endgenerate
+
+generate for (i=1; i<16; i=i+1)
+begin : gen_si_msb_new
+assign si_msb_new[47*(15-i)+:47] = mac_clear[i] ? mac[i] : si_msb[47*(15-i)+:47];
+end
+endgenerate
+
+always @(posedge clk) begin
+//
+if (shift_si) begin
+si_msb <= {{2*47{1'b0}}, si_msb[15*47-1:2*47]};
+si_lsb <= {si_msb[2*47-1:0], si_lsb[16*47-1:2*47]};
+end else begin
+
+if (store_si_msb)
+si_msb <= si_msb_new;
+
+if (store_si_lsb)
+si_lsb <= si_lsb_new;
+end
+
+end
+
+
+//
+// Accumulators
+//
+wire [46: 0] add47_cw0_s;
+wire [46: 0] add47_cw1_s;
+
+
+//
+// cw0, b, cw1, b
+//
+reg [30: 0] si_prev_dly;
+reg [15: 0] si_next_dly;
+
+always @(posedge clk)
+//
+if (shift_si)
+si_prev_dly <= si_lsb[93:63];
+else
+si_prev_dly <= {31{1'b0}};
+
+always @(posedge clk)
+//
+si_next_dly <= si_lsb[62:47];
+
+wire [46: 0] add47_cw0_a = si_lsb[46:0];
+wire [46: 0] add47_cw0_b = {{16{1'b0}}, si_prev_dly};
+
+wire [46: 0] add47_cw1_a = add47_cw0_s;
+wire [46: 0] add47_cw1_b = {{15{1'b0}}, si_next_dly, mask_cw1_sum ? {16{1'b0}} : {1'b0, add47_cw1_s[46:32]}};
+
+adder47_wrapper add47_cw0_inst
+(
+.clk (clk),
+.a (add47_cw0_a),
+.b (add47_cw0_b),
+.s (add47_cw0_s)
+);
+
+adder47_wrapper add47_cw1_inst
+(
+.clk (clk),
+.a (add47_cw1_a),
+.b (add47_cw1_b),
+.s (add47_cw1_s)
+);
+
+
+
+//
+// Full-Size Product
+//
+reg [WORD_COUNTER_WIDTH:0] bram_c_addr;
+
+wire [WORD_COUNTER_WIDTH:0] reduce_c_addr;
+wire [ 31:0] reduce_c_word;
+
+always @(posedge clk)
+//
+if (store_c_word)
+bram_c_addr <= bram_c_addr + 1'b1;
+else
+bram_c_addr <= {2*WORD_COUNTER_WIDTH{1'b0}};
+
+bram_1rw_1ro_readfirst #
+(
+.MEM_WIDTH (32),
+.MEM_ADDR_BITS (WORD_COUNTER_WIDTH + 1)
+)
+bram_c_inst
+(
+.clk (clk),
+
+.a_addr (bram_c_addr),
+.a_wr (store_c_word),
+.a_in (add47_cw1_s[31:0]),
+.a_out (),
+
+.b_addr (reduce_c_addr),
+.b_out (reduce_c_word)
+);
+
+
+//
+// Reduction Stage
+//
+modular_reductor_256 reduce_256_inst
+(
+.clk (clk),
+.rst_n (rst_n),
+
+.ena (reduce_start),
+.rdy (reduce_done),
+
+.x_addr (reduce_c_addr),
+.n_addr (n_addr),
+.p_addr (p_addr),
+.p_wren (p_wren),
+
+.x_din (reduce_c_word),
+.n_din (n_din),
+.p_dout (p_dout)
+);
+*/
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
diff --git a/rtl/modular_multiplier/ed25519_modular_reductor.v b/rtl/modular_multiplier/ed25519_modular_reductor.v
new file mode 100644
index 0000000..5b50cb3
--- /dev/null
+++ b/rtl/modular_multiplier/ed25519_modular_reductor.v
@@ -0,0 +1,699 @@
+//------------------------------------------------------------------------------
+//
+// ed25519_modular_reductor.v
+// -----------------------------------------------------------------------------
+// Curve 25519 Modular Reductor.
+//
+// Authors: Pavel Shatov
+//
+// Copyright (c) 2018, NORDUnet A/S
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// - Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// - Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// - Neither the name of the NORDUnet nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+//------------------------------------------------------------------------------
+
+module ed25519_modular_reductor
+(
+    clk, rst_n,
+    ena, rdy,
+    x_addr, y_addr, y_wren,
+    x_din, y_dout
+);
+
+
+    //
+    // Constants: the output Y is eight 32-bit words; X is addressed with one more bit
+    //
+    localparam integer OPERAND_NUM_WORDS  = 8;
+    localparam integer WORD_COUNTER_WIDTH = 3;
+
+
+    /*
+//
+// Handy Numbers
+//
+localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
+localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
+    */
+/*
+
+    //
+    // Include Handy Functions
+    //
+function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
+input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
+begin
+WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
+WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
+end
+endfunction
+*/
+
+    //
+    // Ports (x_addr, y_addr, y_wren and y_dout have no driver in this revision)
+    //
+    input                           clk;        // system clock
+    input                           rst_n;      // active-low async reset
+
+    input                           ena;        // enable input, sampled while rdy is high
+    output                          rdy;        // ready output, bit 0 of the FSM shift register
+
+    output [WORD_COUNTER_WIDTH  :0] x_addr;     // index of current X word
+    output [WORD_COUNTER_WIDTH-1:0] y_addr;     // index of current Y word (was declared as p_addr, which is not in the port list)
+
+    output                          y_wren;     // store current Y word now
+
+    input  [31:0]                   x_din;      // current word of X
+    output [31:0]                   y_dout;     // current word of Y
+
+/*
+//
+// Word Indices
+//
+reg [WORD_COUNTER_WIDTH:0] index_x;
+
+
+// map registers to output ports
+assign x_addr = index_x;
+*/
+
+    //
+    // FSM: one-hot shift register; width is a placeholder of 2, intended value kept in the comment
+    //
+    localparam integer FSM_SHREG_WIDTH = 2;//(2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
+
+    localparam FSM_SHREG_INIT = {{(FSM_SHREG_WIDTH-1){1'b0}}, 1'b1};
+
+    reg [FSM_SHREG_WIDTH-1:0] fsm_shreg = FSM_SHREG_INIT;
+
+    assign rdy = fsm_shreg[0];
+
+    /*
+wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
+wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
+wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
+
+wire [5-1:0] fsm_shreg_reduce_stage_start;
+wire [5-1:0] fsm_shreg_reduce_stage_stop;
+
+genvar s;
+generate for (s=0; s<5; s=s+1)
+begin : gen_fsm_shreg_reduce_stages
+assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
+assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
+end
+endgenerate
+
+wire inc_index_x = |fsm_shreg_inc_index_x;
+wire store_word_z = |fsm_shreg_store_word_z;
+wire reduce_start = |fsm_shreg_reduce_stage_start;
+wire reduce_stop = |fsm_shreg_reduce_stage_stop;
+wire store_p = fsm_shreg_reduce_stage_stop[0];
+
+
+wire reduce_adder0_done;
+wire reduce_adder1_done;
+wire reduce_subtractor_done;
+
+wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
+
+*/
+    always @(posedge clk or negedge rst_n)
+        // bit 0 set means idle (rdy high)
+        if (rst_n == 1'b0)
+            // asynchronous reset: back to idle, same value as the declaration initializer
+            fsm_shreg <= FSM_SHREG_INIT;
+            //
+        else begin
+            //
+            if (rdy)
+                // idle: ena moves the set bit to the MSB; the middle replication count is 0 while the width is 2
+                fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
+                //
+            else /*if (!reduce_stop || reduce_done_all)*/
+                // busy: move the set bit one position towards bit 0
+                fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
+                //
+        end
+
+/*
+//
+// Word Index Increment Logic
+//
+always @(posedge clk)
+//
+if (rdy)
+//
+index_x <= WORD_INDEX_LAST;
+//
+else if (inc_index_x)
+//
+index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
+
+
+//
+// Look-up Table
+//
+
+//
+// Take a look at the corresponding C model for more information
+// on how exactly the math behind reduction works. The first step
+// is to assemble nine 256-bit values ("z-words") from 32-bit parts
+// of the full 512-bit product ("c-word"). The problem with z5 is
+// that it contains c13 two times. This implementation scans from
+// c15 to c0 and writes current part of c-word into corresponding
+// parts of z-words. Since those 32-bit parts are stored in block
+// memories, one source word can only be written to one location in
+// every z-word at a time. The trick is to delay c13 and then write
+// the delayed value at the corresponding location in z5 instead of
+// the next c12. "z_save" flag is used to indicate that the current
+// word should be delayed and written once again during the next cycle.
+//
+
+reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
+reg [9 -1:0] z_wren; //
+reg [9 -1:0] z_mask; // mask input to store zero word
+reg [9 -1:0] z_save; // save previous word once again
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                  s9     s8     s7     s6     s5     s4     s3     s2     s1
+//                  ||     ||     ||     ||     ||     ||     ||     ||     ||
+4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
+4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
+4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
+4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
+4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
+4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
+4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
+4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
+4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
+4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
+4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
+4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
+4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
+4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
+4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
+4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
+//
+default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
+//
+endcase
+
+always @(posedge clk)
+//
+case (index_x)
+//
+//                   9     8     7     6     5     4     3     2     1
+//                   |     |     |     |     |     |     |     |     |
+4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
+4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
+//
+default: z_wren <= {9{1'b0}};
+//
+endcase
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                   9     8     7     6     5     4     3     2     1
+//                   |     |     |     |     |     |     |     |     |
+4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
+4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
+4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
+4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+//
+default: z_mask <= {9{1'bX}};
+//
+endcase
+
+always @(posedge clk)
+//
+if (inc_index_x)
+//
+case (index_x)
+//
+//                   9     8     7     6     5     4     3     2     1
+//                   |     |     |     |     |     |     |     |     |
+4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
+//
+default: z_save <= {9{1'bX}};
+//
+endcase
+
+
+//
+// Intermediate Numbers
+//
+reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
+wire [ 32-1:0] reduce_z_dout[1:9];
+
+reg [31: 0] x_din_dly;
+always @(posedge clk)
+//
+x_din_dly <= x_din;
+
+
+genvar z;
+generate for (z=1; z<=9; z=z+1)
+//
+begin : gen_z_bram
+//
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_c_inst
+(
+.clk (clk),
+
+.a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
+.a_wr (z_wren[z-1] & store_word_z),
+.a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
+.a_out (),
+
+.b_addr (reduce_z_addr[z]),
+.b_out (reduce_z_dout[z])
+);
+//
+end
+//
+endgenerate
+
+
+
+
+wire [ 32-1:0] bram_sum0_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
+wire bram_sum0_wr_wren;
+
+wire [ 32-1:0] bram_sum1_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
+wire bram_sum1_wr_wren;
+
+wire [ 32-1:0] bram_diff_wr_din;
+wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
+wire bram_diff_wr_wren;
+
+wire [ 32-1:0] bram_sum0_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
+
+wire [ 32-1:0] bram_sum1_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
+
+wire [ 32-1:0] bram_diff_rd_dout;
+reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
+
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_sum0_inst
+(
+.clk (clk),
+
+.a_addr (bram_sum0_wr_addr),
+.a_wr (bram_sum0_wr_wren),
+.a_in (bram_sum0_wr_din),
+.a_out (),
+
+.b_addr (bram_sum0_rd_addr),
+.b_out (bram_sum0_rd_dout)
+);
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_sum1_inst
+(
+.clk (clk),
+
+.a_addr (bram_sum1_wr_addr),
+.a_wr (bram_sum1_wr_wren),
+.a_in (bram_sum1_wr_din),
+.a_out (),
+
+.b_addr (bram_sum1_rd_addr),
+.b_out (bram_sum1_rd_dout)
+);
+
+bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
+bram_diff_inst
+(
+.clk (clk),
+
+.a_addr (bram_diff_wr_addr),
+.a_wr (bram_diff_wr_wren),
+.a_in (bram_diff_wr_din),
+.a_out (),
+
+.b_addr (bram_diff_rd_addr),
+.b_out (bram_diff_rd_dout)
+);
+
+
+wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
+wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
+wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
+
+reg [ 32-1:0] adder0_a_din;
+reg [ 32-1:0] adder0_b_din;
+
+reg [ 32-1:0] adder1_a_din;
+reg [ 32-1:0] adder1_b_din;
+
+reg [ 32-1:0] subtractor_a_din;
+reg [ 32-1:0] subtractor_b_din;
+
+// n_addr - only 1 output, because all modules are in sync
+
+modular_adder #
+(
+.OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+adder_inst0
+(
+.clk (clk),
+.rst_n (rst_n),
+
+.ena (reduce_start),
+.rdy (reduce_adder0_done),
+
+.ab_addr (adder0_ab_addr),
+.n_addr (),
+.s_addr (bram_sum0_wr_addr),
+.s_wren (bram_sum0_wr_wren),
+
+.a_din (adder0_a_din),
+.b_din (adder0_b_din),
+.n_din (n_din),
+.s_dout (bram_sum0_wr_din)
+);
+
+modular_adder #
+(
+.OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+adder_inst1
+(
+.clk (clk),
+.rst_n (rst_n),
+
+.ena (reduce_start),
+.rdy (reduce_adder1_done),
+
+.ab_addr (adder1_ab_addr),
+.n_addr (),
+.s_addr (bram_sum1_wr_addr),
+.s_wren (bram_sum1_wr_wren),
+
+.a_din (adder1_a_din),
+.b_din (adder1_b_din),
+.n_din (n_din),
+.s_dout (bram_sum1_wr_din)
+);
+
+modular_subtractor #
+(
+.OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
+.WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
+)
+subtractor_inst
+(
+.clk (clk),
+.rst_n (rst_n),
+
+.ena (reduce_start),
+.rdy (reduce_subtractor_done),
+
+.ab_addr (subtractor_ab_addr),
+.n_addr (n_addr),
+.d_addr (bram_diff_wr_addr),
+.d_wren (bram_diff_wr_wren),
+
+.a_din (subtractor_a_din),
+.b_din (subtractor_b_din),
+.n_din (n_din),
+.d_dout (bram_diff_wr_din)
+);
+
+
+//
+// Address (Operand) Selector
+//
+always @(*)
+//
+case (fsm_shreg_reduce_stage_stop)
+//
+5'b10000: begin
+reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2] = adder0_ab_addr;
+reduce_z_addr[3] = adder1_ab_addr;
+reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6] = subtractor_ab_addr;
+reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+end
+//
+5'b01000: begin
+reduce_z_addr[1] = adder0_ab_addr;
+reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4] = adder1_ab_addr;
+reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7] = subtractor_ab_addr;
+reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr = adder0_ab_addr;
+bram_sum1_rd_addr = adder1_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+//
+5'b00100: begin
+reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5] = adder0_ab_addr;
+reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8] = subtractor_ab_addr;
+reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr = adder0_ab_addr;
+bram_sum1_rd_addr = adder1_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+//
+5'b00010: begin
+reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9] = subtractor_ab_addr;
+bram_sum0_rd_addr = adder0_ab_addr;
+bram_sum1_rd_addr = adder0_ab_addr;
+bram_diff_rd_addr = subtractor_ab_addr;
+end
+//
+5'b00001: begin
+reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr = adder0_ab_addr;
+bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = adder0_ab_addr;
+end
+//
+default: begin
+reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
+reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
+end
+//
+endcase
+
+
+//
+// adder 0
+//
+always @(*) begin
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: adder0_a_din = reduce_z_dout[2];
+5'b01000: adder0_a_din = bram_sum0_rd_dout;
+5'b00100: adder0_a_din = bram_sum0_rd_dout;
+5'b00010: adder0_a_din = bram_sum0_rd_dout;
+5'b00001: adder0_a_din = bram_sum0_rd_dout;
+default: adder0_a_din = {32{1'bX}};
+endcase
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: adder0_b_din = reduce_z_dout[2];
+5'b01000: adder0_b_din = reduce_z_dout[1];
+5'b00100: adder0_b_din = reduce_z_dout[5];
+5'b00010: adder0_b_din = bram_sum1_rd_dout;
+5'b00001: adder0_b_din = bram_diff_rd_dout;
+default: adder0_b_din = {32{1'bX}};
+endcase
+//
+end
+
+//
+// adder 1
+//
+always @(*) begin
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: adder1_a_din = reduce_z_dout[3];
+5'b01000: adder1_a_din = bram_sum1_rd_dout;
+5'b00100: adder1_a_din = bram_sum1_rd_dout;
+5'b00010: adder1_a_din = {32{1'bX}};
+5'b00001: adder1_a_din = {32{1'bX}};
+default: adder1_a_din = {32{1'bX}};
+endcase
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: adder1_b_din = reduce_z_dout[3];
+5'b01000: adder1_b_din = reduce_z_dout[4];
+5'b00100: adder1_b_din = {32{1'b0}};
+5'b00010: adder1_b_din = {32{1'bX}};
+5'b00001: adder1_b_din = {32{1'bX}};
+default: adder1_b_din = {32{1'bX}};
+endcase
+//
+end
+
+
+//
+// subtractor
+//
+always @(*) begin
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: subtractor_a_din = {32{1'b0}};
+5'b01000: subtractor_a_din = bram_diff_rd_dout;
+5'b00100: subtractor_a_din = bram_diff_rd_dout;
+5'b00010: subtractor_a_din = bram_diff_rd_dout;
+5'b00001: subtractor_a_din = {32{1'bX}};
+default: subtractor_a_din = {32{1'bX}};
+endcase
+//
+case (fsm_shreg_reduce_stage_stop)
+5'b10000: subtractor_b_din = reduce_z_dout[6];
+5'b01000: subtractor_b_din = reduce_z_dout[7];
+5'b00100: subtractor_b_din = reduce_z_dout[8];
+5'b00010: subtractor_b_din = reduce_z_dout[9];
+5'b00001: subtractor_b_din = {32{1'bX}};
+default: subtractor_b_din = {32{1'bX}};
+endcase
+//
+end
+
+
+//
+// Address Mapping
+// NOTE: still written with p_* names; this module's ports are y_addr, y_wren and y_dout
+assign p_addr = bram_sum0_wr_addr;
+assign p_wren = bram_sum0_wr_wren & store_p;
+assign p_dout = bram_sum0_wr_din;
+*/
+
+
+endmodule
+
+
+//------------------------------------------------------------------------------
+// End-of-File
+//------------------------------------------------------------------------------
-- cgit v1.2.3