aboutsummaryrefslogtreecommitdiff
path: root/rtl/modular/modular_reductor_256.v
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-12-19 15:34:55 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2018-12-19 15:41:00 +0300
commitc617c0c711620f58eb3ead22edcdfe57fed06088 (patch)
tree8c15184d5ca6403ea01843560394f6d909a0986f /rtl/modular/modular_reductor_256.v
parentea4d251c81c8b53e0bbc71cd1719dfea4d0517c1 (diff)
* Rewritten from scratch, uses the same hardware architecture as the Ed25519HEADmaster
core for better timing performance * Removed custom modular inversion sub-module, now uses micro-coded modular inversion routine based on Fermat's little theorem (~10% faster) * Uses math primitives from core/lib * Added randomized test vector (see user/shatov/ecdsa_fpga_model/test_vectors/)
Diffstat (limited to 'rtl/modular/modular_reductor_256.v')
-rw-r--r--rtl/modular/modular_reductor_256.v692
1 files changed, 0 insertions, 692 deletions
diff --git a/rtl/modular/modular_reductor_256.v b/rtl/modular/modular_reductor_256.v
deleted file mode 100644
index 6f31570..0000000
--- a/rtl/modular/modular_reductor_256.v
+++ /dev/null
@@ -1,692 +0,0 @@
-//------------------------------------------------------------------------------
-//
-// modular_reductor_256.v
-// -----------------------------------------------------------------------------
-// Modular reductor.
-//
-// Authors: Pavel Shatov
-//
-// Copyright (c) 2015-2016, NORDUnet A/S
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// - Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// - Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// - Neither the name of the NORDUnet nor the names of its contributors may be
-// used to endorse or promote products derived from this software without
-// specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-//
-//------------------------------------------------------------------------------
-
-module modular_reductor_256
- (
- clk, rst_n,
- ena, rdy,
- x_addr, n_addr, p_addr, p_wren,
- x_din, n_din, p_dout
- );
-
- //
- // Constants
- //
- localparam OPERAND_NUM_WORDS = 8;
- localparam WORD_COUNTER_WIDTH = 3;
-
-
- //
- // Handy Numbers
- //
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_ZERO = 0;
- localparam [WORD_COUNTER_WIDTH:0] WORD_INDEX_LAST = 2 * OPERAND_NUM_WORDS - 1;
-
-
- //
- // Handy Functions
- //
- function [WORD_COUNTER_WIDTH:0] WORD_INDEX_PREVIOUS_OR_LAST;
- input [WORD_COUNTER_WIDTH:0] WORD_INDEX_CURRENT;
- begin
- WORD_INDEX_PREVIOUS_OR_LAST = (WORD_INDEX_CURRENT > WORD_INDEX_ZERO) ?
- WORD_INDEX_CURRENT - 1'b1 : WORD_INDEX_LAST;
- end
- endfunction
-
-
- //
- // Ports
- //
- input wire clk; // system clock
- input wire rst_n; // active-low async reset
-
- input wire ena; // enable input
- output wire rdy; // ready output
-
- output wire [WORD_COUNTER_WIDTH-0:0] x_addr; // index of current X word
- output wire [WORD_COUNTER_WIDTH-1:0] n_addr; // index of current N word
- output wire [WORD_COUNTER_WIDTH-1:0] p_addr; // index of current P word
- output wire p_wren; // store current P word now
-
- input wire [ 31:0] x_din; // X
- input wire [ 31:0] n_din; // N (must be P-256!)
- output wire [ 31:0] p_dout; // P = X mod N
-
-
- //
- // Word Indices
- //
- reg [WORD_COUNTER_WIDTH:0] index_x;
-
-
- /* map registers to output ports */
- assign x_addr = index_x;
-
-
- //
- // FSM
- //
- localparam FSM_SHREG_WIDTH = (2 * OPERAND_NUM_WORDS + 1) + (5 * 2) + 1;
-
- reg [FSM_SHREG_WIDTH-1:0] fsm_shreg;
-
- assign rdy = fsm_shreg[0];
-
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_inc_index_x = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 1 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * OPERAND_NUM_WORDS - 1:0] fsm_shreg_store_word_z = fsm_shreg[FSM_SHREG_WIDTH - 0*OPERAND_NUM_WORDS - 2 -: 2 * OPERAND_NUM_WORDS];
- wire [2 * 5 - 1:0] fsm_shreg_reduce_stages = fsm_shreg[ 1 +: 2 * 5];
-
- wire [5-1:0] fsm_shreg_reduce_stage_start;
- wire [5-1:0] fsm_shreg_reduce_stage_stop;
-
- genvar s;
- generate for (s=0; s<5; s=s+1)
- begin : gen_fsm_shreg_reduce_stages
- assign fsm_shreg_reduce_stage_start[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 1];
- assign fsm_shreg_reduce_stage_stop[5 - (s + 1)] = fsm_shreg_reduce_stages[2 * (5 - s) - 2];
- end
- endgenerate
-
- wire inc_index_x = |fsm_shreg_inc_index_x;
- wire store_word_z = |fsm_shreg_store_word_z;
- wire reduce_start = |fsm_shreg_reduce_stage_start;
- wire reduce_stop = |fsm_shreg_reduce_stage_stop;
- wire store_p = fsm_shreg_reduce_stage_stop[0];
-
-
- wire reduce_adder0_done;
- wire reduce_adder1_done;
- wire reduce_subtractor_done;
-
- wire reduce_done_all = reduce_adder0_done & reduce_adder1_done & reduce_subtractor_done;
-
- always @(posedge clk or negedge rst_n)
- //
- if (rst_n == 1'b0)
- //
- fsm_shreg <= {{FSM_SHREG_WIDTH-1{1'b0}}, 1'b1};
- //
- else begin
- //
- if (rdy)
- //
- fsm_shreg <= {ena, {FSM_SHREG_WIDTH-2{1'b0}}, ~ena};
- //
- else if (!reduce_stop || reduce_done_all)
- //
- fsm_shreg <= {1'b0, fsm_shreg[FSM_SHREG_WIDTH-1:1]};
- //
- end
-
-
- //
- // Word Index Increment Logic
- //
- always @(posedge clk)
- //
- if (rdy)
- //
- index_x <= WORD_INDEX_LAST;
- //
- else if (inc_index_x)
- //
- index_x <= WORD_INDEX_PREVIOUS_OR_LAST(index_x);
-
-
- //
- // Look-up Table
- //
-
- //
- // Take a look at the corresponding C model for more information
- // on how exactly the math behind reduction works. The first step
- // is to assemble nine 256-bit values ("z-words") from 32-bit parts
- // of the full 512-bit product ("c-word"). The problem with z5 is
- // that it contains c13 two times. This implementation scans from
- // c15 to c0 and writes current part of c-word into corresponding
- // parts of z-words. Since those 32-bit parts are stored in block
- // memories, one source word can only be written to one location in
- // every z-word at a time. The trick is to delay c13 and then write
- // the delayed value at the corresponding location in z5 instead of
- // the next c12. "z_save" flag is used to indicate that the current
- // word should be delayed and written once again during the next cycle.
- //
-
- reg [9*WORD_COUNTER_WIDTH-1:0] z_addr; //
- reg [9 -1:0] z_wren; //
- reg [9 -1:0] z_mask; // mask input to store zero word
- reg [9 -1:0] z_save; // save previous word once again
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // s9 s8 s7 s6 s5 s4 s3 s2 s1
- // || || || || || || || || ||
- 4'd00: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd00};
- 4'd01: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd01};
- 4'd02: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd02};
- 4'd03: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd03};
- 4'd04: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd04};
- 4'd05: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd05};
- 4'd06: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd06};
- 4'd07: z_addr <= {3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'dxx, 3'd07};
- 4'd08: z_addr <= {3'd02, 3'd03, 3'd04, 3'd06, 3'd07, 3'd00, 3'd00, 3'd00, 3'dxx};
- 4'd09: z_addr <= {3'd03, 3'd04, 3'd06, 3'd03, 3'd00, 3'd01, 3'd01, 3'd01, 3'dxx};
- 4'd10: z_addr <= {3'd04, 3'd05, 3'd05, 3'd07, 3'd01, 3'd02, 3'd02, 3'd02, 3'dxx};
- 4'd11: z_addr <= {3'd05, 3'd06, 3'd07, 3'd00, 3'd02, 3'd03, 3'd07, 3'd03, 3'dxx};
- 4'd12: z_addr <= {3'd06, 3'd07, 3'd00, 3'd01, 3'd06, 3'd04, 3'd03, 3'd04, 3'dxx};
- 4'd13: z_addr <= {3'd07, 3'd00, 3'd01, 3'd02, 3'd03, 3'd05, 3'd04, 3'd05, 3'dxx};
- 4'd14: z_addr <= {3'd00, 3'd01, 3'd02, 3'd04, 3'd04, 3'd06, 3'd05, 3'd06, 3'dxx};
- 4'd15: z_addr <= {3'd01, 3'd02, 3'd03, 3'd05, 3'd05, 3'd07, 3'd06, 3'd07, 3'dxx};
- //
- default: z_addr <= {9*WORD_COUNTER_WIDTH{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd01: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd02: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd03: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd04: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd05: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd06: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd07: z_wren <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1};
- 4'd08: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd09: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd10: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd11: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd12: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd13: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd14: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- 4'd15: z_wren <= {1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b0};
- //
- default: z_wren <= {9{1'b0}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_mask <= {1'b1, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd09: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd10: z_mask <= {1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0};
- 4'd11: z_mask <= {1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b1, 1'b1, 1'b0, 1'b0};
- 4'd12: z_mask <= {1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd13: z_mask <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0};
- 4'd14: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_mask <= {1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_mask <= {9{1'bX}};
- //
- endcase
-
- always @(posedge clk)
- //
- if (inc_index_x)
- //
- case (index_x)
- //
- // 9 8 7 6 5 4 3 2 1
- // | | | | | | | | |
- 4'd00: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd01: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd02: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd03: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd04: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd05: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd06: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd07: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd08: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd09: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd10: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd11: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd12: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b1, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd13: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd14: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- 4'd15: z_save <= {1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0};
- //
- default: z_save <= {9{1'bX}};
- //
- endcase
-
-
- //
- // Intermediate Numbers
- //
- reg [WORD_COUNTER_WIDTH-1:0] reduce_z_addr[1:9];
- wire [ 32-1:0] reduce_z_dout[1:9];
-
- reg [31: 0] x_din_dly;
- always @(posedge clk)
- //
- x_din_dly <= x_din;
-
-
- genvar z;
- generate for (z=1; z<=9; z=z+1)
- //
- begin : gen_z_bram
- //
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_c_inst
- (
- .clk (clk),
-
- .a_addr (z_addr[(z-1) * WORD_COUNTER_WIDTH +: WORD_COUNTER_WIDTH]),
- .a_wr (z_wren[z-1] & store_word_z),
- .a_in (z_mask[z-1] ? {32{1'b0}} : (z_save[z-1] ? x_din_dly : x_din)),
- .a_out (),
-
- .b_addr (reduce_z_addr[z]),
- .b_out (reduce_z_dout[z])
- );
- //
- end
- //
- endgenerate
-
-
-
-
- wire [ 32-1:0] bram_sum0_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum0_wr_addr;
- wire bram_sum0_wr_wren;
-
- wire [ 32-1:0] bram_sum1_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_sum1_wr_addr;
- wire bram_sum1_wr_wren;
-
- wire [ 32-1:0] bram_diff_wr_din;
- wire [WORD_COUNTER_WIDTH-1:0] bram_diff_wr_addr;
- wire bram_diff_wr_wren;
-
- wire [ 32-1:0] bram_sum0_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum0_rd_addr;
-
- wire [ 32-1:0] bram_sum1_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_sum1_rd_addr;
-
- wire [ 32-1:0] bram_diff_rd_dout;
- reg [WORD_COUNTER_WIDTH-1:0] bram_diff_rd_addr;
-
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum0_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum0_wr_addr),
- .a_wr (bram_sum0_wr_wren),
- .a_in (bram_sum0_wr_din),
- .a_out (),
-
- .b_addr (bram_sum0_rd_addr),
- .b_out (bram_sum0_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_sum1_inst
- (
- .clk (clk),
-
- .a_addr (bram_sum1_wr_addr),
- .a_wr (bram_sum1_wr_wren),
- .a_in (bram_sum1_wr_din),
- .a_out (),
-
- .b_addr (bram_sum1_rd_addr),
- .b_out (bram_sum1_rd_dout)
- );
-
- bram_1rw_1ro_readfirst # (.MEM_WIDTH(32), .MEM_ADDR_BITS(WORD_COUNTER_WIDTH))
- bram_diff_inst
- (
- .clk (clk),
-
- .a_addr (bram_diff_wr_addr),
- .a_wr (bram_diff_wr_wren),
- .a_in (bram_diff_wr_din),
- .a_out (),
-
- .b_addr (bram_diff_rd_addr),
- .b_out (bram_diff_rd_dout)
- );
-
-
- wire [WORD_COUNTER_WIDTH-1:0] adder0_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] adder1_ab_addr;
- wire [WORD_COUNTER_WIDTH-1:0] subtractor_ab_addr;
-
- reg [ 32-1:0] adder0_a_din;
- reg [ 32-1:0] adder0_b_din;
-
- reg [ 32-1:0] adder1_a_din;
- reg [ 32-1:0] adder1_b_din;
-
- reg [ 32-1:0] subtractor_a_din;
- reg [ 32-1:0] subtractor_b_din;
-
- // n_addr - only 1 output, because all modules are in sync
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst0
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder0_done),
-
- .ab_addr (adder0_ab_addr),
- .n_addr (),
- .s_addr (bram_sum0_wr_addr),
- .s_wren (bram_sum0_wr_wren),
-
- .a_din (adder0_a_din),
- .b_din (adder0_b_din),
- .n_din (n_din),
- .s_dout (bram_sum0_wr_din)
- );
-
- modular_adder #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- adder_inst1
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_adder1_done),
-
- .ab_addr (adder1_ab_addr),
- .n_addr (),
- .s_addr (bram_sum1_wr_addr),
- .s_wren (bram_sum1_wr_wren),
-
- .a_din (adder1_a_din),
- .b_din (adder1_b_din),
- .n_din (n_din),
- .s_dout (bram_sum1_wr_din)
- );
-
- modular_subtractor #
- (
- .OPERAND_NUM_WORDS (OPERAND_NUM_WORDS),
- .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
- )
- subtractor_inst
- (
- .clk (clk),
- .rst_n (rst_n),
-
- .ena (reduce_start),
- .rdy (reduce_subtractor_done),
-
- .ab_addr (subtractor_ab_addr),
- .n_addr (n_addr),
- .d_addr (bram_diff_wr_addr),
- .d_wren (bram_diff_wr_wren),
-
- .a_din (subtractor_a_din),
- .b_din (subtractor_b_din),
- .n_din (n_din),
- .d_dout (bram_diff_wr_din)
- );
-
-
- //
- // Address (Operand) Selector
- //
- always @(*)
- //
- case (fsm_shreg_reduce_stage_stop)
- //
- 5'b10000: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = adder0_ab_addr;
- reduce_z_addr[3] = adder1_ab_addr;
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = subtractor_ab_addr;
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- 5'b01000: begin
- reduce_z_addr[1] = adder0_ab_addr;
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = adder1_ab_addr;
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = subtractor_ab_addr;
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00100: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = adder0_ab_addr;
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = subtractor_ab_addr;
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder1_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00010: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = subtractor_ab_addr;
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = adder0_ab_addr;
- bram_diff_rd_addr = subtractor_ab_addr;
- end
- //
- 5'b00001: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = adder0_ab_addr;
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = adder0_ab_addr;
- end
- //
- default: begin
- reduce_z_addr[1] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[2] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[3] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[4] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[5] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[6] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[7] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[8] = {WORD_COUNTER_WIDTH{1'bX}};
- reduce_z_addr[9] = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum0_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_sum1_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- bram_diff_rd_addr = {WORD_COUNTER_WIDTH{1'bX}};
- end
- //
- endcase
-
-
- //
- // adder 0
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_a_din = reduce_z_dout[2];
- 5'b01000: adder0_a_din = bram_sum0_rd_dout;
- 5'b00100: adder0_a_din = bram_sum0_rd_dout;
- 5'b00010: adder0_a_din = bram_sum0_rd_dout;
- 5'b00001: adder0_a_din = bram_sum0_rd_dout;
- default: adder0_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder0_b_din = reduce_z_dout[2];
- 5'b01000: adder0_b_din = reduce_z_dout[1];
- 5'b00100: adder0_b_din = reduce_z_dout[5];
- 5'b00010: adder0_b_din = bram_sum1_rd_dout;
- 5'b00001: adder0_b_din = bram_diff_rd_dout;
- default: adder0_b_din = {32{1'bX}};
- endcase
- //
- end
-
- //
- // adder 1
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_a_din = reduce_z_dout[3];
- 5'b01000: adder1_a_din = bram_sum1_rd_dout;
- 5'b00100: adder1_a_din = bram_sum1_rd_dout;
- 5'b00010: adder1_a_din = {32{1'bX}};
- 5'b00001: adder1_a_din = {32{1'bX}};
- default: adder1_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: adder1_b_din = reduce_z_dout[3];
- 5'b01000: adder1_b_din = reduce_z_dout[4];
- 5'b00100: adder1_b_din = {32{1'b0}};
- 5'b00010: adder1_b_din = {32{1'bX}};
- 5'b00001: adder1_b_din = {32{1'bX}};
- default: adder1_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // subtractor
- //
- always @(*) begin
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_a_din = {32{1'b0}};
- 5'b01000: subtractor_a_din = bram_diff_rd_dout;
- 5'b00100: subtractor_a_din = bram_diff_rd_dout;
- 5'b00010: subtractor_a_din = bram_diff_rd_dout;
- 5'b00001: subtractor_a_din = {32{1'bX}};
- default: subtractor_a_din = {32{1'bX}};
- endcase
- //
- case (fsm_shreg_reduce_stage_stop)
- 5'b10000: subtractor_b_din = reduce_z_dout[6];
- 5'b01000: subtractor_b_din = reduce_z_dout[7];
- 5'b00100: subtractor_b_din = reduce_z_dout[8];
- 5'b00010: subtractor_b_din = reduce_z_dout[9];
- 5'b00001: subtractor_b_din = {32{1'bX}};
- default: subtractor_b_din = {32{1'bX}};
- endcase
- //
- end
-
-
- //
- // Address Mapping
- //
- assign p_addr = bram_sum0_wr_addr;
- assign p_wren = bram_sum0_wr_wren & store_p;
- assign p_dout = bram_sum0_wr_din;
-
-
-endmodule
-
-
-//------------------------------------------------------------------------------
-// End-of-File
-//------------------------------------------------------------------------------