aboutsummaryrefslogtreecommitdiff
path: root/src/rtl/modexpa7_systolic_multiplier.v
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl/modexpa7_systolic_multiplier.v')
-rw-r--r--src/rtl/modexpa7_systolic_multiplier.v152
1 files changed, 113 insertions, 39 deletions
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index a1e141e..9d96f98 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -143,6 +143,7 @@ module modexpa7_systolic_multiplier #
* Parameters Latch
*/
reg [OPERAND_ADDR_WIDTH-1:0] n_num_words_latch;
+ reg [OPERAND_ADDR_WIDTH :0] p_num_words_latch;
// save number of words in n when new operation starts
always @(posedge clk)
@@ -200,20 +201,25 @@ module modexpa7_systolic_multiplier #
// loader input
- reg [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr[0:SYSTOLIC_ARRAY_LENGTH-1];
- reg loader_wren[0:SYSTOLIC_ARRAY_LENGTH-1];
+ reg [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr_wr;
+ wire [SYSTOLIC_CNTR_WIDTH-1:0] loader_addr_rd;
+ reg loader_wren;
reg [ 32-1:0] loader_din [0:SYSTOLIC_ARRAY_LENGTH-1];
// loader output
- wire [ 32-1:0] loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1];
+ wire [ 32-1:0] loader_dout[0:SYSTOLIC_ARRAY_LENGTH-1];
+
+ // array_input
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] pe_a_wide;
+ wire [32 * SYSTOLIC_ARRAY_LENGTH - 1 : 0] pe_b_wide;
// generate parallelized loader
genvar i;
generate for (i=0; i<SYSTOLIC_ARRAY_LENGTH; i=i+1)
//
- begin : gen_bram_1rw_readfirst_loader
+ begin : gen_bram_1rw_1ro_readfirst_loader
//
- bram_1rw_readfirst #
+ bram_1rw_1ro_readfirst #
(
.MEM_WIDTH (32),
.MEM_ADDR_BITS (SYSTOLIC_CNTR_WIDTH)
@@ -221,11 +227,15 @@ module modexpa7_systolic_multiplier #
bram_loader
(
.clk (clk),
- .a_addr (loader_addr[i]),
- .a_wr (loader_wren[i]),
+ .a_addr (loader_addr_wr),
+ .a_wr (loader_wren),
.a_in (loader_din[i]),
- .a_out (loader_dout[i])
- );
+ .a_out (),
+ .b_addr (loader_addr_rd),
+ .b_out (loader_dout[i])
+ );
+ //
+ assign pe_b_wide[32 * (i + 1) - 1 -: 32] = loader_dout[i];
//
end
//
@@ -250,22 +260,40 @@ module modexpa7_systolic_multiplier #
// address registers
reg [OPERAND_ADDR_WIDTH-1:0] b_addr;
+ wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_wr;
+ reg [OPERAND_ADDR_WIDTH :0] p_addr_ext_rd;
// handy increment values
- wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] b_addr_next = b_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH :0] p_addr_ext_rd_next = p_addr_ext_rd + 1'b1; // bram_p read address plus one, same width as p_addr_ext_rd
+
+ // write enables
+ wire p_wren;
+
+ // data buses
+ wire [31: 0] p_data_in;
+ wire [31: 0] p_data_out;
// handy stop flags
- wire b_addr_done = (b_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire b_addr_done = (b_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire p_addr_ext_rd_done = (p_addr_ext_rd == bram_addr_ext_last) ? 1'b1 : 1'b0;
// delayed addresses
reg [OPERAND_ADDR_WIDTH-1:0] b_addr_dly;
-
+
always @(posedge clk) b_addr_dly <= b_addr;
// map registers to top-level ports
assign b_bram_addr = b_addr;
+
+ bram_1rw_1ro_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1)) // 32-bit words; address is one bit wider than b_addr
+ bram_p // memory behind the p_* address/data/write-enable signals declared above
+ ( .clk(clk),
+ .a_addr(p_addr_ext_wr), .a_wr(p_wren), .a_in(p_data_in), .a_out(), // port A: write side (same nets as the p_bram_* ports of systolic_pe_array); read-back output unused
+ .b_addr(p_addr_ext_rd), .b_out(p_data_out)); // port B: read side addressed by p_addr_ext_rd; NOTE(review): no assignment to p_addr_ext_rd is visible in this hunk - confirm it is driven
+
/*
* Loader Data Input
@@ -297,17 +325,8 @@ module modexpa7_systolic_multiplier #
always @(posedge clk)
//
case (fsm_next_state)
-
- FSM_STATE_LOAD_WRITE:
- //
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- loader_wren[j] <= 1'b1;
-
- default:
- //
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- loader_wren[j] <= 1'b0;
-
+ FSM_STATE_LOAD_WRITE: loader_wren <= 1'b1;
+ default: loader_wren <= 1'b0;
endcase
@@ -317,17 +336,15 @@ module modexpa7_systolic_multiplier #
always @(posedge clk)
//
- case (fsm_state)
+ case (fsm_state)
- FSM_STATE_LOAD_START:
- //
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- loader_addr[j] <= load_syst_cnt_zero;
-
- FSM_STATE_LOAD_WRITE:
- //
- for (j=0; j<SYSTOLIC_ARRAY_LENGTH; j=j+1)
- loader_addr[j] <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt;
+ FSM_STATE_LOAD_START:
+ //
+ loader_addr_wr <= load_syst_cnt_zero;
+
+ FSM_STATE_LOAD_WRITE:
+ //
+ loader_addr_wr <= !load_syst_cnt_done ? load_syst_cnt_next : load_syst_cnt;
endcase
@@ -344,12 +361,68 @@ module modexpa7_systolic_multiplier #
//
end
+
+ /*
+ * Multiplier Array
+ */
+ reg pe_array_ena;
+ wire pe_array_rdy;
+
+ // registered enable for the PE array: loaded with 1 only when the FSM's
+ // next state is FSM_STATE_MULT_START, and with 0 for any other next state
+ always @(posedge clk)
+ //
+ if (fsm_next_state == FSM_STATE_MULT_START) pe_array_ena <= 1'b1;
+ else pe_array_ena <= 1'b0;
+
+ // capture {n_num_words_latch, 1'b1} (i.e. 2 * n_num_words_latch + 1) when the
+ // next state is FSM_STATE_MULT_START; the register holds its value otherwise
+ always @(posedge clk)
+ if (fsm_next_state == FSM_STATE_MULT_START)
+ p_num_words_latch <= {n_num_words_latch, 1'b1};
+
+
+ modexpa7_systolic_multiplier_array # // multiplier array instance; its internals are not visible in this file
+ (
+ .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH), // parameters are passed through unchanged from this module
+ .SYSTOLIC_ARRAY_POWER (SYSTOLIC_ARRAY_POWER)
+ )
+ systolic_pe_array
+ (
+ .clk (clk),
+ .rst_n (rst_n),

+ .ena (pe_array_ena), // registered flag, 1 when fsm_next_state was FSM_STATE_MULT_START
+ .rdy (pe_array_rdy), // tested by the FSM in FSM_STATE_MULT_CRUNCH to advance to FSM_STATE_MULT_FINAL

+ .loader_addr_rd (loader_addr_rd), // same net as b_addr of every loader BRAM; presumably driven by the array - confirm port direction

+ .pe_a_wide (), // NOTE(review): left unconnected although a pe_a_wide wire is declared in this module - confirm this is intentional
+ .pe_b_wide (pe_b_wide), // loader_dout[i] packed 32 bits per index i

+ .p_bram_addr (p_addr_ext_wr), // these three nets are the port A (write) address, data and enable of bram_p
+ .p_bram_in (p_data_in),
+ .p_bram_wr (p_wren),


+ .n_num_words (n_num_words_latch), // word count of n latched at operation start
+ .p_num_words (p_num_words_latch) // {n_num_words_latch, 1'b1}, latched when MULT_START is the next state
+ );
+
+
+
+
+
+
+
+
+
/*
* FSM Process
- */
+ */
always @(posedge clk or negedge rst_n)
//
if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
@@ -373,13 +446,14 @@ module modexpa7_systolic_multiplier #
else fsm_next_state = FSM_STATE_LOAD_SHIFT;
FSM_STATE_LOAD_WRITE: if (load_syst_cnt_done) fsm_next_state = FSM_STATE_LOAD_FINAL;
else fsm_next_state = FSM_STATE_LOAD_SHIFT;
- FSM_STATE_LOAD_FINAL: fsm_next_state = FSM_STATE_STOP;
+ FSM_STATE_LOAD_FINAL: fsm_next_state = FSM_STATE_MULT_START;
//
- //FSM_STATE_MULT_START:
- //FSM_STATE_MULT_CRUNCH:
- //FSM_STATE_MULT_FINAL:
+ FSM_STATE_MULT_START: fsm_next_state = FSM_STATE_MULT_CRUNCH;
+ FSM_STATE_MULT_CRUNCH: if (pe_array_rdy) fsm_next_state = FSM_STATE_MULT_FINAL;
+ else fsm_next_state = FSM_STATE_MULT_CRUNCH;
+ FSM_STATE_MULT_FINAL: fsm_next_state = FSM_STATE_STOP;
//
- FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
+ FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;
//
endcase
//