aboutsummaryrefslogtreecommitdiff
path: root/src/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'src/rtl')
-rw-r--r--src/rtl/modexpa7_systolic_multiplier.v239
1 files changed, 234 insertions, 5 deletions
diff --git a/src/rtl/modexpa7_systolic_multiplier.v b/src/rtl/modexpa7_systolic_multiplier.v
index 56e7be3..513b5aa 100644
--- a/src/rtl/modexpa7_systolic_multiplier.v
+++ b/src/rtl/modexpa7_systolic_multiplier.v
@@ -118,6 +118,10 @@ module modexpa7_systolic_multiplier #
localparam [ 7: 0] FSM_STATE_MULT_Q_N_RELOAD = 8'h63;
localparam [ 7: 0] FSM_STATE_MULT_Q_N_FINAL = 8'h64;
+ localparam [ 7: 0] FSM_STATE_SAVE_START = 8'h71;
+ localparam [ 7: 0] FSM_STATE_SAVE_WRITE = 8'h72;
+ localparam [ 7: 0] FSM_STATE_SAVE_FINAL = 8'h73;
+
localparam [ 7: 0] FSM_STATE_STOP = 8'hFF;
//
@@ -271,7 +275,7 @@ module modexpa7_systolic_multiplier #
//
// Loader currently stores B, N_COEFF and N, it can be coded another way
- // to initially stire B, then AB, then Q. Some memory can be saved thay way.
+ // to initially store B, then AB, then Q. Some memory can be saved thay way.
// Maybe later...
//
@@ -324,6 +328,9 @@ module modexpa7_systolic_multiplier #
reg [OPERAND_ADDR_WIDTH :0] ab_addr_ext;
reg [OPERAND_ADDR_WIDTH-1:0] q_addr;
reg [OPERAND_ADDR_WIDTH :0] qn_addr_ext;
+ reg [OPERAND_ADDR_WIDTH-1:0] s_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] sn_addr;
+ reg [OPERAND_ADDR_WIDTH-1:0] r_addr;
/* handy increment values */
wire [OPERAND_ADDR_WIDTH-1:0] a_addr_next = a_addr + 1'b1;
@@ -333,6 +340,9 @@ module modexpa7_systolic_multiplier #
wire [OPERAND_ADDR_WIDTH :0] ab_addr_ext_next = ab_addr_ext + 1'b1;
wire [OPERAND_ADDR_WIDTH-1:0] q_addr_next = q_addr + 1'b1;
wire [OPERAND_ADDR_WIDTH :0] qn_addr_ext_next = qn_addr_ext + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] s_addr_next = s_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] sn_addr_next = sn_addr + 1'b1;
+ wire [OPERAND_ADDR_WIDTH-1:0] r_addr_next = r_addr + 1'b1;
/* handy stop flags */
wire a_addr_done = (a_addr == bram_addr_last) ? 1'b1 : 1'b0;
@@ -342,6 +352,9 @@ module modexpa7_systolic_multiplier #
wire ab_addr_ext_done = (ab_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0;
wire q_addr_done = (q_addr == bram_addr_last) ? 1'b1 : 1'b0;
wire qn_addr_ext_done = (qn_addr_ext == bram_addr_ext_last) ? 1'b1 : 1'b0;
+ wire s_addr_done = (s_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire sn_addr_done = (sn_addr == bram_addr_last) ? 1'b1 : 1'b0;
+ wire r_addr_done = (r_addr == bram_addr_last) ? 1'b1 : 1'b0;
/* delayed B address */
reg [OPERAND_ADDR_WIDTH-1:0] b_addr_dly;
@@ -358,9 +371,16 @@ module modexpa7_systolic_multiplier #
assign b_bram_addr = b_addr;
assign n_coeff_bram_addr = n_coeff_addr;
assign n_bram_addr = n_addr;
+ assign r_bram_addr = r_addr;
//
+ // Flag
+ //
+ reg flag_select_s;
+
+
+ //
// Memory Address Control Logic
//
always @(posedge clk) begin
@@ -375,6 +395,20 @@ module modexpa7_systolic_multiplier #
FSM_STATE_LOAD_N_SHIFT: n_addr <= n_addr_next;
endcase
//
+ case (fsm_state)
+ FSM_STATE_MULT_Q_N_RELOAD:
+ if (qn_addr_ext == {1'b0, bram_addr_last})
+ n_addr <= bram_addr_zero;
+ else if (qn_addr_ext > {1'b0, bram_addr_last})
+ n_addr <= n_addr_next;
+
+ endcase
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START: r_addr <= bram_addr_zero;
+ FSM_STATE_SAVE_WRITE: r_addr <= r_addr_next;
+ endcase
+ //
case (fsm_next_state)
FSM_STATE_MULT_A_B_START: a_addr <= bram_addr_zero;
FSM_STATE_MULT_A_B_RELOAD: a_addr <= !a_addr_done ? a_addr_next : a_addr;
@@ -391,16 +425,28 @@ module modexpa7_systolic_multiplier #
reg [31: 0] ab_data_in;
reg [31: 0] q_data_in;
reg [31: 0] qn_data_in;
+ wire [31: 0] s_data_in;
+ wire [31: 0] sn_data_in;
+ reg [31: 0] r_data_in;
/* memory outputs */
wire [31: 0] ab_data_out;
wire [31: 0] q_data_out;
wire [31: 0] qn_data_out;
+ wire [31: 0] s_data_out;
+ wire [31: 0] sn_data_out;
/* write enables */
reg ab_wren;
reg q_wren;
reg qn_wren;
+ reg s_wren;
+ reg sn_wren;
+ reg r_wren;
+
+ /* map */
+ assign r_bram_in = r_data_in;
+ assign r_bram_wr = r_wren;
bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1))
bram_ab (.clk(clk), .a_addr(ab_addr_ext), .a_wr(ab_wren), .a_in(ab_data_in), .a_out(ab_data_out));
@@ -411,6 +457,12 @@ module modexpa7_systolic_multiplier #
bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH+1))
bram_qn (.clk(clk), .a_addr(qn_addr_ext), .a_wr(qn_wren), .a_in(qn_data_in), .a_out(qn_data_out));
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_s (.clk(clk), .a_addr(s_addr), .a_wr(s_wren), .a_in(s_data_in), .a_out(s_data_out));
+
+ bram_1rw_readfirst #(.MEM_WIDTH(32), .MEM_ADDR_BITS(OPERAND_ADDR_WIDTH))
+ bram_sn (.clk(clk), .a_addr(sn_addr), .a_wr(sn_wren), .a_in(sn_data_in), .a_out(sn_data_out));
+
//
// Wide Operand Loader
@@ -646,13 +698,46 @@ module modexpa7_systolic_multiplier #
case (fsm_state)
FSM_STATE_MULT_A_B_START: ab_addr_ext <= bram_addr_ext_zero;
FSM_STATE_MULT_AB_N_COEFF_START: q_addr <= bram_addr_zero;
- FSM_STATE_MULT_Q_N_START: qn_addr_ext <= bram_addr_ext_zero;
+ FSM_STATE_MULT_Q_N_START: begin qn_addr_ext <= bram_addr_ext_zero;
+ ab_addr_ext <= bram_addr_ext_zero;
+ end
FSM_STATE_MULT_A_B_RELOAD: ab_addr_ext <= ab_addr_ext_next;
FSM_STATE_MULT_AB_N_COEFF_RELOAD: q_addr <= q_addr_next;
- FSM_STATE_MULT_Q_N_RELOAD: qn_addr_ext <= qn_addr_ext_next;
+ FSM_STATE_MULT_Q_N_RELOAD: begin qn_addr_ext <= qn_addr_ext_next;
+ ab_addr_ext <= ab_addr_ext_next;
+ end
+ endcase
+ //
+ case (fsm_state)
+
+ FSM_STATE_MULT_Q_N_RELOAD: begin
+ if (qn_addr_ext == {1'b0, bram_addr_last}) begin
+ s_addr <= bram_addr_zero;
+ sn_addr <= bram_addr_zero;
+ end
+
+ if ((qn_addr_ext > {1'b0, bram_addr_last}) && (qn_addr_ext < bram_addr_ext_last)) begin
+ s_addr <= s_addr_next;
+ sn_addr <= sn_addr_next;
+ end
+
+ if (qn_addr_ext == bram_addr_ext_last) begin
+ s_addr <= bram_addr_zero;
+ sn_addr <= bram_addr_zero;
+ end
+
+ end
+
+ FSM_STATE_MULT_Q_N_FINAL,
+ FSM_STATE_SAVE_START,
+ FSM_STATE_SAVE_WRITE: begin
+ s_addr <= !s_addr_done ? s_addr_next : s_addr;
+ sn_addr <= !sn_addr_done ? sn_addr_next : sn_addr;
+ end
endcase
+
//
case (fsm_next_state)
FSM_STATE_MULT_AB_N_COEFF_START: ab_addr_ext <= bram_addr_ext_zero;
@@ -692,7 +777,12 @@ module modexpa7_systolic_multiplier #
qn_wren <= 1'b0;
qn_data_in <= 32'hXXXXXXXX;
end
-
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START: r_wren <= 1'b1;
+ FSM_STATE_SAVE_WRITE: r_wren <= ~r_addr_done;
+ default: r_wren <= 1'b0;
+ endcase
//
end
@@ -816,6 +906,140 @@ module modexpa7_systolic_multiplier #
//
end
+
+ //
+ // Adder
+ //
+ /*
+ * This adder is used to calculate S = AB + QN.
+ *
+ */
+ reg add1_ce; // clock enable
+ reg [31: 0] add1_s; // sum output
+ wire add1_c_in; // carry input
+ wire [31: 0] add1_a; // A-input
+ reg [31: 0] add1_b; // B-input
+ reg add1_c_in_mask; // flag to not carry anything into the very first word
+ reg add1_c_out; // carry output
+
+ /* add masking into carry feedback chain */
+ assign add1_c_in = add1_c_out & ~add1_c_in_mask;
+
+ /* mask carry for the very first word of N */
+ //always @(posedge clk) add1_c_in_mask <= (fsm_next_state == FSM_STATE_INIT_2) ? 1'b1 : 1'b0;
+
+ always @(posedge clk)
+ //
+ if (add1_ce)
+ //
+ {add1_c_out, add1_s} <= {{1{1'b0}}, add1_a} + {{1{1'b0}}, add1_b} + {{32{1'b0}}, add1_c_in};
+
+ assign add1_a = qn_data_in;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_b <= shreg_done_latency_dly ? ab_data_out : 32'hXXXXXXXX;
+ else
+ add1_b <= 32'hXXXXXXXX;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_c_in_mask <= (shreg_done_latency_dly && (ab_addr_ext == bram_addr_ext_zero)) ? 1'b1 : 1'b0;
+ else
+ add1_c_in_mask <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ add1_ce <= shreg_done_latency_dly;
+ else
+ add1_ce <= 1'b0;
+
+
+ assign s_data_in = add1_s;
+ assign sn_data_in = sub1_d;
+
+ always @(posedge clk) begin
+ //
+ s_wren <= add1_ce;
+ sn_wren <= sub1_ce;
+ end
+
+
+
+ //
+ // Subtractor
+ //
+ /*
+ * This subtractor is used to calculate SN = S - N.
+ *
+ */
+ reg sub1_ce; // clock enable
+ reg [31: 0] sub1_d; // difference output
+ wire sub1_b_in; // borrow input
+ wire [31: 0] sub1_a; // A-input
+ reg [31: 0] sub1_b; // B-input
+ reg sub1_b_in_mask; // flag to not borrow anything from the very first word
+ reg sub1_b_out; // borrow output
+
+ /* add masking into borrow feedback chain */
+ assign sub1_b_in = sub1_b_out & ~sub1_b_in_mask;
+
+ always @(posedge clk)
+ //
+ if (sub1_ce)
+ //
+ {sub1_b_out, sub1_d} <= {{1{1'b0}}, sub1_a} - {{1{1'b0}}, sub1_b} - {{32{1'b0}}, sub1_b_in};
+
+ assign sub1_a = add1_s;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_b <= add1_ce ? n_bram_out : 32'hXXXXXXXX;
+ else
+ sub1_b <= 32'hXXXXXXXX;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_b_in_mask <= (add1_ce && ((qn_addr_ext - 1'b1) == {1'b0, bram_addr_last})) ? 1'b1 : 1'b0;
+ else
+ sub1_b_in_mask <= 1'b0;
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_CRUNCH)
+ sub1_ce <= add1_ce && (qn_addr_ext > {1'b0, q_addr});
+ else
+ sub1_ce <= 1'b0;
+
+
+ assign s_data_in = add1_s;
+
+ always @(posedge clk)
+ //
+ s_wren <= add1_ce;
+
+
+
+ always @(posedge clk)
+ //
+ if (fsm_state == FSM_STATE_MULT_Q_N_FINAL)
+ flag_select_s <= sub1_b_out & ~add1_c_out;
+
+
+ always @(posedge clk)
+ //
+ case (fsm_state)
+ FSM_STATE_SAVE_START,
+ FSM_STATE_SAVE_WRITE:
+ r_data_in <= flag_select_s ? s_data_out : sn_data_out;
+ endcase
+
+
//
// FSM Process
@@ -878,7 +1102,12 @@ module modexpa7_systolic_multiplier #
else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH;
FSM_STATE_MULT_Q_N_RELOAD: if (qn_addr_ext_done) fsm_next_state = FSM_STATE_MULT_Q_N_FINAL;
else fsm_next_state = FSM_STATE_MULT_Q_N_CRUNCH;
- FSM_STATE_MULT_Q_N_FINAL: fsm_next_state = FSM_STATE_STOP;
+ FSM_STATE_MULT_Q_N_FINAL: fsm_next_state = FSM_STATE_SAVE_START;
+ //
+ FSM_STATE_SAVE_START: fsm_next_state = FSM_STATE_SAVE_WRITE;
+ FSM_STATE_SAVE_WRITE: if (r_addr_done) fsm_next_state = FSM_STATE_SAVE_FINAL;
+ else fsm_next_state = FSM_STATE_SAVE_WRITE;
+ FSM_STATE_SAVE_FINAL: fsm_next_state = FSM_STATE_STOP;
//
FSM_STATE_STOP: fsm_next_state = FSM_STATE_IDLE;