aboutsummaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'rtl')
-rw-r--r-- rtl/modexpng_core_top.v | 20
-rw-r--r-- rtl/modexpng_general_worker.v | 1180
-rw-r--r-- rtl/modexpng_microcode.vh | 12
-rw-r--r-- rtl/modexpng_uop_rom.v | 26
4 files changed, 767 insertions, 471 deletions
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index c78a969..dea7f0a 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -87,7 +87,9 @@ module modexpng_core_top
wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
(uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
(uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
- (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ;
+ (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ;
wire uop_loop_now;
@@ -1113,8 +1115,15 @@ module modexpng_core_top
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_MODULAR_SUBTRACT: begin
+ wrk_sel_wide_out <= uop_data_sel_wide_out;
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+ end
+ //
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
wrk_sel_wide_in <= uop_data_sel_wide_in;
wrk_sel_wide_out <= uop_data_sel_wide_out;
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
@@ -1157,7 +1166,8 @@ module modexpng_core_top
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_COPY_LADDERS_X2Y:
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
//
UOP_OPCODE_MODULAR_REDUCE_INIT: begin
@@ -1171,6 +1181,10 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
end
//
+ UOP_OPCODE_MODULAR_SUBTRACT: begin
+ wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ end
+ //
UOP_OPCODE_LADDER_INIT: begin
io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 269ef98..74c939b 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -1,70 +1,22 @@
module modexpng_general_worker
(
- clk,
- rst,
-
- ena,
- rdy,
-
- sel_narrow_in,
- sel_narrow_out,
- sel_wide_in,
- sel_wide_out,
-
+ clk, rst,
+ ena, rdy,
+ sel_narrow_in, sel_narrow_out,
+ sel_wide_in, sel_wide_out,
opcode,
-
- word_index_last,
- word_index_last_half,
-
- wrk_rd_wide_xy_ena_x,
- wrk_rd_wide_xy_bank_x,
- wrk_rd_wide_xy_addr_x,
- wrk_rd_wide_x_din_x,
- wrk_rd_wide_y_din_x,
-
- wrk_rd_narrow_xy_ena_x,
- wrk_rd_narrow_xy_bank_x,
- wrk_rd_narrow_xy_addr_x,
- wrk_rd_narrow_x_din_x,
- wrk_rd_narrow_y_din_x,
-
- wrk_rd_wide_xy_ena_y,
- wrk_rd_wide_xy_bank_y,
- wrk_rd_wide_xy_addr_y,
- wrk_rd_wide_x_din_y,
- wrk_rd_wide_y_din_y,
-
- wrk_rd_narrow_xy_ena_y,
- wrk_rd_narrow_xy_bank_y,
- wrk_rd_narrow_xy_addr_y,
- wrk_rd_narrow_x_din_y,
- wrk_rd_narrow_y_din_y,
-
- wrk_wr_wide_xy_ena_x,
- wrk_wr_wide_xy_bank_x,
- wrk_wr_wide_xy_addr_x,
- wrk_wr_wide_x_dout_x,
- wrk_wr_wide_y_dout_x,
-
- wrk_wr_narrow_xy_ena_x,
- wrk_wr_narrow_xy_bank_x,
- wrk_wr_narrow_xy_addr_x,
- wrk_wr_narrow_x_dout_x,
- wrk_wr_narrow_y_dout_x,
-
- wrk_wr_wide_xy_ena_y,
- wrk_wr_wide_xy_bank_y,
- wrk_wr_wide_xy_addr_y,
- wrk_wr_wide_x_dout_y,
- wrk_wr_wide_y_dout_y,
-
- wrk_wr_narrow_xy_ena_y,
- wrk_wr_narrow_xy_bank_y,
- wrk_wr_narrow_xy_addr_y,
- wrk_wr_narrow_x_dout_y,
- wrk_wr_narrow_y_dout_y
+ word_index_last, word_index_last_half,
+ wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
+ wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
+ wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
+ wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
+ wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
+ wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
+ wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
+ wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);
+
//
// Headers
//
@@ -143,30 +95,44 @@ module modexpng_general_worker
//
// FSM Declaration
//
- localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00;
+ localparam [5:0] WRK_FSM_STATE_IDLE = 6'h00;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02;
- localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1 = 6'h01;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2 = 6'h02;
+ localparam [5:0] WRK_FSM_STATE_BUSY = 6'h03;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1 = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2 = 6'h06;
- localparam [4:0] WRK_FSM_STATE_STOP = 5'h07;
+ localparam [5:0] WRK_FSM_STATE_STOP = 6'h07;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13;
- localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14;
- localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13;
+ localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14;
+ localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19;
+
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP = 6'h20;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP = 6'h21;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP = 6'h22;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP = 6'h23;
+ localparam [5:0] WRK_FSM_STATE_BUSY_TP = 6'h24;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28;
+ localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP = 6'h29;
- reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep
- reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [5:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ reg [5:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaved source/destination banks (needed by copy_ladders_x2y and cross_ladders_x2y)
+ reg [5:0] wrk_fsm_state_next_two_pass; // two address space sweeps
+ reg wrk_fsm_two_pass_pass; // 0=first pass, 1=second pass
+ reg wrk_fsm_two_pass_pass_dly; // presumably a delayed copy of wrk_fsm_two_pass_pass (0=first pass, 1=second pass) -- TODO confirm against its assignment
// TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps)
@@ -292,37 +258,36 @@ module modexpng_general_worker
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
-
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly2;
always @(posedge clk) begin
//
- {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
- //
- {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
- {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};
- //
- {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
- {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
+ {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
//
- {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
- {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+ {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
//
{wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
{wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
//
{wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
- {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x};
+ {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ {wrk_rd_narrow_y_din_y_dly2, wrk_rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y};
//
end
//
- // Read Enable Logic
+ // Source Read Enable Logic
//
task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask
@@ -340,48 +305,54 @@ module modexpng_general_worker
//
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
- /*
- rd_wide_xy_ena_x <= 1'b0;
- rd_wide_xy_ena_y <= 1'b0;
- rd_narrow_xy_ena_x <= 1'b0;
- rd_narrow_xy_ena_y <= 1'b0;
- */
+ //
end else begin
//
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
//
- //rd_wide_xy_ena_x <= 1'b0;
- //rd_wide_xy_ena_y <= 1'b0;
- //rd_narrow_xy_ena_x <= 1'b0;
- //rd_narrow_xy_ena_y <= 1'b0;
+ // one_pass
//
- case (opcode)
+ case (wrk_fsm_state_next_one_pass)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_MODULAR_REDUCE_INIT:
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1,
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_narrow_xy_rd_en;
- //{rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{1'b1}};
//
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ //
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ //
+ end
//
endcase
- //
//
- UOP_OPCODE_COPY_CRT_Y2X:
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1,
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M1,
+ WRK_FSM_STATE_BUSY_M2:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1,
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_rd_en;
enable_narrow_xy_rd_en;
@@ -389,24 +360,29 @@ module modexpng_general_worker
end
//
endcase
+ //
+ endcase
+ //
+ // two_pass
+ //
+ case (wrk_fsm_state_next_two_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_TP,
+ WRK_FSM_STATE_LATENCY_PRE2_TP,
+ WRK_FSM_STATE_LATENCY_PRE3_TP,
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1,
- WRK_FSM_STATE_LATENCY_PRE1_M2,
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M1,
- WRK_FSM_STATE_BUSY_M2: begin
- //
- enable_wide_xy_rd_en;
- enable_narrow_xy_rd_en;
+ case (opcode)
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- end
+ if (!wrk_fsm_two_pass_pass) begin
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ end else
+ enable_narrow_xy_rd_en;
//
- endcase
+ endcase
//
endcase
//
@@ -414,7 +390,7 @@ module modexpng_general_worker
//
- // Write Enable Logic
+ // Destination Write Enable Logic
//
task _update_wide_xy_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask
@@ -432,71 +408,53 @@ module modexpng_general_worker
//
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
- //wr_wide_xy_ena_x <= 1'b0;
- //wr_wide_xy_ena_y <= 1'b0;
- //wr_narrow_xy_ena_x <= 1'b0;
- //wr_narrow_xy_ena_y <= 1'b0;
//
end else begin
//
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
//
- //wr_wide_xy_ena_x <= 1'b0;
- //wr_wide_xy_ena_y <= 1'b0;
- //wr_narrow_xy_ena_x <= 1'b0;
- //wr_narrow_xy_ena_y <= 1'b0;
+ // one_pass
//
- case (opcode)
+ case (wrk_fsm_state)
//
- UOP_OPCODE_PROPAGATE_CARRIES:
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_PROPAGATE_CARRIES:
//
enable_narrow_xy_wr_en;
//
- //
- endcase
- //
- UOP_OPCODE_COPY_CRT_Y2X:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_COPY_CRT_Y2X: begin
//
enable_wide_xy_wr_en;
- enable_narrow_xy_wr_en;
+ enable_narrow_xy_wr_en;
//
end
//
- endcase
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_wide_xy_wr_en;
- //
//
endcase
+ //
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
@@ -507,12 +465,42 @@ module modexpng_general_worker
//
endcase
//
+ // two_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
+ //
+ if (!wrk_fsm_two_pass_pass)
+ enable_narrow_xy_wr_en;
+ else begin
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
+ end
+ //
+ endcase
+ //
+ endcase
+ //
end
//
- // Data Logic
+ // Source to Destination Data Logic
+ //
+
+ //
+ // UOP_OPCODE_PROPAGATE_CARRIES
//
+
reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
@@ -523,112 +511,300 @@ module modexpng_general_worker
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
+ wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+
wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
+ task update_wide_dout;
+ input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
+ {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <=
+ { x_x, y_x, x_y, y_y };
+ endtask
+
+ task update_narrow_dout;
+ input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y;
+ {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <=
+ { x_x, y_x, x_y, y_y };
+ endtask
+
+ task update_narrow_carries;
+ input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry;
+ {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <=
+ { x_x_cry, y_x_cry, x_y_cry, y_y_cry };
+ endtask
+
+
+ always @(posedge clk)
+ //
+ if (opcode == UOP_OPCODE_PROPAGATE_CARRIES)
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_LATENCY_PRE2:
+ //
+ update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1:
+ //
+ update_narrow_carries(rd_narrow_x_din_x_w_cry_msb,
+ rd_narrow_y_din_x_w_cry_msb,
+ rd_narrow_x_din_y_w_cry_msb,
+ rd_narrow_y_din_y_w_cry_msb);
+ //
+ endcase
+
+
+ //
+ // UOP_OPCODE_MODULAR_SUBTRACT
+ //
+
+ reg [WORD_W:0] modsub_x_ab;
+ reg [WORD_W:0] modsub_y_ab;
+
+ reg [WORD_W:0] modsub_x_ab_dly;
+ reg [WORD_W:0] modsub_y_ab_dly;
+
+ reg [WORD_W:0] modsub_x_abn;
+ reg [WORD_W:0] modsub_y_abn;
+
+ reg modsub_x_ab_mask_now;
+ reg modsub_y_ab_mask_now;
+
+ reg modsub_x_abn_mask_now;
+ reg modsub_y_abn_mask_now;
+
+ reg modsub_x_borrow_r;
+ reg modsub_y_borrow_r;
+
+ wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W];
+ wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W];
+
+ wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W];
+ wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W];
+
+ wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
+
+ wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]};
+
+ wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]};
+ wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]};
+
+ wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]};
+ wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]};
+
+ wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2;
+ wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2;
+
+ wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]};
+
+ task update_modsub_ab;
+ begin
+ modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked;
+ modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked;
+ end
+ endtask
+
+ task update_modsub_abn;
+ begin
+ modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked;
+ modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked;
+ end
+ endtask
+
+ always @(posedge clk)
+ //
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_POST4_TP:
+ if (!wrk_fsm_two_pass_pass)
+ {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]};
+ endcase
+
+ always @(posedge clk) begin
+ modsub_x_ab_dly <= modsub_x_ab;
+ modsub_y_ab_dly <= modsub_y_ab;
+ end
+
always @(posedge clk) begin
//
- wr_wide_x_dout_x <= WORD_EXT_DNC;
- wr_wide_y_dout_x <= WORD_EXT_DNC;
- wr_wide_x_dout_y <= WORD_EXT_DNC;
- wr_wide_y_dout_y <= WORD_EXT_DNC;
- wr_narrow_x_dout_x <= WORD_EXT_DNC;
- wr_narrow_y_dout_x <= WORD_EXT_DNC;
- wr_narrow_x_dout_y <= WORD_EXT_DNC;
- wr_narrow_y_dout_y <= WORD_EXT_DNC;
+ modsub_x_ab <= {1'bX, WORD_DNC};
+ modsub_y_ab <= {1'bX, WORD_DNC};
//
- case (opcode)
+ modsub_x_abn <= {1'bX, WORD_DNC};
+ modsub_y_abn <= {1'bX, WORD_DNC};
+ //
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
//
- UOP_OPCODE_PROPAGATE_CARRIES:
+ case (wrk_fsm_state)
//
- case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_PRE3_TP:
+ update_modsub_ab;
+
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP: begin
+ update_modsub_ab;
+ update_modsub_abn;
+ end
+ //
+ WRK_FSM_STATE_LATENCY_POST3_TP:
//
- WRK_FSM_STATE_LATENCY_PRE2: begin
- rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
- rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
- rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
- rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
- end
+ update_modsub_abn;
+ //
+ endcase
+ //
+ end
+
+ always @(posedge clk) begin
+ //
+ modsub_x_ab_mask_now <= 1'b0;
+ modsub_y_ab_mask_now <= 1'b0;
+ //
+ modsub_x_abn_mask_now <= 1'b0;
+ modsub_y_abn_mask_now <= 1'b0;
+ //
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_TP: begin
+ modsub_x_ab_mask_now <= 1'b1;
+ modsub_y_ab_mask_now <= 1'b1;
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE3_TP: begin
+ modsub_x_abn_mask_now <= 1'b1;
+ modsub_y_abn_mask_now <= 1'b1;
+ end
+ //
+ endcase
+ //
+ end
+
+ always @(posedge clk) begin
+ //
+ update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
+ update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
+ //
+ // one_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced,
+ rd_narrow_y_din_x_w_cry_reduced,
+ rd_narrow_x_din_y_w_cry_reduced,
+ rd_narrow_y_din_y_w_cry_reduced);
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
+ UOP_OPCODE_COPY_CRT_Y2X: begin
//
- rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ update_wide_dout(wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y,
+ wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y);
//
- wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
- wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
- wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
- wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;
+ update_narrow_dout(wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y);
//
end
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ update_wide_dout(wrk_rd_narrow_x_din_x,
+ wrk_rd_narrow_y_din_x,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y);
+ //
endcase
+ //
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
//
- UOP_OPCODE_COPY_CRT_Y2X:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
//
- wr_wide_x_dout_x <= wrk_rd_wide_x_din_y;
- wr_wide_y_dout_x <= wrk_rd_wide_y_din_y;
- wr_wide_x_dout_y <= wrk_rd_wide_x_din_y;
- wr_wide_y_dout_y <= wrk_rd_wide_y_din_y;
+ update_wide_dout(wrk_rd_wide_x_din_x_dly3,
+ wrk_rd_wide_x_din_x_dly2,
+ wrk_rd_wide_x_din_y_dly3,
+ wrk_rd_wide_x_din_y_dly2);
//
- wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
- wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
- wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
- wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;
+ update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
+ wrk_rd_narrow_x_din_x_dly2,
+ wrk_rd_narrow_x_din_y_dly3,
+ wrk_rd_narrow_x_din_y_dly2);
//
end
//
- endcase
- //
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
- wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
- wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
- wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
- wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+ update_wide_dout(wrk_rd_wide_x_din_x_dly3,
+ wrk_rd_wide_x_din_y_dly2,
+ wrk_rd_wide_x_din_y_dly3,
+ wrk_rd_wide_x_din_x_dly2);
//
- wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
- wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
- wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
- wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+ update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
+ wrk_rd_narrow_x_din_y_dly2,
+ wrk_rd_narrow_x_din_y_dly3,
+ wrk_rd_narrow_x_din_x_dly2);
//
end
//
endcase
+ //
+ endcase
+ //
+ // two_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x;
- wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x;
- wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y;
- wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y;
+ if (!wrk_fsm_two_pass_pass)
+ update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc);
+ else begin
+ update_wide_dout (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
+ update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
+ end
//
- end
- //
endcase
//
endcase
@@ -637,254 +813,307 @@ module modexpng_general_worker
//
- // Write Address Logic
+ // Source Read Address Logic
//
- wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
- wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
+
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next;
+
+ wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half;
+ wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last;
+
+ task update_rd_wide_bank_addr;
+ input [BANK_ADDR_W -1:0] bank;
+ input [ OP_ADDR_W -1:0] addr;
+ begin
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, addr};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, addr};
+ end
+ endtask
+
+ task update_rd_wide_bank;
+ input [BANK_ADDR_W -1:0] bank;
+ begin
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, rd_wide_xy_addr_y};
+ end
+ endtask
+
+ task update_rd_narrow_bank_addr;
+ input [BANK_ADDR_W -1:0] bank;
+ input [ OP_ADDR_W -1:0] addr;
+ begin
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, addr};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, addr};
+ end
+ endtask
+
+ task update_rd_narrow_bank;
+ input [BANK_ADDR_W -1:0] bank;
+ begin
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, rd_narrow_xy_addr_y};
+ end
+ endtask
+
+ task update_rd_wide_addr_next;
+ input [OP_ADDR_W -1:0] addr;
+ rd_wide_xy_addr_xy_next <= addr;
+ endtask
+
+ task update_rd_narrow_addr_next;
+ input [OP_ADDR_W -1:0] addr;
+ rd_narrow_xy_addr_xy_next <= addr;
+ endtask
+
+ task advance_rd_wide_addr_next;
+ rd_wide_xy_addr_xy_next <= !rd_wide_xy_addr_xy_next_is_last ? rd_wide_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ endtask
+
+ task advance_rd_narrow_addr_next;
+ rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ endtask
always @(posedge clk) begin
//
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC); // per-cycle don't-care default; task body is outside this hunk, presumably overridden by the cases below
+ update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC); // same default for the narrow read port
//
- case (opcode)
+ // one_pass: read addressing keyed on the NEXT state of the one-pass FSM, then on the opcode
+ //
+ case (wrk_fsm_state_next_one_pass)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_COPY_CRT_Y2X:
+ WRK_FSM_STATE_LATENCY_PRE1:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
- //
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly2};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly2};
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2};
+ update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); // start at word 0 of the source bank, queue word 1
+ update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); // same start sequence for the narrow port
//
end
//
endcase
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ //
+ update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; // NOTE: the wide port is addressed with the NARROW next-address here
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; // issue the queued narrow address, then step it
//
- wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
- wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ end
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
- wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
- wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+ update_rd_wide_bank_addr (sel_wide_in, rd_wide_xy_addr_xy_next ); advance_rd_wide_addr_next ; // unlike the opcodes above, the wide port uses its own next-address
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; // narrow port steps through its own next-address
//
end
//
endcase
+ //
+ endcase
+ //
+ // one_pass_meander: read addressing keyed on the NEXT state of the meander FSM (M1 states, then M2 states)
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); // M1 states read from the *_out banks, starting at word 0
+ update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); // queue word 1 as the next address
+ end
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_BUSY_M1:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; // wide port again follows the narrow next-address
+ update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; // issue the queued narrow address, then step it
+ //
+ end
+ //
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M2:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank (sel_wide_in ); // M2 states select the *_in bank; task is defined outside this hunk, address handling not visible here
+ update_rd_narrow_bank(sel_narrow_in); // same bank-only update for the narrow port
+ end
+ endcase
+ //
+ endcase
+ //
+ // two_pass: read addressing keyed on the NEXT state of the two-pass FSM; wrk_fsm_two_pass_pass tells the passes apart
+ //
+ case (wrk_fsm_state_next_two_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_TP:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
- //
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+ if (!wrk_fsm_two_pass_pass) begin // pass flag still 0: first pass
+ update_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE); // wide port is fixed to BANK_WIDE_N, not a uop-selected bank
+ update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); // narrow port reads the source bank from word 0
+ end else begin // pass flag set: second pass
+ update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE); // only the narrow port is driven, reading the destination bank
+ end
+ //
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_TP,
+ WRK_FSM_STATE_LATENCY_PRE3_TP,
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- end
+ if (!wrk_fsm_two_pass_pass) begin // first pass: keep streaming BANK_WIDE_N and the narrow source bank
+ update_rd_wide_bank_addr (BANK_WIDE_N, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ; // wide address taken from the narrow next-address
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; // issue the queued narrow address, then step it
+ end else begin // second pass: narrow destination bank only
+ update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next; // wide port keeps the don't-care default
+ end
//
endcase
//
- //
endcase
//
end
//
- // Read Address Logic
+ // Destination Write Address Logic
//
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
-
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
+
+ wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half; // 1 while the delayed X narrow read address is at or below word_index_last_half
+ wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half; // same comparison for the Y address
- wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half;
- wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half;
+ wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H; // X destination bank: BANK_WIDE_L when feed_lsb_x is set, else BANK_WIDE_H
+ wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H; // Y destination bank, chosen the same way
- wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
- wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+ task update_wr_wide_bank_addr; // sets the wide write bank and address for X and Y independently
+ input [BANK_ADDR_W -1:0] x_bank; // bank for the X write port
+ input [BANK_ADDR_W -1:0] y_bank; // bank for the Y write port
+ input [ OP_ADDR_W -1:0] x_addr; // word address for the X write port
+ input [ OP_ADDR_W -1:0] y_addr; // word address for the Y write port
+ begin
+ wr_wide_xy_bank_x <= x_bank; wr_wide_xy_addr_x <= x_addr;
+ wr_wide_xy_bank_y <= y_bank; wr_wide_xy_addr_y <= y_addr;
+ end
+ endtask
- always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
+ task update_wr_narrow_bank_addr; // sets the narrow write bank and address for X and Y independently
+ input [BANK_ADDR_W -1:0] x_bank; // bank for the X write port
+ input [BANK_ADDR_W -1:0] y_bank; // bank for the Y write port
+ input [ OP_ADDR_W -1:0] x_addr; // word address for the X write port
+ input [ OP_ADDR_W -1:0] y_addr; // word address for the Y write port
+ begin
+ wr_narrow_xy_bank_x <= x_bank; wr_narrow_xy_addr_x <= x_addr;
+ wr_narrow_xy_bank_y <= y_bank; wr_narrow_xy_addr_y <= y_addr;
+ end
+ endtask
+
+ always @(posedge clk) begin // registers the destination write bank/address, keyed on the CURRENT FSM state and the opcode
//
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ update_wr_wide_bank_addr (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC); // per-cycle don't-care default; later non-blocking assignments below take precedence
+ update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC); // same default for the narrow write port
//
- case (opcode)
+ // one_pass: write addresses are driven in BUSY and in both POST latency states
+ //
+ case (wrk_fsm_state)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X:
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2); // wide write address is the *_dly2 copy of the NARROW read address
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2); // narrow write uses the same *_dly2 addresses
end
//
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
- //
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
- end
+ UOP_OPCODE_MODULAR_REDUCE_INIT: // wide-only write: bank chosen per word (L or H), address from the *_dly2 WIDE read address
+ update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
//
endcase
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state_next_one_pass)
- //
- WRK_FSM_STATE_LATENCY_PRE1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
- //
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
+ endcase
+ //
+ // one_pass_meander: only the M2 states (BUSY_M2 and its two POST states) drive write addresses
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
+ //
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); // meander opcodes use the *_dly4 copies of the narrow read address
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4); // narrow write uses the same *_dly4 addresses
end
- //
endcase
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
+ endcase
+ //
+ // two_pass: write addresses are driven in BUSY_TP and in all four POST latency states
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
//
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
- end
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1_M2: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_BUSY_M1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+ if (!wrk_fsm_two_pass_pass) begin // pass flag still 0 (first pass): narrow destination only, wide keeps the don't-care default
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ end else begin // pass flag set (second pass): both wide and narrow destinations are written
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ end
//
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M2: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
- //
- end
- //
- endcase
+ endcase
//
- //
- endcase
+ endcase
//
end
-
+
//
// FSM Process
//
+
always @(posedge clk)
//
if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
@@ -893,7 +1122,9 @@ module modexpng_general_worker
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
- UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
+ UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass;
default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
endcase
@@ -901,49 +1132,64 @@ module modexpng_general_worker
//
// Busy Exit Logic
//
- reg wrk_fsm_done_one_pass = 1'b0;
+
+ reg wrk_fsm_done_one_pass = 1'b0;
reg wrk_fsm_done_one_pass_meander = 1'b0;
+ reg wrk_fsm_done_two_pass = 1'b0;
always @(posedge clk) begin
//
wrk_fsm_done_one_pass <= 1'b0;
wrk_fsm_done_one_pass_meander <= 1'b0;
+ wrk_fsm_done_two_pass <= 1'b0; // like the other done flags, cleared every cycle unless set below
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
- //
- if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // TODO: Check, whether both are necessary...
- if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass <= 1'b1;
- //
- end
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // set while BUSY when the shared narrow next-address is flagged as last (flag defined outside this hunk)
+ endcase
//
- end
- //
- UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
- //
- if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary...
- if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
- //
- end
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY_M2:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // set only in the M2 busy state
+ WRK_FSM_STATE_BUSY_M1:
+ wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander; // BUSY_M1: hold the flag, overriding the default clear above
+ endcase
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
- wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY_TP:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_two_pass <= 1'b1; // same exit condition, applied to the two-pass busy state
+ endcase
//
- end
//
endcase
//
end
-
+
+ //
+ // FSM Helper Logic
+ //
+ always @(posedge clk) // tracks which pass of a two-pass operation is running
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: if (ena) begin wrk_fsm_two_pass_pass <= 1'b0; wrk_fsm_two_pass_pass_dly <= 1'b0; end // new operation: clear both flags
+ WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_two_pass_pass <= 1'b1; // set in the last POST state of a pass
+ WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_two_pass_pass_dly <= 1'b1; // set in HOLDOFF, so HOLDOFF sees 0 the first time and 1 the second
+ endcase
+
+
//
// FSM Transition Logic
//
@@ -985,7 +1231,27 @@ module modexpng_general_worker
//
end
-
+ always @* begin
+ //
+ // next-state function of the two-pass FSM; IDLE is the fallback for WRK_FSM_STATE_STOP and any unlisted state
+ wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE;
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_two_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1_TP : WRK_FSM_STATE_IDLE;
+ WRK_FSM_STATE_LATENCY_PRE1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE2_TP;
+ WRK_FSM_STATE_LATENCY_PRE2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE3_TP;
+ WRK_FSM_STATE_LATENCY_PRE3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE4_TP;
+ WRK_FSM_STATE_LATENCY_PRE4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_BUSY_TP;
+ WRK_FSM_STATE_BUSY_TP: wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP;
+ WRK_FSM_STATE_LATENCY_POST1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST2_TP;
+ WRK_FSM_STATE_LATENCY_POST2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST3_TP;
+ WRK_FSM_STATE_LATENCY_POST3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST4_TP;
+ WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_HOLDOFF_TP;
+ WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? WRK_FSM_STATE_STOP : WRK_FSM_STATE_LATENCY_PRE1_TP; // first visit loops back for pass two, second visit stops
+ endcase
+ //
+ end
+
+
//
// Ready Logic
//
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index f68c559..3493e26 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -39,8 +39,9 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3;
* source and destination WIDE are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_CROSS_LADDERS_X2Y = 5'd7;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
@@ -53,6 +54,13 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8;
* AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual)
* LADDER specifies Montgomery ladder mode
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_SUBTRACT = 5'd9;
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ */
+
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10;
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11;
/* CRT
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 04f0c83..adc657a 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -21,35 +21,35 @@ module modexpng_uop_rom
6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
- //
+ //
6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
- //
+ //
6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
- //
+ //
6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
- //
+ //
6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
- //
+ //
6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
- //
+ //
6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
- //
+ //
6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
- //
+ //
6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
- //
+ //
6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
@@ -70,6 +70,14 @@ module modexpng_uop_rom
6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
//
+ 6'd40: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ //
+ 6'd41: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
+ //
+ 6'd42: data <= {UOP_OPCODE_CROSS_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; //
+ //
+ 6'd43: data <= {UOP_OPCODE_MODULAR_SUBTRACT, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; //
+ //
default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase