about | summary | refs | log | tree | commit | diff
path: root/rtl
diff options
context:
space:
mode:
author Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-03 16:50:25 +0300
committer Pavel V. Shatov (Meister) <meisterpaul1@yandex.ru> 2019-10-03 16:50:25 +0300
commit 02247784f18dc683d5873a52c1650e72f02273b5 (patch)
tree 2e1bc8872c183ab5424545bcaa8ff90ffa7b53be /rtl
parent e340b1489b08905e3d8acd17686e178028de7922 (diff)
Added more micro-operations, entire Montgomery exponentiation ladder works now.
Diffstat (limited to 'rtl')
-rw-r--r-- rtl/modexpng_core_top.v 138
-rw-r--r-- rtl/modexpng_general_worker.v 402
-rw-r--r-- rtl/modexpng_io_manager.v 170
-rw-r--r-- rtl/modexpng_microcode.vh 42
-rw-r--r-- rtl/modexpng_mmm_dual.v 10
-rw-r--r-- rtl/modexpng_parameters.vh 8
-rw-r--r-- rtl/modexpng_uop_rom.v 82
7 files changed, 720 insertions, 132 deletions
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index 6b194dc..c78a969 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -6,6 +6,8 @@ module modexpng_core_top
crt_mode,
word_index_last_n,
word_index_last_pq,
+ bit_index_last_n,
+ bit_index_last_pq,
bus_cs,
bus_we,
bus_addr,
@@ -37,6 +39,9 @@ module modexpng_core_top
input [ OP_ADDR_W -1:0] word_index_last_n;
input [ OP_ADDR_W -1:0] word_index_last_pq;
+ input [ BIT_INDEX_W -1:0] bit_index_last_n;
+ input [ BIT_INDEX_W -1:0] bit_index_last_pq;
+
input bus_cs;
input bus_we;
input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
@@ -71,16 +76,23 @@ module modexpng_core_top
wire [BANK_ADDR_W -1:0] uop_data_sel_wide_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -2*BANK_ADDR_W -1-: BANK_ADDR_W ];
wire [BANK_ADDR_W -1:0] uop_data_sel_narrow_out = uop_data[UOP_W -UOP_OPCODE_W -UOP_CRT_W -UOP_NPQ_W -UOP_AUX_W -UOP_LADDER_W -3*BANK_ADDR_W -1-: BANK_ADDR_W ];
- wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
- wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
- (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
- wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
- wire uop_opcode_is_mmm = uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ;
- wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
- (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ;
-
+ wire uop_opcode_is_stop = uop_data_opcode == UOP_OPCODE_STOP ;
+ wire uop_opcode_is_in = (uop_data_opcode == UOP_OPCODE_INPUT_TO_WIDE ) ||
+ (uop_data_opcode == UOP_OPCODE_INPUT_TO_NARROW ) ;
+ wire uop_opcode_is_out = uop_data_opcode == UOP_OPCODE_OUTPUT_FROM_NARROW ;
+ wire uop_opcode_is_ladder = (uop_data_opcode == UOP_OPCODE_LADDER_INIT ) ||
+ (uop_data_opcode == UOP_OPCODE_LADDER_STEP ) ;
+ wire uop_opcode_is_mmm = (uop_data_opcode == UOP_OPCODE_MODULAR_MULTIPLY ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_PROC ) ;
+ wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
+ (uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
+ (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ;
+
+ wire uop_loop_now;
+
wire [UOP_ADDR_W -1:0] uop_addr_offset = crt_mode ? UOP_ADDR_OFFSET_USING_CRT : UOP_ADDR_OFFSET_WITHOUT_CRT;
- wire [UOP_ADDR_W -1:0] uop_addr_next = uop_addr + 1'b1;
+ wire [UOP_ADDR_W -1:0] uop_addr_next = uop_loop_now ? uop_addr - 1'b1: uop_addr + 1'b1;
modexpng_uop_rom uop_rom
(
@@ -595,6 +607,14 @@ module modexpng_core_top
reg [OP_ADDR_W -1:0] io_mgr_word_index_last;
reg [UOP_OPCODE_W -1:0] io_mgr_opcode;
+ reg [BIT_INDEX_W -1:0] io_mgr_ladder_steps;
+ wire io_mgr_ladder_d;
+ wire io_mgr_ladder_p;
+ wire io_mgr_ladder_q;
+ wire io_mgr_ladder_done;
+
+ assign uop_loop_now = (uop_data_opcode == UOP_OPCODE_LADDER_STEP) && !io_mgr_ladder_done;
+
wire [WORD_W -1:0] wrk_rd_narrow_x_data_x_trunc = wrk_rd_narrow_x_data_x[WORD_W-1:0];
wire [WORD_W -1:0] wrk_rd_narrow_x_data_y_trunc = wrk_rd_narrow_x_data_y[WORD_W-1:0];
@@ -653,7 +673,13 @@ module modexpng_core_top
.io_out_dout (io_out_data),
.wrk_narrow_x_din_x_trunc (wrk_rd_narrow_x_data_x_trunc),
- .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc)
+ .wrk_narrow_x_din_y_trunc (wrk_rd_narrow_x_data_y_trunc),
+
+ .ladder_steps (io_mgr_ladder_steps),
+ .ladder_d (io_mgr_ladder_d),
+ .ladder_p (io_mgr_ladder_p),
+ .ladder_q (io_mgr_ladder_q),
+ .ladder_done (io_mgr_ladder_done)
);
@@ -685,6 +711,9 @@ module modexpng_core_top
reg mmm_force_unity_b_x;
reg mmm_force_unity_b_y;
+ reg mmm_only_reduce_x;
+ reg mmm_only_reduce_y;
+
wire rdct_ena_x;
wire rdct_ena_y;
wire rdct_rdy_x;
@@ -702,6 +731,7 @@ module modexpng_core_top
.word_index_last (mmm_word_index_last_x),
.word_index_last_minus1 (mmm_word_index_last_minus1_x),
.force_unity_b (mmm_force_unity_b_x),
+ .only_reduce (mmm_only_reduce_x),
.sel_wide_in (mmm_sel_wide_in_x),
.sel_narrow_in (mmm_sel_narrow_in_x),
@@ -757,6 +787,7 @@ module modexpng_core_top
.word_index_last (mmm_word_index_last_y),
.word_index_last_minus1 (mmm_word_index_last_minus1_y),
.force_unity_b (mmm_force_unity_b_y),
+ .only_reduce (mmm_only_reduce_y),
.sel_wide_in (mmm_sel_wide_in_y),
.sel_narrow_in (mmm_sel_narrow_in_y),
@@ -898,6 +929,7 @@ module modexpng_core_top
reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_in;
reg [ BANK_ADDR_W -1:0] wrk_sel_narrow_out;
reg [ OP_ADDR_W -1:0] wrk_word_index_last;
+ reg [ OP_ADDR_W -1:0] wrk_word_index_last_half;
reg [UOP_OPCODE_W -1:0] wrk_opcode;
modexpng_general_worker general_worker
@@ -916,6 +948,7 @@ module modexpng_core_top
.opcode (wrk_opcode),
.word_index_last (wrk_word_index_last),
+ .word_index_last_half (wrk_word_index_last_half),
.wrk_rd_wide_xy_ena_x (wrk_rd_wide_xy_ena_x),
.wrk_rd_wide_xy_bank_x (wrk_rd_wide_xy_bank_x),
@@ -976,10 +1009,11 @@ module modexpng_core_top
//
uop_exit_from_busy = 0;
//
- if (uop_opcode_is_in) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
- if (uop_opcode_is_out) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~mmm_ena & mmm_rdy);
- if (uop_opcode_is_mmm) uop_exit_from_busy = ~mmm_ena & mmm_rdy ;
- if (uop_opcode_is_wrk) uop_exit_from_busy = ~wrk_ena & wrk_rdy ;
+ if (uop_opcode_is_in ) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
+ if (uop_opcode_is_out ) uop_exit_from_busy = (~io_mgr_ena & io_mgr_rdy) & (~wrk_ena & wrk_rdy);
+ if (uop_opcode_is_mmm ) uop_exit_from_busy = ~mmm_ena & mmm_rdy;
+ if (uop_opcode_is_wrk ) uop_exit_from_busy = ~wrk_ena & wrk_rdy;
+ if (uop_opcode_is_ladder) uop_exit_from_busy = ~io_mgr_ena & io_mgr_rdy;
//
end
@@ -995,17 +1029,22 @@ module modexpng_core_top
mmm_ena_y <= 1'b0;
wrk_ena <= 1'b0;
end else begin
- io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in || uop_opcode_is_out) : 1'b0;
- mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
- wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk || uop_opcode_is_out) : 1'b0;
+ io_mgr_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_in ||
+ uop_opcode_is_out ||
+ uop_opcode_is_ladder): 1'b0;
+ mmm_ena_x <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ mmm_ena_y <= uop_fsm_state == UOP_FSM_STATE_DECODE ? uop_opcode_is_mmm : 1'b0;
+ wrk_ena <= uop_fsm_state == UOP_FSM_STATE_DECODE ? (uop_opcode_is_wrk ||
+ uop_opcode_is_out ): 1'b0;
end
//
// Parameters
//
wire uop_aux_is_1 = uop_data_aux == UOP_AUX_1;
-
+
+ // TODO: Add reset to default don't care values.
+
always @(posedge clk)
//
if (uop_fsm_state == UOP_FSM_STATE_DECODE) begin
@@ -1044,9 +1083,10 @@ module modexpng_core_top
UOP_LADDER_00: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b00;
UOP_LADDER_11: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'b11;
UOP_LADDER_D: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
- UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= 2'bXX;
+ UOP_LADDER_PQ: {mmm_ladder_mode_x, mmm_ladder_mode_y} <= {io_mgr_ladder_p, io_mgr_ladder_q};
endcase
//
+ {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b0}};
{mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{uop_aux_is_1 ? 1'b0 : 1'b1}};
{mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{uop_data_sel_wide_in }};
{mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{uop_data_sel_narrow_in }};
@@ -1055,24 +1095,42 @@ module modexpng_core_top
//
end
//
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ //
+ {mmm_ladder_mode_x, mmm_ladder_mode_y } <= {2{1'bX }};
+ //
+ {mmm_only_reduce_x, mmm_only_reduce_y } <= {2{1'b1 }};
+ {mmm_force_unity_b_x, mmm_force_unity_b_y } <= {2{1'b0 }};
+ {mmm_sel_wide_in_x, mmm_sel_wide_in_y } <= {2{BANK_DNC }};
+ {mmm_sel_narrow_in_x, mmm_sel_narrow_in_y } <= {2{BANK_DNC }};
+ {rdct_sel_wide_out_x, rdct_sel_wide_out_y } <= {2{uop_data_sel_wide_out }};
+ {rdct_sel_narrow_out_x, rdct_sel_narrow_out_y} <= {2{uop_data_sel_narrow_out}};
+ //
+ end
+ //
UOP_OPCODE_PROPAGATE_CARRIES: begin
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
- UOP_OPCODE_COPY_CRT_Y2X: begin
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
wrk_sel_wide_in <= uop_data_sel_wide_in;
wrk_sel_wide_out <= uop_data_sel_wide_out;
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ end
+ //
endcase
//
end
//
- // Length
+ // Lengths
//
wire [OP_ADDR_W -1:0] word_index_last_n_minus1 = word_index_last_n - 1'b1;
wire [OP_ADDR_W -1:0] word_index_last_pq_minus1 = word_index_last_pq - 1'b1;
@@ -1086,7 +1144,10 @@ module modexpng_core_top
case (uop_data_opcode)
//
UOP_OPCODE_INPUT_TO_WIDE,
- UOP_OPCODE_INPUT_TO_NARROW: io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ UOP_OPCODE_INPUT_TO_NARROW,
+ UOP_OPCODE_OUTPUT_FROM_NARROW:
+ //
+ io_mgr_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
//
UOP_OPCODE_MODULAR_MULTIPLY: begin
{mmm_word_index_last_x, mmm_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
@@ -1094,8 +1155,31 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{uop_npq_is_n ? word_index_last_n : word_index_last_pq }};
end
//
- UOP_OPCODE_PROPAGATE_CARRIES:
- wrk_word_index_last = uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ wrk_word_index_last <= word_index_last_n;
+ wrk_word_index_last_half <= word_index_last_pq;
+ end
+ //
+ UOP_OPCODE_MODULAR_REDUCE_PROC: begin
+ {mmm_word_index_last_x, mmm_word_index_last_y } <= {2{word_index_last_pq }};
+ {mmm_word_index_last_minus1_x, mmm_word_index_last_minus1_y} <= {2{word_index_last_pq_minus1}};
+ {rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
+ end
+ //
+ UOP_OPCODE_LADDER_INIT: begin
+ io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+ io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+ end
+ //
+ UOP_OPCODE_LADDER_STEP: begin
+ io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
+ io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
+ end
//
endcase
//
@@ -1140,8 +1224,8 @@ module modexpng_core_top
UOP_FSM_STATE_IDLE: valid_reg <= ~next;
UOP_FSM_STATE_DECODE: valid_reg <= uop_opcode_is_stop;
endcase
-
-
+
+
//
// BEGIN DEBUG
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index c35f0b3..269ef98 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -14,6 +14,7 @@ module modexpng_general_worker
opcode,
word_index_last,
+ word_index_last_half,
wrk_rd_wide_xy_ena_x,
wrk_rd_wide_xy_bank_x,
@@ -88,6 +89,7 @@ module modexpng_general_worker
input [ UOP_OPCODE_W -1:0] opcode;
input [ OP_ADDR_W -1:0] word_index_last;
+ input [ OP_ADDR_W -1:0] word_index_last_half;
output wrk_rd_wide_xy_ena_x;
output [ BANK_ADDR_W -1:0] wrk_rd_wide_xy_bank_x;
@@ -141,18 +143,35 @@ module modexpng_general_worker
//
// FSM Declaration
//
- localparam [3:0] WRK_FSM_STATE_IDLE = 4'h0;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE1 = 4'h1;
- localparam [3:0] WRK_FSM_STATE_LATENCY_PRE2 = 4'h2;
- localparam [3:0] WRK_FSM_STATE_BUSY = 4'h3;
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST1 = 4'h5; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [3:0] WRK_FSM_STATE_LATENCY_POST2 = 4'h6;
- localparam [3:0] WRK_FSM_STATE_STOP = 4'h7;
+ localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00;
- reg [3:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [3:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02;
+ localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06;
+
+ localparam [4:0] WRK_FSM_STATE_STOP = 5'h07;
+
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14;
+ localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
+ localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+
+ reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ // TODO: Comment on how the narrow/wide address increment works (narrow is one long sweep, wide is two sweeps that are each half as long)
+
+
//
// Control Signals
//
@@ -244,32 +263,62 @@ module modexpng_general_worker
//
// Delays
//
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
- //reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_dly4;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly1;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_dly4;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly1;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly2;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly3;
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_dly4;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_wide_x_din_y_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
+
always @(posedge clk) begin
//
- //{rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- //{rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
+ //
+ {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
+ {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};
//
{rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
{rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
//
+ {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
+ {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+ //
+ {wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
+ {wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
+ //
+ {wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
+ {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ //
end
-
-
- //
- // Handy Wires
- //
- wire rd_narrow_xy_addr_x_next_is_last;
- wire rd_narrow_xy_addr_y_next_is_last;
//
@@ -310,7 +359,8 @@ module modexpng_general_worker
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW:
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
case (wrk_fsm_state_next_one_pass)
//
@@ -333,12 +383,30 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_PRE2,
WRK_FSM_STATE_BUSY: begin
//
- enable_narrow_xy_rd_en;
enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
//
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1,
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M1,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -389,8 +457,7 @@ module modexpng_general_worker
WRK_FSM_STATE_LATENCY_POST1,
WRK_FSM_STATE_LATENCY_POST2:
//
- enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ enable_narrow_xy_wr_en;
//
//
endcase
@@ -405,7 +472,34 @@ module modexpng_general_worker
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
- //{wr_narrow_xy_ena_x, wr_narrow_xy_ena_y} <= {2{1'b1}};
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ enable_wide_xy_wr_en;
+ //
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
//
end
//
@@ -424,7 +518,7 @@ module modexpng_general_worker
reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
reg [CARRY_W -1:0] rd_narrow_y_din_y_cry_r;
- wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
+ wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry = wrk_rd_narrow_x_din_x + {{WORD_W{1'b0}}, rd_narrow_x_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry = wrk_rd_narrow_y_din_x + {{WORD_W{1'b0}}, rd_narrow_y_din_x_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
@@ -497,6 +591,45 @@ module modexpng_general_worker
end
//
endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
+ wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
+ wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
+ wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+ //
+ wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
+ wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
+ wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
+ wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x;
+ wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x;
+ wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y;
+ wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y;
+ //
+ end
+ //
+ endcase
//
endcase
//
@@ -506,6 +639,9 @@ module modexpng_general_worker
//
// Write Address Logic
//
+ wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
+ wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
+
always @(posedge clk) begin
//
{wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
@@ -534,22 +670,64 @@ module modexpng_general_worker
//
endcase
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2: begin
+ //
+ wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
+ wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ //
+ wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
+ wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ //
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};
+ //
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+ //
+ end
+ //
+ endcase
+ //
//
endcase
//
end
-
-
+
+
//
// Read Address Logic
//
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
+
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
- assign rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
- assign rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+ wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half;
+ wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half;
+
+ wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
+ wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
- always @(posedge clk) begin
+ always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
//
{rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
{rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
@@ -572,6 +750,9 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
//
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
//
@@ -586,11 +767,113 @@ module modexpng_general_worker
{rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
{rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
//
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ case (wrk_fsm_state_next_one_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
+ rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
+ rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
+ //
+ end
+ //
+ endcase
+ //
+ UOP_OPCODE_COPY_LADDERS_X2Y:
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
+ //
+ rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
+ rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_BUSY_M1: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+ //
+ rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
+ rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
+ //
rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
//
end
//
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M2: begin
+ //
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
+ //
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
+ //
+ end
+ //
endcase
//
//
@@ -608,7 +891,9 @@ module modexpng_general_worker
else case (opcode)
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
+ UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
endcase
@@ -616,17 +901,20 @@ module modexpng_general_worker
//
// Busy Exit Logic
//
- reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass = 1'b0;
+ reg wrk_fsm_done_one_pass_meander = 1'b0;
always @(posedge clk) begin
//
- wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass <= 1'b0;
+ wrk_fsm_done_one_pass_meander <= 1'b0;
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X: begin
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
//
@@ -637,6 +925,20 @@ module modexpng_general_worker
//
end
//
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
+ //
+ if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary...
+ if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
+ //
+ end
+ //
+ if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
+ wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ //
+ end
+ //
endcase
//
end
@@ -654,7 +956,31 @@ module modexpng_general_worker
WRK_FSM_STATE_BUSY: wrk_fsm_state_next_one_pass = wrk_fsm_done_one_pass ? WRK_FSM_STATE_LATENCY_POST1 : WRK_FSM_STATE_BUSY ;
WRK_FSM_STATE_LATENCY_POST1: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_LATENCY_POST2 ;
WRK_FSM_STATE_LATENCY_POST2: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_STOP ;
- WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ default: wrk_fsm_state_next_one_pass = WRK_FSM_STATE_IDLE ;
+ endcase
+ //
+ end
+
+ always @* begin
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_one_pass_meander = ena ? WRK_FSM_STATE_LATENCY_PRE1_M1 : WRK_FSM_STATE_IDLE ;
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE1_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M1 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_PRE2_M2 ;
+ WRK_FSM_STATE_LATENCY_PRE2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M1 ;
+ WRK_FSM_STATE_BUSY_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_BUSY_M2 ;
+ WRK_FSM_STATE_BUSY_M2: wrk_fsm_state_next_one_pass_meander = wrk_fsm_done_one_pass_meander ? WRK_FSM_STATE_LATENCY_POST1_M1 : WRK_FSM_STATE_BUSY_M1 ;
+ WRK_FSM_STATE_LATENCY_POST1_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST1_M2 ;
+ WRK_FSM_STATE_LATENCY_POST1_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M1 ;
+ WRK_FSM_STATE_LATENCY_POST2_M1: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_LATENCY_POST2_M2 ;
+ WRK_FSM_STATE_LATENCY_POST2_M2: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_STOP ;
+ //
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
+ //
+ default: wrk_fsm_state_next_one_pass_meander = WRK_FSM_STATE_IDLE ;
endcase
//
end
diff --git a/rtl/modexpng_io_manager.v b/rtl/modexpng_io_manager.v
index dfbd676..59f4709 100644
--- a/rtl/modexpng_io_manager.v
+++ b/rtl/modexpng_io_manager.v
@@ -53,7 +53,13 @@ module modexpng_io_manager
io_out_dout,
wrk_narrow_x_din_x_trunc,
- wrk_narrow_x_din_y_trunc
+ wrk_narrow_x_din_y_trunc,
+
+ ladder_steps,
+ ladder_d,
+ ladder_p,
+ ladder_q,
+ ladder_done
);
//
@@ -120,6 +126,12 @@ module modexpng_io_manager
output [ WORD_W -1:0] wrk_narrow_x_din_x_trunc;
output [ WORD_W -1:0] wrk_narrow_x_din_y_trunc;
+
+ input [ BIT_INDEX_W -1:0] ladder_steps;
+ output ladder_d;
+ output ladder_p;
+ output ladder_q;
+ output ladder_done;
//
@@ -254,6 +266,10 @@ module modexpng_io_manager
wire opcode_is_output = opcode == UOP_OPCODE_OUTPUT_FROM_NARROW;
+ wire opcode_is_ladder_init = opcode == UOP_OPCODE_LADDER_INIT;
+ wire opcode_is_ladder_step = opcode == UOP_OPCODE_LADDER_STEP;
+ wire opcode_is_ladder = opcode_is_ladder_init || opcode_is_ladder_step;
+
wire opcode_is_input_wide = opcode == UOP_OPCODE_INPUT_TO_WIDE;
wire opcode_is_input_narrow = opcode == UOP_OPCODE_INPUT_TO_NARROW;
@@ -269,8 +285,90 @@ module modexpng_io_manager
wire in_1_addr_op_next_is_last;
wire in_2_addr_op_next_is_last;
+ wire in_2_addr_op_next_is_one;
wire dummy_addr_op_next_is_last;
+
+ //
+ // Ladder Init/Step Logic
+ //
+ reg ladder_d_r;
+ reg ladder_p_r;
+ reg ladder_q_r;
+ reg ladder_done_r = 1'b0;
+
+ assign ladder_d = ladder_d_r;
+ assign ladder_p = ladder_p_r;
+ assign ladder_q = ladder_q_r;
+ assign ladder_done = ladder_done_r;
+
+ reg [BIT_INDEX_W -1:0] ladder_index;
+ reg [BIT_INDEX_W -1:0] ladder_index_next;
+ wire [ OP_ADDR_W -1:0] ladder_index_msb = ladder_index[BIT_INDEX_W-1-: OP_ADDR_W];
+ wire [ WORD_MUX_W -1:0] ladder_index_lsb = ladder_index[ WORD_MUX_W-1-:WORD_MUX_W];
+ wire ladder_index_is_zero = ladder_index == BIT_INDEX_ZERO;
+
+ // Ladder bit counter. The registers only change on the clock edge where the
+ // I/O FSM is about to enter LATENCY_PRE1, i.e. once per LADDER_INIT / LADDER_STEP
+ // micro-operation.
+ always @(posedge clk)
+ //
+ if (io_fsm_state_next == IO_FSM_STATE_LATENCY_PRE1) begin
+ // LADDER_INIT: load the counter from ladder_steps, precompute counter-1, clear the done flag
+ if (opcode_is_ladder_init) begin
+ ladder_index <= ladder_steps;
+ ladder_index_next <= ladder_steps - 1'b1;
+ ladder_done_r <= 1'b0;
+ end
+ // LADDER_STEP: move to the precomputed value and precompute the next one.
+ // ladder_index_is_zero is evaluated on the index value *before* this update,
+ // so the done flag is raised by the step that leaves index zero; within this
+ // block it is only cleared again by the next LADDER_INIT.
+ if (opcode_is_ladder_step) begin
+ ladder_index <= ladder_index_next;
+ ladder_index_next <= ladder_index_next - 1'b1;
+ if (ladder_index_is_zero) ladder_done_r <= 1'b1;
+ end
+ //
+ end
+
+
+ //
+ // Ladder Mux
+ //
+ reg ladder_dpq_mux;
+
+ // Picks the single bit of io_in_2_din addressed by ladder_index_lsb (the low
+ // WORD_MUX_W bits of the ladder index). An indexed bit-select replaces the
+ // former 16-entry case so that the mux width follows WORD_MUX_W instead of
+ // hard-coded 4-bit labels, and "always @*" replaces the hand-written
+ // sensitivity list. For every defined select value 0..15 the result is the
+ // same as before; an unknown select now yields X in simulation instead of
+ // silently holding the previous value.
+ always @*
+ //
+ ladder_dpq_mux = io_in_2_din[ladder_index_lsb];
+
+ // Latches the mux output into ladder_d_r, ladder_p_r and ladder_q_r on three
+ // different FSM states (BUSY, LATENCY_POST1, LATENCY_POST2 respectively).
+ // Nothing is captured unless the current opcode is LADDER_INIT or LADDER_STEP.
+ always @(posedge clk)
+ //
+ if (opcode_is_ladder)
+ //
+ case (io_fsm_state)
+ IO_FSM_STATE_BUSY: ladder_d_r <= ladder_dpq_mux;
+ IO_FSM_STATE_LATENCY_POST1: ladder_p_r <= ladder_dpq_mux;
+ IO_FSM_STATE_LATENCY_POST2: ladder_q_r <= ladder_dpq_mux;
+ endcase
+
+
//
// Source Enable Logic
//
@@ -284,8 +382,8 @@ module modexpng_io_manager
IO_FSM_STATE_LATENCY_PRE1,
IO_FSM_STATE_LATENCY_PRE2,
IO_FSM_STATE_BUSY: begin
- in_1_en <= opcode_is_input && sel_aux_is_1;
- in_2_en <= opcode_is_input && sel_aux_is_2;
+ in_1_en <= opcode_is_input && sel_aux_is_1;
+ in_2_en <= (opcode_is_input && sel_aux_is_2) || opcode_is_ladder;
end
//
IO_FSM_STATE_EXTRA: begin
@@ -450,35 +548,59 @@ module modexpng_io_manager
wire [OP_ADDR_W -1:0] dummy_addr_op_next = dummy_addr_next;
assign in_1_addr_op_next_is_last = in_1_addr_op_next == word_index_last;
- assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last;
+ assign in_2_addr_op_next_is_last = in_2_addr_op_next == word_index_last;
+ assign in_2_addr_op_next_is_one = in_2_addr_op_next == OP_ADDR_ONE;
assign dummy_addr_op_next_is_last = dummy_addr_op_next == word_index_last;
- always @(posedge clk)
+ always @(posedge clk) begin
+ //
+ {in_1_addr_bank, in_1_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+ {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+ { dummy_addr_op} <= { OP_ADDR_DNC};
+ //
+ in_1_addr_next <= {BANK_DNC, OP_ADDR_DNC};
+ in_2_addr_next <= {BANK_DNC, OP_ADDR_DNC};
+ dummy_addr_next <= { OP_ADDR_DNC};
//
case (io_fsm_state_next)
//
IO_FSM_STATE_LATENCY_PRE1: begin
//
- {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
- {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
- { dummy_addr_op} <= { OP_ADDR_ZERO};
+ {in_1_addr_bank, in_1_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= {sel_in, OP_ADDR_ZERO};
+ else {in_2_addr_bank, in_2_addr_op } <= {BANK_DNC, OP_ADDR_DNC};
+ { dummy_addr_op} <= { OP_ADDR_ZERO};
+ //
+ in_1_addr_next <= {sel_in, OP_ADDR_ONE};
+ in_2_addr_next <= {sel_in, OP_ADDR_ONE};
+ dummy_addr_next <= { OP_ADDR_ONE};
+ //
+ end
+ //
+ IO_FSM_STATE_LATENCY_PRE2: begin
+ //
+ {in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
+ if (!opcode_is_ladder) {in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
+ else {in_2_addr_bank, in_2_addr_op } <= {BANK_IN_2_D, ladder_index_msb};
+ { dummy_addr_op} <= dummy_addr_next;
//
- in_1_addr_next <= {sel_in, OP_ADDR_ONE};
- in_2_addr_next <= {sel_in, OP_ADDR_ONE};
- dummy_addr_next <= { OP_ADDR_ONE};
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
+ else in_2_addr_next <= {BANK_IN_2_P, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
+ dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
- IO_FSM_STATE_LATENCY_PRE2,
IO_FSM_STATE_BUSY: begin
//
{in_1_addr_bank, in_1_addr_op } <= in_1_addr_next;
{in_2_addr_bank, in_2_addr_op } <= in_2_addr_next;
{ dummy_addr_op} <= dummy_addr_next;
//
- in_1_addr_next <= in_1_addr_next + 1'b1;
- in_2_addr_next <= in_2_addr_next + 1'b1;
- dummy_addr_next <= dummy_addr_next + 1'b1;
+ in_1_addr_next <= in_1_addr_next + 1'b1;
+ if (!opcode_is_ladder) in_2_addr_next <= in_2_addr_next + 1'b1;
+ else in_2_addr_next <= {BANK_IN_2_Q, 1'b1, ladder_index_msb[OP_ADDR_W-2:0]};
+ dummy_addr_next <= dummy_addr_next + 1'b1;
//
end
//
@@ -499,7 +621,8 @@ module modexpng_io_manager
end
//
endcase
-
+ //
+ end
//
@@ -525,7 +648,7 @@ module modexpng_io_manager
if (opcode_is_input) begin
if (sel_aux_is_1 && in_1_addr_op_next_is_last) io_fsm_done <= 1'b1;
if (sel_aux_is_2 && in_2_addr_op_next_is_last) io_fsm_done <= 1'b1;
- end else if (opcode_is_output) begin
+ end else if (opcode_is_output || opcode_is_ladder) begin
if (dummy_addr_op_next_is_last) io_fsm_done <= 1'b1;
end
//
@@ -571,4 +694,17 @@ module modexpng_io_manager
endcase
+ //
+ // BEGIN DEBUG
+ //
+ // Simulation trace: on every clock edge where the I/O FSM is in STOP and the
+ // opcode is LADDER_INIT or LADDER_STEP, print ladder_index followed by the
+ // three captured bits ladder_d_r / ladder_p_r / ladder_q_r.
+ // NOTE(review): $display is a simulation system task; confirm the synthesis
+ // flow ignores it or guard this block before building for hardware.
+ always @(posedge clk)
+ //
+ if ((io_fsm_state == IO_FSM_STATE_STOP) && opcode_is_ladder)
+ $display("[%4d] / %d / %d / %d", ladder_index, ladder_d_r, ladder_p_r, ladder_q_r);
+
+ //
+ // END DEBUG
+ //
+
+
endmodule
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index 2e591e7..f68c559 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -1,8 +1,8 @@
-localparam UOP_OPCODE_W = 4;
+localparam UOP_OPCODE_W = 5;
localparam UOP_CRT_W = 1;
localparam UOP_NPQ_W = 1;
localparam UOP_AUX_W = 1;
-localparam UOP_LADDER_W = 1;
+localparam UOP_LADDER_W = 2;
localparam UOP_SEL_W = 4 * BANK_ADDR_W;
localparam UOP_ADDR_W = 6; // 64 instructions
@@ -11,17 +11,17 @@ localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_USING_CRT = 6'd0;
localparam [UOP_ADDR_W -1:0] UOP_ADDR_OFFSET_WITHOUT_CRT = 6'd31;
-// 4 1 1 1 2 4*3=12
+// 5 1 1 1 2 4*3=12
localparam UOP_W = UOP_OPCODE_W + UOP_CRT_W + UOP_NPQ_W + UOP_AUX_W + UOP_LADDER_W + UOP_SEL_W;
-// [20:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0]
+// [21:17] [16] [15] [14] [13:12] [11:9][8:6][5:3][2:0]
// OPCODE
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 4'd0;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_STOP = 5'd0;
/* all fields are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 4'd1;
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_WIDE = 5'd1;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 5'd2;
/* CRT tells into which of the dual MMM to write
* NPQ specifies the width of the operand
* AUX specifies from which INPUT to read
@@ -31,7 +31,7 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_INPUT_TO_NARROW = 4'd2;
*
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3;
/* CRT tells from which of the dual MMM to read
* NPQ specifies the width of the operand
* AUX is don't care
@@ -39,27 +39,45 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 4'd3;
* source and destination WIDE are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 4'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
* LADDER is don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 4'd8;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual)
* LADDER specifies Montgomery ladder mode
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11;
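+/* CRT    - TODO document (the microcode ROM passes UOP_CRT_DNC for both opcodes)
+ * NPQ    - TODO document (the microcode ROM passes UOP_NPQ_DNC for _INIT and UOP_NPQ_PQ for _PROC)
+ * AUX    - TODO document (the microcode ROM passes UOP_AUX_DNC for both opcodes)
+ * LADDER is don't care
+ */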
+
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 4'd11;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_PROPAGATE_CARRIES = 5'd12;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
* LADDER is don't care
* source and destination WIDE are don't care
- */
+ */
+
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_INIT = 5'd16;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_LADDER_STEP = 5'd17;
+/* CRT is don't care
+ * NPQ is don't care
+ * AUX is don't care
+ * LADDER is don't care
+ * WIDE and NARROW are don't care
+ */
// CRT
localparam [UOP_CRT_W -1:0] UOP_CRT_X = 1'b0;
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index 14f1b47..6e52a97 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -8,6 +8,7 @@ module modexpng_mmm_dual
word_index_last,
word_index_last_minus1,
force_unity_b,
+ only_reduce,
sel_wide_in, sel_narrow_in,
@@ -72,6 +73,7 @@ module modexpng_mmm_dual
input [7:0] word_index_last;
input [7:0] word_index_last_minus1;
input force_unity_b;
+ input only_reduce;
input [BANK_ADDR_W-1:0] sel_wide_in;
input [BANK_ADDR_W-1:0] sel_narrow_in;
@@ -120,7 +122,8 @@ module modexpng_mmm_dual
//
reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
-
+
+ wire [FSM_STATE_WIDTH-1:0] fsm_state_after_idle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
@@ -905,16 +908,17 @@ module modexpng_mmm_dual
//
// FSM Transition Logic
//
+ assign fsm_state_after_idle = !only_reduce ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_INIT;
assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT;
assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
- assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
+ assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
always @* begin
//
fsm_state_next = FSM_STATE_IDLE;
//
case (fsm_state)
- FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
+ FSM_STATE_IDLE: fsm_state_next = ena ? fsm_state_after_idle /*FSM_STATE_MULT_SQUARE_COL_0_INIT*/ : FSM_STATE_IDLE;
FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 6e6c3ca..c7566ad 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -23,7 +23,7 @@ localparam MAC_W = 47;
localparam BUS_DATA_W = 32;
localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W);
-
+localparam BIT_INDEX_W = cryptech_clog2(MAX_OP_W);
localparam BANK_ADDR_W = 3;
localparam OP_ADDR_W = cryptech_clog2(MAX_OP_W / WORD_W);
@@ -33,6 +33,8 @@ localparam MAC_INDEX_W = cryptech_clog2(NUM_MULTS);
localparam CARRY_W = WORD_EXT_W - WORD_W;
+localparam WORD_MUX_W = cryptech_clog2(WORD_W);
+
localparam [CARRY_W-1:0] CARRY_ZERO = {CARRY_W{1'b0}};
localparam [BANK_ADDR_W-1:0] BANK_WIDE_A = 3'd0;
@@ -80,6 +82,10 @@ localparam [BANK_ADDR_W-1:0] BANK_OUT_YM = 3'd2;
localparam [BANK_ADDR_W-1:0] BANK_DNC = {BANK_ADDR_W{1'bX}};
+localparam [OP_ADDR_W-1:0] OP_ADDR_LADDER_LAST = 3; // 0..3, i.e. <dummy>, D, P, Q
+
+localparam [BIT_INDEX_W-1:0] BIT_INDEX_ZERO = {BIT_INDEX_W{1'b0}};
+
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_COEFF = 0;
localparam [OP_ADDR_W-1:0] OP_ADDR_EXT_Q = 1;
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 016b1b0..04f0c83 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -10,53 +10,67 @@ module modexpng_uop_rom
input wire clk;
input wire [UOP_ADDR_W -1:0] addr;
- output reg [UOP_W -1:0] data;
+ output reg [UOP_W -1:0] data;
always @(posedge clk)
//
case (addr)
- 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
- 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
- 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
- 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
- 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
- 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 6'd00: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 6'd01: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N, BANK_WIDE_N, BANK_DNC }; //
+ 6'd02: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_X, BANK_WIDE_A, BANK_DNC }; //
+ 6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
+ 6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
+ 6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
//
- 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
- 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
+ 6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
//
- 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
- 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
- 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ 6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
+ 6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
//
- 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
+ 6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
//
- 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
- 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
+ 6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
+ 6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
//
- 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
//
- 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
+ 6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
//
- 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
//
- 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
- 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
- 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
- 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
- 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
+ 6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
+ 6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
+ 6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
+ 6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
//
- 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
- 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; //
- 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; //
- //
- default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
+ 6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
+ 6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd29: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_DNC, BANK_NARROW_A }; //
+ 6'd30: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_DNC, BANK_NARROW_E }; //
+ //
+ 6'd31: data <= {UOP_OPCODE_MODULAR_REDUCE_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_DNC }; //
+ //
+ 6'd32: data <= {UOP_OPCODE_MODULAR_REDUCE_PROC, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ //
+ 6'd33: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_D, BANK_NARROW_A, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd34: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_C, BANK_NARROW_A, BANK_WIDE_D, BANK_NARROW_D }; //
+ 6'd35: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_A, BANK_DNC, BANK_WIDE_C, BANK_NARROW_C }; //
+ //
+ 6'd36: data <= {UOP_OPCODE_COPY_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; //
+ //
+ 6'd37: data <= {UOP_OPCODE_LADDER_INIT, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
+ 6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
+ 6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
+ //
+ default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase
endmodule