aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-21 12:44:33 +0300
committerPavel V. Shatov (Meister) <meisterpaul1@yandex.ru>2019-10-21 12:44:33 +0300
commit1e3303286bdb0d400d78d9d8b0aa90b29949c4a3 (patch)
treeee9755a626a594f1c97f2511023bb734b8527432
parent02247784f18dc683d5873a52c1650e72f02273b5 (diff)
Refactored general worker module
Added modular subtraction micro-operation
-rw-r--r--bench/tb_core_full.v2
-rw-r--r--bench/tb_square.v1379
-rw-r--r--rtl/modexpng_core_top.v20
-rw-r--r--rtl/modexpng_general_worker.v1180
-rw-r--r--rtl/modexpng_microcode.vh12
-rw-r--r--rtl/modexpng_uop_rom.v26
6 files changed, 768 insertions, 1851 deletions
diff --git a/bench/tb_core_full.v b/bench/tb_core_full.v
index 248634e..e592ac5 100644
--- a/bench/tb_core_full.v
+++ b/bench/tb_core_full.v
@@ -274,7 +274,7 @@ module tb_core_full;
word_index_last_pq = CORE_NUM_WORDS_PQ - 1;
bit_index_last_n = TB_MODULUS_LENGTH_N - 1;
- bit_index_last_pq = TB_MODULUS_LENGTH_N / 2 - 1;
+ bit_index_last_pq = 9;//TB_MODULUS_LENGTH_N / 2 - 1;
core_crt_mode = 1'b1;
diff --git a/bench/tb_square.v b/bench/tb_square.v
deleted file mode 100644
index 733e741..0000000
--- a/bench/tb_square.v
+++ /dev/null
@@ -1,1379 +0,0 @@
-`timescale 1ns / 1ps
-
-module tb_square;
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_parameters.vh"
- `include "../rtl/modexpng_parameters_x8.vh"
- `include "../rtl/modexpng_mmm_fsm.vh"
-
-
- //
- // Clock
- //
- `define CLK_FREQUENCY_MHZ 100.0
- `define CLK_PERIOD_NS (1000.0 / `CLK_FREQUENCY_MHZ)
- `define CLK_PERIOD_HALF_NS (0.5 * `CLK_PERIOD_NS)
-
- reg clk = 1'b0; // free-running testbench clock, starts low
-
- always begin // each pass through this block produces one full clock period
- #`CLK_PERIOD_HALF_NS clk = 1'b1; // rising edge after the first half-period
- #`CLK_PERIOD_HALF_NS clk = 1'b0; // falling edge after the second half-period
- end
-
-
- //
- // Reset
- //
- reg rst = 1'b1;
-
-
-
- //
- // T1, T2
- //
- reg [17:0] T1[0:31];
- reg [17:0] T2[0:31];
- reg [17:0] AB[0:63];
- reg [17:0] N_COEFF[0:32];
- reg [17:0] Q[0:32];
- reg [17:0] N[0:31];
- reg [17:0] M[0:64];
-
-
- //
- // Init
- //
- initial begin
- //
- T1[ 0] = 18'h191c5; T1[ 1] = 18'h1a118; T1[ 2] = 18'h06e06; T1[ 3] = 18'h0ea69;
- T1[ 4] = 18'h12944; T1[ 5] = 18'h0c242; T1[ 6] = 18'h0fc64; T1[ 7] = 18'h14efe;
- T1[ 8] = 18'h113da; T1[ 9] = 18'h06ff7; T1[10] = 18'h0ef0d; T1[11] = 18'h18581;
- T1[12] = 18'h1a62c; T1[13] = 18'h052b7; T1[14] = 18'h114f7; T1[15] = 18'h1c53e;
- T1[16] = 18'h0c63e; T1[17] = 18'h0dd14; T1[18] = 18'h0fba8; T1[19] = 18'h1b8e6;
- T1[20] = 18'h0d944; T1[21] = 18'h10292; T1[22] = 18'h0d276; T1[23] = 18'h027b1;
- T1[24] = 18'h0c0c7; T1[25] = 18'h100a9; T1[26] = 18'h0a9ab; T1[27] = 18'h0e696;
- T1[28] = 18'h10798; T1[29] = 18'h0ae91; T1[30] = 18'h08d4d; T1[31] = 18'h0080b;
- //
- T2[ 0] = 18'h1193b; T2[ 1] = 18'h0de9c; T2[ 2] = 18'h0b993; T2[ 3] = 18'h0d2cd;
- T2[ 4] = 18'h106ad; T2[ 5] = 18'h076da; T2[ 6] = 18'h10cab; T2[ 7] = 18'h15cd5;
- T2[ 8] = 18'h15425; T2[ 9] = 18'h16287; T2[10] = 18'h0fd64; T2[11] = 18'h06ee0;
- T2[12] = 18'h1b0c9; T2[13] = 18'h01a5e; T2[14] = 18'h1855c; T2[15] = 18'h17bf9;
- T2[16] = 18'h1c83c; T2[17] = 18'h158ed; T2[18] = 18'h086df; T2[19] = 18'h16676;
- T2[20] = 18'h0a0f8; T2[21] = 18'h14545; T2[22] = 18'h09641; T2[23] = 18'h16863;
- T2[24] = 18'h17e20; T2[25] = 18'h0d457; T2[26] = 18'h05a9b; T2[27] = 18'h1a4cf;
- T2[28] = 18'h1582a; T2[29] = 18'h1686c; T2[30] = 18'h1394e; T2[31] = 18'h0bdbc;
- //
- N[ 0] = 18'h00f97; N[ 1] = 18'h018bb; N[ 2] = 18'h08a44; N[ 3] = 18'h00858;
- N[ 4] = 18'h06647; N[ 5] = 18'h0042c; N[ 6] = 18'h0fa09; N[ 7] = 18'h0c8d3;
- N[ 8] = 18'h0bbc7; N[ 9] = 18'h0e2dd; N[10] = 18'h017fd; N[11] = 18'h0ef4a;
- N[12] = 18'h002ef; N[13] = 18'h090c1; N[14] = 18'h032db; N[15] = 18'h028b1;
- N[16] = 18'h05f0a; N[17] = 18'h0ebfd; N[18] = 18'h017ca; N[19] = 18'h09587;
- N[20] = 18'h0d266; N[21] = 18'h0563c; N[22] = 18'h041af; N[23] = 18'h0433f;
- N[24] = 18'h08e83; N[25] = 18'h0bc19; N[26] = 18'h000b2; N[27] = 18'h05b53;
- N[28] = 18'h00e5d; N[29] = 18'h09bc5; N[30] = 18'h0a822; N[31] = 18'h0efff;
- //
- N_COEFF[ 0] = 18'h09fd9; N_COEFF[ 1] = 18'h0b367; N_COEFF[ 2] = 18'h0e467; N_COEFF[ 3] = 18'h0de24;
- N_COEFF[ 4] = 18'h02022; N_COEFF[ 5] = 18'h0f0e8; N_COEFF[ 6] = 18'h02919; N_COEFF[ 7] = 18'h09901;
- N_COEFF[ 8] = 18'h0da43; N_COEFF[ 9] = 18'h0023b; N_COEFF[10] = 18'h0ebf8; N_COEFF[11] = 18'h0f04e;
- N_COEFF[12] = 18'h0942f; N_COEFF[13] = 18'h029e9; N_COEFF[14] = 18'h07cb0; N_COEFF[15] = 18'h08c25;
- N_COEFF[16] = 18'h04e60; N_COEFF[17] = 18'h05cdc; N_COEFF[18] = 18'h0dff7; N_COEFF[19] = 18'h0279b;
- N_COEFF[20] = 18'h0610d; N_COEFF[21] = 18'h0f04a; N_COEFF[22] = 18'h001dc; N_COEFF[23] = 18'h03429;
- N_COEFF[24] = 18'h0f78c; N_COEFF[25] = 18'h0c3e2; N_COEFF[26] = 18'h00ed8; N_COEFF[27] = 18'h039c0;
- N_COEFF[28] = 18'h02ac2; N_COEFF[29] = 18'h0f703; N_COEFF[30] = 18'h0c54e; N_COEFF[31] = 18'h022d9;
- N_COEFF[32] = 18'h0f994;
- //
- AB[ 0] = 18'h0c199; AB[ 1] = 18'h0957a; AB[ 2] = 18'h070ad; AB[ 3] = 18'h0e5a6;
- AB[ 4] = 18'h0fec9; AB[ 5] = 18'h00b73; AB[ 6] = 18'h09c72; AB[ 7] = 18'h0cdf0;
- AB[ 8] = 18'h08755; AB[ 9] = 18'h07560; AB[10] = 18'h084b1; AB[11] = 18'h0ad3f;
- AB[12] = 18'h074fe; AB[13] = 18'h04d74; AB[14] = 18'h00e16; AB[15] = 18'h0d3b3;
- AB[16] = 18'h0d418; AB[17] = 18'h02f12; AB[18] = 18'h0c301; AB[19] = 18'h0be2b;
- AB[20] = 18'h08222; AB[21] = 18'h0056c; AB[22] = 18'h01c7c; AB[23] = 18'h0bc95;
- AB[24] = 18'h03427; AB[25] = 18'h0c65a; AB[26] = 18'h089ac; AB[27] = 18'h02117;
- AB[28] = 18'h0ff7d; AB[29] = 18'h01cde; AB[30] = 18'h02709; AB[31] = 18'h01c56;
- AB[32] = 18'h0f35a; AB[33] = 18'h08ce6; AB[34] = 18'h0a8e5; AB[35] = 18'h0d6d4;
- AB[36] = 18'h06868; AB[37] = 18'h09105; AB[38] = 18'h0219e; AB[39] = 18'h0bc40;
- AB[40] = 18'h00e0a; AB[41] = 18'h07783; AB[42] = 18'h0187a; AB[43] = 18'h0b922;
- AB[44] = 18'h02609; AB[45] = 18'h0c64b; AB[46] = 18'h06b4b; AB[47] = 18'h04b79;
- AB[48] = 18'h0fed6; AB[49] = 18'h03eac; AB[50] = 18'h04cac; AB[51] = 18'h0d47d;
- AB[52] = 18'h045fd; AB[53] = 18'h04fa8; AB[54] = 18'h0597c; AB[55] = 18'h0a10d;
- AB[56] = 18'h0bf44; AB[57] = 18'h08671; AB[58] = 18'h0112a; AB[59] = 18'h08ccf;
- AB[60] = 18'h0cae5; AB[61] = 18'h04d94; AB[62] = 18'h0b95a; AB[63] = 18'h00040;
- //
- Q[ 0] = 18'h021b1; Q[ 1] = 18'h0d2db; Q[ 2] = 18'h0754b; Q[ 3] = 18'h01fc1;
- Q[ 4] = 18'h063f7; Q[ 5] = 18'h086e5; Q[ 6] = 18'h0bcea; Q[ 7] = 18'h02260;
- Q[ 8] = 18'h0c54c; Q[ 9] = 18'h0e298; Q[10] = 18'h05d07; Q[11] = 18'h0f978;
- Q[12] = 18'h0e742; Q[13] = 18'h0a3f0; Q[14] = 18'h0b31e; Q[15] = 18'h041b7;
- Q[16] = 18'h06ed9; Q[17] = 18'h03ac5; Q[18] = 18'h0f8eb; Q[19] = 18'h0c619;
- Q[20] = 18'h067e9; Q[21] = 18'h00350; Q[22] = 18'h00376; Q[23] = 18'h02ebf;
- Q[24] = 18'h0b125; Q[25] = 18'h05f7d; Q[26] = 18'h0f121; Q[27] = 18'h07ba4;
- Q[28] = 18'h03050; Q[29] = 18'h0642e; Q[30] = 18'h0c2fc; Q[31] = 18'h0dfcf;
- Q[32] = 18'h03f9e;
- //
- M[ 0] = 18'h03e67; M[ 1] = 18'h06a85; M[ 2] = 18'h08f52; M[ 3] = 18'h01a59;
- M[ 4] = 18'h00136; M[ 5] = 18'h0f48c; M[ 6] = 18'h0638d; M[ 7] = 18'h0320f;
- M[ 8] = 18'h078aa; M[ 9] = 18'h08a9f; M[10] = 18'h07b4e; M[11] = 18'h052c0;
- M[12] = 18'h08b01; M[13] = 18'h0b28b; M[14] = 18'h0f1e9; M[15] = 18'h02c4c;
- M[16] = 18'h02be7; M[17] = 18'h0d0ed; M[18] = 18'h03cfe; M[19] = 18'h041d4;
- M[20] = 18'h07ddd; M[21] = 18'h0fa93; M[22] = 18'h0e383; M[23] = 18'h0436a;
- M[24] = 18'h0cbd8; M[25] = 18'h039a5; M[26] = 18'h07653; M[27] = 18'h0dee8;
- M[28] = 18'h00082; M[29] = 18'h0e321; M[30] = 18'h0d8f6; M[31] = 18'h0e3a9;
- M[32] = 18'h00ca5; M[33] = 18'h035ed; M[34] = 18'h02b8f; M[35] = 18'h063bd;
- M[36] = 18'h0ec9f; M[37] = 18'h0b8bb; M[38] = 18'h00389; M[39] = 18'h0ca27;
- M[40] = 18'h0bea7; M[41] = 18'h0df1e; M[42] = 18'h0d685; M[43] = 18'h0cc1b;
- M[44] = 18'h036c4; M[45] = 18'h01ce9; M[46] = 18'h0c43b; M[47] = 18'h05f58;
- M[48] = 18'h02c77; M[49] = 18'h03a12; M[50] = 18'h0eea8; M[51] = 18'h0ac31;
- M[52] = 18'h05838; M[53] = 18'h093ac; M[54] = 18'h0fd54; M[55] = 18'h06e13;
- M[56] = 18'h002e2; M[57] = 18'h06af4; M[58] = 18'h0ea18; M[59] = 18'h083b3;
- M[60] = 18'h059f7; M[61] = 18'h016d3; M[62] = 18'h0c3ad; M[63] = 18'h0dbfc;
- M[64] = 18'h03ba4;
- //
- end
-
-
- //
- // BRAMs
- //
- reg tb_fat_bram_xy_ena = 1'b0;
- reg [ 2:0] tb_fat_bram_xy_bank;
- reg [ 7:0] tb_fat_bram_xy_addr;
- reg [17:0] tb_fat_bram_x_din;
- reg [17:0] tb_fat_bram_y_din;
-
- reg mgr_fat_bram_xy_ena = 1'b0;
- reg [ 2:0] mgr_fat_bram_xy_bank;
- reg [ 7:0] mgr_fat_bram_xy_addr;
- reg [17:0] mgr_fat_bram_x_din;
- reg [17:0] mgr_fat_bram_y_din;
-
- reg mac_fat_bram_xy_ena = 1'b0;
- reg mac_fat_bram_xy_ena_aux = 1'b0;
- reg mac_fat_bram_xy_reg_ena = 1'b0;
- reg mac_fat_bram_xy_reg_ena_aux = 1'b0;
- reg [ 2:0] mac_fat_bram_xy_bank;
- reg [ 2:0] mac_fat_bram_xy_bank_aux;
- reg [ 7:0] mac_fat_bram_xy_addr[0:4];
- wire [17:0] mac_fat_bram_x_dout[0:4];
- wire [17:0] mac_fat_bram_y_dout[0:4];
- wire [ 7:0] mac_fat_bram_xy_addr_aux = mac_fat_bram_xy_addr[4]; // debug alias: read address of fat BRAM pair #4
- wire [17:0] mac_fat_bram_x_dout_aux = mac_fat_bram_x_dout[4]; // debug alias: X-side read data of fat BRAM pair #4
- wire [17:0] mac_fat_bram_y_dout_aux = mac_fat_bram_y_dout[4]; // debug alias: Y-side read data of fat BRAM pair #4 (was mistakenly tied to the X output)
-
- reg tb_slim_bram_xy_ena = 1'b0;
- reg [ 1:0] tb_slim_bram_xy_bank;
- reg [ 7:0] tb_slim_bram_xy_addr;
- reg [17:0] tb_slim_bram_x_din;
- reg [17:0] tb_slim_bram_y_din;
-
- reg mgr_slim_bram_xy_ena = 1'b0;
- reg [ 1:0] mgr_slim_bram_xy_bank;
- reg [ 7:0] mgr_slim_bram_xy_addr;
- reg [17:0] mgr_slim_bram_x_din;
- reg [17:0] mgr_slim_bram_y_din;
-
- reg mac_slim_bram_xy_ena = 1'b0;
- reg mac_slim_bram_xy_reg_ena = 1'b0;
- reg [ 1:0] mac_slim_bram_xy_bank;
- reg [ 7:0] mac_slim_bram_xy_addr;
- reg [ 7:0] mac_slim_bram_xy_addr_dly;
- wire [17:0] mac_slim_bram_x_dout;
- wire [17:0] mac_slim_bram_y_dout;
-
- always @(posedge clk)
- //
- mac_slim_bram_xy_addr_dly <= mac_slim_bram_xy_addr;
-
- reg mac_slim_bram_xy_reg_ena_dly = 1'b0;
- always @(posedge clk)
- mac_slim_bram_xy_reg_ena_dly <= mac_slim_bram_xy_reg_ena;
-
-
-
- genvar z;
- generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1) // NUM_MULTS/2 regular X/Y BRAM pairs plus one extra pair at z == NUM_MULTS/2
- begin : gen_fat_bram
- // X-side BRAM: port A is driven by the mgr_* signals, port B is read through the mac_* signals
- ip_bram_36k fat_bram_x
- (
- .clka (clk),
- .ena (mgr_fat_bram_xy_ena),
- .wea (mgr_fat_bram_xy_ena), // write enable is tied to the port enable
- .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}), // port A address = {bank, word address}
- .dina (mgr_fat_bram_x_din),
-
- .clkb (clk),
- .enb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_ena : mac_fat_bram_xy_ena_aux), // the extra (last) instance uses the *_aux controls
- .regceb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_reg_ena : mac_fat_bram_xy_reg_ena_aux), // presumably the output-register clock enable -- confirm against the ip_bram_36k core
- .addrb ({(z < (NUM_MULTS/2) ?
- mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux), mac_fat_bram_xy_addr[z]}), // shared or aux bank, per-instance word address
- .doutb (mac_fat_bram_x_dout[z])
- );
- // Y-side BRAM: identical address/control wiring to the X side, with its own data input and output
- ip_bram_36k fat_bram_y
- (
- .clka (clk),
- .ena (mgr_fat_bram_xy_ena),
- .wea (mgr_fat_bram_xy_ena), // write enable is tied to the port enable
- .addra ({mgr_fat_bram_xy_bank, mgr_fat_bram_xy_addr}), // port A address = {bank, word address}
- .dina (mgr_fat_bram_y_din),
-
- .clkb (clk),
- .enb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_ena : mac_fat_bram_xy_ena_aux), // the extra (last) instance uses the *_aux controls
- .regceb (z < (NUM_MULTS/2) ? mac_fat_bram_xy_reg_ena : mac_fat_bram_xy_reg_ena_aux), // presumably the output-register clock enable -- confirm against the ip_bram_36k core
- .addrb ({z < (NUM_MULTS/2) ?
- mac_fat_bram_xy_bank : mac_fat_bram_xy_bank_aux, mac_fat_bram_xy_addr[z]}), // shared or aux bank, per-instance word address
- .doutb (mac_fat_bram_y_dout[z])
- );
- // end of one X/Y pair
- end
- endgenerate
-
- ip_bram_18k slim_bram_x
- (
- .clka (clk),
- .ena (mgr_slim_bram_xy_ena),
- .wea (mgr_slim_bram_xy_ena),
- .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
- .dina (mgr_slim_bram_x_din),
-
- .clkb (clk),
- .enb (mac_slim_bram_xy_ena),
- .regceb (mac_slim_bram_xy_reg_ena),
- .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
- .doutb (mac_slim_bram_x_dout)
- );
-
- ip_bram_18k slim_bram_y
- (
- .clka (clk),
- .ena (mgr_slim_bram_xy_ena),
- .wea (mgr_slim_bram_xy_ena),
- .addra ({mgr_slim_bram_xy_bank, mgr_slim_bram_xy_addr}),
- .dina (mgr_slim_bram_y_din),
-
- .clkb (clk),
- .enb (mac_slim_bram_xy_ena),
- .regceb (mac_slim_bram_xy_reg_ena),
- .addrb ({mac_slim_bram_xy_bank, mac_slim_bram_xy_addr}),
- .doutb (mac_slim_bram_y_dout)
- );
-
-
-
- //
- // Enable, Ready
- //
- reg ena = 1'b0;
-
- integer i;
- initial begin // main stimulus: release reset, load operands, pulse ena, wait, then verify
-
- for (i=0; i<10; i=i+1) // keep rst asserted for 10 clock periods
- wait_clock_tick;
-
- rst = 1'b0;
-
- for (i=0; i<10; i=i+1) // idle for 10 more periods after reset release
- wait_clock_tick;
-
- tb_fat_bram_xy_ena = 1'b1; // NOTE(review): the BRAM write ports visible in this chunk are wired to mgr_*, not tb_*; the path from tb_* to the BRAMs is not visible here
- tb_slim_bram_xy_ena = 1'b1;
-
- for (i=0; i<32; i=i+1) begin // one word per tick: T1 on the X side, T2 on the Y side, into both fat and slim T1T2 banks
- tb_fat_bram_xy_bank = BANK_FAT_T1T2;
- tb_fat_bram_xy_addr = i[7:0];
- tb_fat_bram_x_din = T1[i];
- tb_fat_bram_y_din = T2[i];
-
- tb_slim_bram_xy_bank = BANK_SLIM_T1T2;
- tb_slim_bram_xy_addr = i[7:0];
- tb_slim_bram_x_din = T1[i];
- tb_slim_bram_y_din = T2[i];
-
- wait_clock_tick;
- end
-
- for (i=0; i<32; i=i+1) begin // words 0..31 of N_COEFF, same value on X and Y
- tb_slim_bram_xy_bank = BANK_SLIM_N_COEFF;
- tb_slim_bram_xy_addr = i[7:0];
- tb_slim_bram_x_din = N_COEFF[i];
- tb_slim_bram_y_din = N_COEFF[i];
-
- wait_clock_tick;
- end
- for (i=32; i<33; i=i+1) begin // single iteration: word 32 of N_COEFF goes to the EXT bank
- tb_slim_bram_xy_bank = BANK_SLIM_EXT;
- tb_slim_bram_xy_addr = 0; // fixed address 0 in the EXT bank, not i
- tb_slim_bram_x_din = N_COEFF[i];
- tb_slim_bram_y_din = N_COEFF[i];
-
- wait_clock_tick;
- end
-
- for (i=0; i<32; i=i+1) begin // 32 words of N, same value on X and Y
- tb_fat_bram_xy_bank = BANK_FAT_N;
- tb_fat_bram_xy_addr = i[7:0];
- tb_fat_bram_x_din = N[i];
- tb_fat_bram_y_din = N[i];
-
- wait_clock_tick;
- end
-
- tb_fat_bram_xy_ena = 1'b0; // loading finished
- tb_slim_bram_xy_ena = 1'b0;
-
- tb_fat_bram_xy_bank = {3{1'bX}}; // drive the idle load buses to X so stale values stand out in waveforms
- tb_fat_bram_xy_addr = {8{1'bX}};
- tb_fat_bram_x_din = {18{1'bX}};
- tb_fat_bram_y_din = {18{1'bX}};
-
- tb_slim_bram_xy_bank = {2{1'bX}};
- tb_slim_bram_xy_addr = {8{1'bX}};
- tb_slim_bram_x_din = {18{1'bX}};
- tb_slim_bram_y_din = {18{1'bX}};
-
- for (i=0; i<10; i=i+1) // short pause before starting the operation
- wait_clock_tick;
-
- ena = 1'b1; // one-period start pulse
- wait_clock_tick;
- ena = 1'b0;
-
- for (i=0; i<10000; i=i+1) // fixed 10000-period wait; no ready/done signal is polled here
- wait_clock_tick;
-
- verify_ab; // verify_* are defined outside this chunk; presumably they check results against AB, Q and M -- confirm
- verify_q;
- verify_m;
-
- end
-
-
- //
- // DSPs
- //
- reg dsp_x_ce_a;
- reg dsp_x_ce_b;
- reg dsp_x_ce_b_dly;
- reg dsp_x_ce_m;
- reg dsp_x_ce_p;
- reg dsp_x_ce_mode;
-
- reg [9 -1:0] dsp_x_mode_z = {9{1'b1}};
-
- wire [5*18-1:0] dsp_x_a;
- reg [1*17-1:0] dsp_x_b;
- wire [9*47-1:0] dsp_x_p;
-
- reg dsp_y_ce_a;
- reg dsp_y_ce_b;
- reg dsp_y_ce_b_dly;
- reg dsp_y_ce_m;
- reg dsp_y_ce_p;
- reg dsp_y_ce_mode;
-
- reg [9 -1:0] dsp_y_mode_z = {9{1'b1}};
-
- wire [5*18-1:0] dsp_y_a;
- reg [1*17-1:0] dsp_y_b;
- wire [9*47-1:0] dsp_y_p;
-
- generate for (z=0; z<((NUM_MULTS/2)+1); z=z+1) // one 18-bit slice of the packed A bus per fat BRAM pair
- begin : gen_dsp_xy_a_split
- assign dsp_x_a[18*z+:18] = mac_fat_bram_x_dout[z]; // slice z of dsp_x_a <- X-side read data of pair z
- assign dsp_y_a[18*z+:18] = mac_fat_bram_y_dout[z]; // slice z of dsp_y_a <- Y-side read data of pair z
- end
- endgenerate
-
- always @(posedge clk)
- //
- {dsp_y_ce_b_dly, dsp_x_ce_b_dly} <= {dsp_y_ce_b, dsp_x_ce_b};
-
-
- reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
-
- dsp_array dsp_x
- (
- .clk (clk),
-
- .ce_a (dsp_x_ce_a),
- .ce_b (dsp_x_ce_b),
- .ce_m (dsp_x_ce_m),
- .ce_p (dsp_x_ce_p),
- .ce_mode (dsp_x_ce_mode),
-
- .mode_z (dsp_x_mode_z),
-
- .a (dsp_x_a),
- .b (dsp_x_b),
- .p (dsp_x_p)
- );
-
- dsp_array dsp_y
- (
- .clk (clk),
-
- .ce_a (dsp_y_ce_a),
- .ce_b (dsp_y_ce_b),
- .ce_m (dsp_y_ce_m),
- .ce_p (dsp_y_ce_p),
- .ce_mode (dsp_y_ce_mode),
-
- .mode_z (dsp_y_mode_z),
-
- .a (dsp_y_a),
- .b (dsp_y_b),
- .p (dsp_y_p)
- );
-
-
- //
- // FSM State and Next States
- //
- reg [FSM_STATE_WIDTH-1:0] fsm_state = FSM_STATE_IDLE;
- reg [FSM_STATE_WIDTH-1:0] fsm_state_next;
-
-
- always @(posedge clk)
- // FSM state register with synchronous reset to IDLE; fsm_state_next is computed outside this chunk
- if (rst) fsm_state <= FSM_STATE_IDLE;
- else fsm_state <= fsm_state_next;
-
-
- localparam [7:0] index_last = 8'd31;
- localparam [7:0] index_last_minus1 = index_last - 1'b1;
-
-
- //
- // Column
- //
- reg [4:0] col_index; // current column index
- reg [4:0] col_index_prev; // delayed column index value
- reg [4:0] col_index_last; // index of the very last column
- reg [4:0] col_index_next1; // precomputed next column index
- //reg [4:0] col_index_next2; // precomputed next column index after next column index
- reg col_is_last; // flag set during the very last column
-
- always @(posedge clk)
- //
- col_index_prev <= col_index;
-
-
- wire mult_square_addr_almost_done_comb;
- reg mult_square_addr_almost_done_flop;
- reg mult_square_addr_surely_done_flop;
-
- wire mult_triangle_addr_almost_done_comb;
- reg mult_triangle_addr_almost_done_flop;
- reg mult_triangle_addr_surely_done_flop;
- reg mult_triangle_addr_tardy_done_flop;
-
- wire mult_rectangle_addr_almost_done_comb;
- reg mult_rectangle_addr_almost_done_flop;
- reg mult_rectangle_addr_surely_done_flop;
- reg mult_rectangle_addr_tardy_done_flop;
-
-
- assign mult_square_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
- assign mult_triangle_addr_almost_done_comb = (mac_slim_bram_xy_addr[2:0] == index_last_minus1[2:0]) && (mac_slim_bram_xy_addr[7:3] == col_index);
- assign mult_rectangle_addr_almost_done_comb = mac_slim_bram_xy_addr == index_last_minus1;
-
-
-
-
- always @(posedge clk)
- // "almost done" flag for the square phase: samples the address compare only in the BUSY states, cleared in every other state
- case (fsm_state)
-
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- mult_square_addr_almost_done_flop <= mult_square_addr_almost_done_comb;
- //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <=
- //{mult_square_addr_surely_done_comb, mult_square_addr_almost_done_comb};
- default:
- mult_square_addr_almost_done_flop <= 1'b0;
- //{mult_square_addr_surely_done_flop, mult_square_addr_almost_done_flop} <= 2'b00;
-
- endcase
-
- always @(posedge clk)
- // "surely done" is "almost done" delayed by one clock
- mult_square_addr_surely_done_flop <= mult_square_addr_almost_done_flop;
-
- always @(posedge clk)
- // "almost done" flag for the triangle phase: samples the address compare only in the BUSY states, cleared in every other state
- case (fsm_state)
-
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
- mult_triangle_addr_almost_done_flop <= mult_triangle_addr_almost_done_comb;
- //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <=
- //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
-
- default:
- mult_triangle_addr_almost_done_flop <= 1'b0;
- //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
-
- endcase
-
- always @(posedge clk) begin
- // two-stage delay line: surely = almost delayed by one clock, tardy = almost delayed by two clocks
- mult_triangle_addr_surely_done_flop <= mult_triangle_addr_almost_done_flop;
- mult_triangle_addr_tardy_done_flop <= mult_triangle_addr_surely_done_flop;
- //
- end
-
-
- always @(posedge clk)
- // "almost done" flag for the rectangle phase: samples the address compare only in the BUSY states, cleared in every other state
- case (fsm_state)
-
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
- mult_rectangle_addr_almost_done_flop <= mult_rectangle_addr_almost_done_comb;
- //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <=
- //{mult_triangle_addr_surely_done_comb, mult_triangle_addr_almost_done_comb};
-
- default:
- mult_rectangle_addr_almost_done_flop <= 1'b0;
- //{mult_triangle_addr_surely_done_flop, mult_triangle_addr_almost_done_flop} <= 2'b00;
-
- endcase
-
- always @(posedge clk) begin
- // two-stage delay line: surely = almost delayed by one clock, tardy = almost delayed by two clocks
- mult_rectangle_addr_surely_done_flop <= mult_rectangle_addr_almost_done_flop;
- mult_rectangle_addr_tardy_done_flop <= mult_rectangle_addr_surely_done_flop;
- //
- end
-
-
- //
- // Recombinator Interface
- //
- wire [ 2:0] recomb_fat_bram_xy_bank;
- wire [ 7:0] recomb_fat_bram_xy_addr;
- wire [17:0] recomb_fat_bram_x_dout;
- wire [17:0] recomb_fat_bram_y_dout;
- wire recomb_fat_bram_xy_dout_valid;
- wire [ 2:0] recomb_slim_bram_xy_bank;
- wire [ 7:0] recomb_slim_bram_xy_addr;
- wire [17:0] recomb_slim_bram_x_dout;
- wire [17:0] recomb_slim_bram_y_dout;
- wire recomb_slim_bram_xy_dout_valid;
- wire recomb_rdy;
-
-
-
-
- //
- // FSM Transition Logic
- //
- wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_square;
- wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_triangle;
- wire [FSM_STATE_WIDTH-1:0] fsm_state_after_mult_rectangle;
-
-
- //
- // Slim - Address
- //
- always @(posedge clk)
- // Slim BRAM read address, selected by the NEXT FSM state so the address is already valid when that state is entered
- case (fsm_state_next)
- // square phase: start each column at 0, count up, return to 0 once "almost done" is flagged
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_addr <= !mult_square_addr_almost_done_flop ? mac_slim_bram_xy_addr + 1'b1 : 8'd0;
- // triangle phase: same counting; in the last column the address is also forced to 0 for one extra cycle ("surely done")
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_triangle_addr_almost_done_flop || (col_is_last && mult_triangle_addr_surely_done_flop) ?
- 8'd0 : mac_slim_bram_xy_addr + 1'b1;
- // rectangle phase: counts up from 0, but is set to 1 (not 0) while either done flag is high
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_slim_bram_xy_addr <= 8'd0;
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_addr <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
- 8'd1 : mac_slim_bram_xy_addr + 1'b1;
- // any other state: address is don't-care, driven to X for simulation visibility
- default: mac_slim_bram_xy_addr <= 8'dX;
- endcase
-
-
- wire [2:0] fat_bram_offset_rom[0:3];
-
- generate for (z=1; z<NUM_MULTS; z=z+2) // z takes the odd values 1, 3, 5, ... below NUM_MULTS
- begin : gen_fat_bram_offset
- assign fat_bram_offset_rom[(z-1)/2] = z[2:0]; // entry k holds the low 3 bits of 2*k+1, used as the starting word offset of fat BRAM k
- end
- endgenerate
-
- //
- // Fat - Address
- //
- integer j;
- always @(posedge clk) begin
- // Fat BRAM read addresses. The for-loop has no begin/end, so it covers only the first case statement (entries 0..NUM_MULTS/2-1)
- for (j=0; j<(NUM_MULTS/2); j=j+1)
- // regular entries: start at {column, per-BRAM offset}, then step with mac_fat_bram_xy_addr_next() in TRIG/BUSY
- case (fsm_state_next)
- //
- // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
- //
- FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
- //
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
- //
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[j] <= {5'd0, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT: mac_fat_bram_xy_addr[j] <= {col_index_next1, fat_bram_offset_rom[j]};
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[j] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[j], index_last);
- //
- default: mac_fat_bram_xy_addr[j] <= 8'dX;
- endcase
- // entry 4 (extra BRAM pair) is handled outside the loop: fixed start address 1 in square/triangle, recombinator-supplied address in rectangle
- case (fsm_state_next)
- //
- // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
- //
- FSM_STATE_MULT_SQUARE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
- FSM_STATE_MULT_SQUARE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
- //
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: mac_fat_bram_xy_addr[4] <= {5'd0, 3'd1};
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_addr[4] <= mac_fat_bram_xy_addr_next(mac_fat_bram_xy_addr[4], index_last);
- //
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_addr[4] <= 8'dX;//{5'd0, 3'd0};
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- FSM_STATE_MULT_RECTANGLE_HOLDOFF: mac_fat_bram_xy_addr[4] <= recomb_fat_bram_xy_dout_valid ? recomb_fat_bram_xy_addr : 8'dX;//recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
- //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
- //
- default: mac_fat_bram_xy_addr[4] <= 8'dX;
- endcase
-//
- end
-
- always @(posedge clk)
- // Slim BRAM read bank, chosen from the NEXT FSM state
- case (fsm_state_next)
- // square phase always reads the T1T2 bank
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_bank <= BANK_SLIM_T1T2;
- // triangle phase reads N_COEFF; only in the last column, while a done flag is high, it switches to the EXT bank
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= col_is_last && (mult_triangle_addr_almost_done_flop || mult_triangle_addr_surely_done_flop) ?
- BANK_SLIM_EXT : BANK_SLIM_N_COEFF;
- // rectangle phase reads Q; it switches to the EXT bank while a done flag is high, in every column
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_bank <= mult_rectangle_addr_almost_done_flop || mult_rectangle_addr_surely_done_flop ?
- BANK_SLIM_EXT : BANK_SLIM_Q;
- // any other state: bank is don't-care
- default: mac_slim_bram_xy_bank <= 2'bXX;
- endcase
-
- always @(posedge clk) begin
- // Bank for the regular fat BRAMs: T1T2 in the square phase, ABL in the triangle phase, N in the rectangle phase
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_T1T2;
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_fat_bram_xy_bank <= BANK_FAT_ABL;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_ABL;
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_bank <= BANK_FAT_N;
- default: mac_fat_bram_xy_bank <= 3'bXXX;
- endcase
- // Bank for the extra (aux) fat BRAM: ABH during triangle INIT/TRIG, ABL during triangle BUSY, recombinator-driven in the rectangle phase
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_fat_bram_xy_bank_aux <= BANK_FAT_T1T2;
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL;
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (recomb_fat_bram_xy_dout_valid)
- case (recomb_fat_bram_xy_bank) // map the recombinator's bank to the bank read here: ML -> ABL, MH -> ABH, EXT -> EXT
- BANK_FAT_ML: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABL;
- BANK_FAT_MH: mac_fat_bram_xy_bank_aux <= BANK_FAT_ABH;
- BANK_FAT_EXT: mac_fat_bram_xy_bank_aux <= BANK_FAT_EXT;
- default: mac_fat_bram_xy_bank_aux <= 3'bXXX;
- endcase
- else mac_fat_bram_xy_bank_aux <= 3'bXXX;
- default: mac_fat_bram_xy_bank_aux <= 3'bXXX;
- endcase
- //
- end
-
-
-
- always @(posedge clk)
- // Slim BRAM read enable: on throughout INIT/TRIG, dropped in BUSY once the phase's done flag is set, off in all other states
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_square_addr_almost_done_flop;
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= !col_is_last ? ~mult_triangle_addr_almost_done_flop : ~mult_triangle_addr_surely_done_flop; // last column gates on the one-cycle-later flag, i.e. reads one extra word
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: mac_slim_bram_xy_ena <= 1'b1;
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_slim_bram_xy_ena <= ~mult_rectangle_addr_surely_done_flop; // every rectangle column gates on the later flag
- default: mac_slim_bram_xy_ena <= 1'b0;
- endcase
-
- always @(posedge clk) begin
- // Read enable for the regular fat BRAMs: high in every INIT/TRIG/BUSY state of all three phases, low otherwise
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: mac_fat_bram_xy_ena <= 1'b1;
- default: mac_fat_bram_xy_ena <= 1'b0;
- endcase
- // Read enable for the extra (aux) fat BRAM: always on in the triangle phase, follows the recombinator's valid strobe in the rectangle phase, off in the square phase (default branch)
- case (fsm_state_next)
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: mac_fat_bram_xy_ena_aux <= 1'b1;
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: mac_fat_bram_xy_ena_aux <= 1'b0;//1'b1;
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- FSM_STATE_MULT_RECTANGLE_HOLDOFF: mac_fat_bram_xy_ena_aux <= recomb_fat_bram_xy_dout_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
- default: mac_fat_bram_xy_ena_aux <= 1'b0;
- endcase
- //
- end
-
- always @(posedge clk)
- // slim BRAM regceb strobe = slim read enable delayed by one clock
- mac_slim_bram_xy_reg_ena <= mac_slim_bram_xy_ena;
-
- always @(posedge clk)
- // fat BRAM regceb strobes (regular and aux) = their read enables delayed by one clock
- {mac_fat_bram_xy_reg_ena_aux, mac_fat_bram_xy_reg_ena} <= {mac_fat_bram_xy_ena_aux, mac_fat_bram_xy_ena};
-
- reg ladder_mode = 1'b1; // 0 = X:T1*T2, Y:T2*T2
- // 1 = X:T1*T2, Y:T2*T1
-
-
- reg dsp_swap_xy;
-
- always @(posedge clk)
- // Sticky flag: set when the square phase's first column is triggered, cleared when the triangle phase's first column is triggered; holds its value in all other states (no default branch)
- case (fsm_state)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_swap_xy <= 1'b1;
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_swap_xy <= 1'b0;
- endcase
-
- always @(posedge clk)
- // DSP B operands: low 17 bits of the slim BRAM outputs, captured while the twice-delayed read enable is high
- if (mac_slim_bram_xy_reg_ena_dly) begin // rewrite
- if (!dsp_swap_xy) // swap flag clear: straight mapping, X output -> dsp_x_b and Y output -> dsp_y_b
- {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
- else begin // swap flag set: X and Y are crossed only when ladder_mode is 0
- if (!ladder_mode) {dsp_y_b, dsp_x_b} <= {mac_slim_bram_x_dout[16:0], mac_slim_bram_y_dout[16:0]};
- else {dsp_y_b, dsp_x_b} <= {mac_slim_bram_y_dout[16:0], mac_slim_bram_x_dout[16:0]};
- end
- end
- else // no valid read data this cycle: drive X so unintended use shows up in simulation
- {dsp_y_b, dsp_x_b} <= {2{{17{1'bX}}}};
-
-
- function [7:0] mac_fat_bram_xy_addr_next; // next fat BRAM read address: counts down by one and wraps from 0 to the supplied last address
- input [7:0] mac_fat_bram_xy_addr_current; // address in use this cycle
- input [7:0] mac_fat_bram_xy_addr_last; // value to reload when the current address is 0
- begin
- if (mac_fat_bram_xy_addr_current > 8'd0) // not yet at the bottom: step down
- mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_current - 1'b1;
- else // at 0: wrap around to the last address
- mac_fat_bram_xy_addr_next = mac_fat_bram_xy_addr_last;
- end
- endfunction
-
-
-
- always @(posedge clk)
- // A clock enable (both DSP arrays): high one clock after either the slim regceb strobe or its delayed copy was high
- {dsp_y_ce_a, dsp_x_ce_a} <= {2{mac_slim_bram_xy_reg_ena | mac_slim_bram_xy_reg_ena_dly}};
-
- always @(posedge clk)
- // B clock enable: the delayed slim regceb strobe, registered once more
- {dsp_y_ce_b, dsp_x_ce_b} <= {2{mac_slim_bram_xy_reg_ena_dly}};
-
- always @(posedge clk)
- // M clock enable: taken from the delayed B enable (dsp_*_ce_b_dly), registered here
- {dsp_y_ce_m, dsp_x_ce_m} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};
-
- always @(posedge clk)
- // P clock enable: M enable delayed by one clock
- {dsp_y_ce_p, dsp_x_ce_p} <= {dsp_y_ce_m, dsp_x_ce_m};
-
- always @(posedge clk)
- // mode clock enable: same source and timing as the M enable
- {dsp_y_ce_mode, dsp_x_ce_mode} <= {dsp_y_ce_b_dly, dsp_x_ce_b_dly};
-
- task wait_clock_tick; // block the caller for exactly one clock period
- begin
- #`CLK_PERIOD_NS; // plain time delay, not an @(posedge clk) wait; the caller keeps its phase relative to clk
- end
- endtask
-
- //
- // Increment Logic
- //
- always @(posedge clk)
- // column counters are updated on the clock edge that enters an *_INIT state (decoded from fsm_state_next)
- case (fsm_state_next)
- // first column of any phase: restart the counters
- FSM_STATE_MULT_SQUARE_COL_0_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
- col_index <= 5'd0;
- col_index_last <= index_last[7:3]; // last column = upper five bits of index_last
- col_index_next1 <= 5'd1;
- //col_index_next2 <= 5'd2;
- col_is_last <= 1'b0;
-
- end
- // subsequent columns: advance to the precomputed next index
- FSM_STATE_MULT_SQUARE_COL_N_INIT,
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
- col_index <= col_index_next1;
- col_is_last <= col_index_next1 == col_index_last; // flag the column being entered as the last one
- col_index_next1 <= col_index_next1 == col_index_last ? 5'd0 : col_index_next1 + 5'd1; // wrap to 0 after the last column
- //col_index_next2 <= col_index_next2 + 1'b1;
- end
- // all other states: counters hold their values
- endcase
-
- assign fsm_state_after_mult_square = col_is_last ? FSM_STATE_MULT_SQUARE_HOLDOFF : FSM_STATE_MULT_SQUARE_COL_N_INIT; // last column -> holdoff, else next column
- assign fsm_state_after_mult_triangle = col_is_last ? FSM_STATE_MULT_TRIANGLE_HOLDOFF : FSM_STATE_MULT_TRIANGLE_COL_N_INIT; // same rule for the triangle phase
- assign fsm_state_after_mult_rectangle = col_is_last ? FSM_STATE_MULT_RECTANGLE_HOLDOFF : FSM_STATE_MULT_RECTANGLE_COL_N_INIT; // same rule for the rectangle phase
-
- always @(posedge clk)
- // earliest copy of the 9-bit mode_z word, chosen from the state being entered
- case (fsm_state_next)
- FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
- FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, mac_slim_bram_xy_addr_dly);
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}};
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, mac_slim_bram_xy_addr_dly);
- default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
- endcase
-
- always @(posedge clk) begin
- {dsp_y_mode_z, dsp_x_mode_z} <= {2{dsp_xy_mode_z_adv1}}; // both DSP chains receive the same word
- // delay chain adv4 -> adv3 -> adv2 -> adv1, so dsp_*_mode_z lags adv4 by four clocks
- dsp_xy_mode_z_adv1 <= {dsp_xy_mode_z_adv2};
- dsp_xy_mode_z_adv2 <= {dsp_xy_mode_z_adv3};
- dsp_xy_mode_z_adv3 <= {dsp_xy_mode_z_adv4};
- end
-
- function [NUM_MULTS:0] calc_mac_mode_z_square; // mode_z word for the square phase: all ones, with at most one of bits [7:0] cleared
- input [ 4:0] col_index_value; // column index compared against address bits [7:3]
- input [ 7:0] mac_slim_bram_xy_addr_value; // address; bits [2:0] pick which bit is cleared
- begin
- if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) // address falls in the given column
- case (mac_slim_bram_xy_addr_value[2:0]) // clear bit number addr[2:0]; bit 8 stays set. NOTE(review): fixed 8-bit literals presumably assume NUM_MULTS == 8 - confirm
- 3'b000: calc_mac_mode_z_square = {1'b1, 8'b11111110};
- 3'b001: calc_mac_mode_z_square = {1'b1, 8'b11111101};
- 3'b010: calc_mac_mode_z_square = {1'b1, 8'b11111011};
- 3'b011: calc_mac_mode_z_square = {1'b1, 8'b11110111};
- 3'b100: calc_mac_mode_z_square = {1'b1, 8'b11101111};
- 3'b101: calc_mac_mode_z_square = {1'b1, 8'b11011111};
- 3'b110: calc_mac_mode_z_square = {1'b1, 8'b10111111};
- 3'b111: calc_mac_mode_z_square = {1'b1, 8'b01111111};
- endcase
- else
- calc_mac_mode_z_square = {1'b1, {NUM_MULTS{1'b1}}}; // address outside the column: every bit set
- end
- endfunction
-
- function [NUM_MULTS:0] calc_mac_mode_z_rectangle; // mode_z word for the rectangle phase; same table as calc_mac_mode_z_square
- input [ 4:0] col_index_value; // column index compared against address bits [7:3]
- input [ 7:0] mac_slim_bram_xy_addr_value; // address; bits [2:0] pick which bit is cleared
- begin
- if (mac_slim_bram_xy_addr_value[7:3] == col_index_value) // address falls in the given column
- case (mac_slim_bram_xy_addr_value[2:0]) // clear bit number addr[2:0]; bit 8 stays set. NOTE(review): fixed 8-bit literals presumably assume NUM_MULTS == 8 - confirm
- 3'b000: calc_mac_mode_z_rectangle = {1'b1, 8'b11111110};
- 3'b001: calc_mac_mode_z_rectangle = {1'b1, 8'b11111101};
- 3'b010: calc_mac_mode_z_rectangle = {1'b1, 8'b11111011};
- 3'b011: calc_mac_mode_z_rectangle = {1'b1, 8'b11110111};
- 3'b100: calc_mac_mode_z_rectangle = {1'b1, 8'b11101111};
- 3'b101: calc_mac_mode_z_rectangle = {1'b1, 8'b11011111};
- 3'b110: calc_mac_mode_z_rectangle = {1'b1, 8'b10111111};
- 3'b111: calc_mac_mode_z_rectangle = {1'b1, 8'b01111111};
- endcase
- else
- calc_mac_mode_z_rectangle = {1'b1, {NUM_MULTS{1'b1}}}; // address outside the column: every bit set
- end
- endfunction
-
- reg recomb_x_ena = 1'b0; // enable passed to the recombinator as ena_x
- reg recomb_y_ena = 1'b0; // enable passed to the recombinator as ena_y
-
- always @(posedge clk) begin
- // asserted one clock after ce_a is high while ce_b, ce_m and ce_p are all still low
- recomb_x_ena <= dsp_x_ce_a && !dsp_x_ce_b && !dsp_x_ce_m && !dsp_x_ce_p;
- recomb_y_ena <= dsp_y_ce_a && !dsp_y_ce_b && !dsp_y_ce_m && !dsp_y_ce_p;
- //
- end
-
- modexpng_part_recombinator recomb
- (
- .clk (clk),
- .rdy (recomb_rdy),
- .fsm_state_next (fsm_state_next),
- .index_last (index_last),
- .dsp_x_ce_p (dsp_x_ce_p),
- .dsp_y_ce_p (dsp_y_ce_p),
- .ena_x (recomb_x_ena),
- .ena_y (recomb_y_ena),
- .dsp_x_p (dsp_x_p),
- .dsp_y_p (dsp_y_p),
- .col_index (col_index),
- .col_index_last (col_index_last),
- .slim_bram_xy_addr (mac_slim_bram_xy_addr),
- .slim_bram_xy_bank (mac_slim_bram_xy_bank),
- .rcmb_fat_bram_xy_bank (recomb_fat_bram_xy_bank),
- .rcmb_fat_bram_xy_addr (recomb_fat_bram_xy_addr),
- .rcmb_fat_bram_x_dout (recomb_fat_bram_x_dout),
- .rcmb_fat_bram_y_dout (recomb_fat_bram_y_dout),
- .rcmb_fat_bram_xy_dout_valid (recomb_fat_bram_xy_dout_valid),
- .rcmb_slim_bram_xy_bank (recomb_slim_bram_xy_bank),
- .rcmb_slim_bram_xy_addr (recomb_slim_bram_xy_addr),
- .rcmb_slim_bram_x_dout (recomb_slim_bram_x_dout),
- .rcmb_slim_bram_y_dout (recomb_slim_bram_y_dout),
- .rcmb_slim_bram_xy_dout_valid (recomb_slim_bram_xy_dout_valid)
- );
-
- reg [17:0] AB_READ[0:63]; // X words captured from the fat ABL/ABH banks
- reg [17:0] Q_READ[0:32]; // X words captured from the slim Q bank, plus one extra word at index 32
- reg [17:0] M_READ[0:64]; // X words captured from the fat ML/MH/EXT banks
-
- always @(posedge clk) begin
- // mirror every valid recombinator write into the arrays above (only the X outputs are recorded)
- if (recomb_fat_bram_xy_dout_valid)
- // low banks fill entries 0..31, high banks 32..63, EXT starts at 64
- case (recomb_fat_bram_xy_bank)
- BANK_FAT_ABL: AB_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
- BANK_FAT_ABH: AB_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- BANK_FAT_ML: M_READ[recomb_fat_bram_xy_addr % 32] <= recomb_fat_bram_x_dout;
- BANK_FAT_MH: M_READ[32 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout;
- BANK_FAT_EXT: M_READ[64 + (recomb_fat_bram_xy_addr % 32)] <= recomb_fat_bram_x_dout; // NOTE(review): index exceeds M_READ[0:64] unless addr % 32 == 0 - confirm EXT only uses such addresses
- endcase
- //
- if (recomb_slim_bram_xy_dout_valid)
- // Q bank is indexed directly by address; from the EXT bank only address 1 is kept
- case (recomb_slim_bram_xy_bank)
- BANK_SLIM_Q: Q_READ[recomb_slim_bram_xy_addr] <= recomb_slim_bram_x_dout;
- BANK_SLIM_EXT: if (recomb_slim_bram_xy_addr == 8'd1)
- Q_READ[32] <= recomb_slim_bram_x_dout;
- endcase
- //
- end
-
-
- always @(posedge clk)
- // registered source select for the mgr_fat_bram_* signals: tb_* inputs win over recombinator output
- if (tb_fat_bram_xy_ena) begin
- mgr_fat_bram_xy_ena <= 1'b1;
- mgr_fat_bram_xy_bank <= tb_fat_bram_xy_bank;
- mgr_fat_bram_xy_addr <= tb_fat_bram_xy_addr;
- mgr_fat_bram_x_din <= tb_fat_bram_x_din;
- mgr_fat_bram_y_din <= tb_fat_bram_y_din;
- end else if (recomb_fat_bram_xy_dout_valid) begin // forward a valid recombinator word
- mgr_fat_bram_xy_ena <= 1'b1;
- mgr_fat_bram_xy_bank <= recomb_fat_bram_xy_bank;
- mgr_fat_bram_xy_addr <= recomb_fat_bram_xy_addr;
- mgr_fat_bram_x_din <= recomb_fat_bram_x_dout;
- mgr_fat_bram_y_din <= recomb_fat_bram_y_dout;
- end else begin // idle: enable low, everything else driven to X
- mgr_fat_bram_xy_ena <= 1'b0;
- mgr_fat_bram_xy_bank <= 3'bXXX;
- mgr_fat_bram_xy_addr <= 8'hXX;
- mgr_fat_bram_x_din <= {18{1'bX}};
- mgr_fat_bram_y_din <= {18{1'bX}};
- end
-
-
- always @(posedge clk)
- // registered source select for the mgr_slim_bram_* signals: tb_* inputs win over recombinator output
- if (tb_slim_bram_xy_ena) begin
- mgr_slim_bram_xy_ena <= 1'b1;
- mgr_slim_bram_xy_bank <= tb_slim_bram_xy_bank;
- mgr_slim_bram_xy_addr <= tb_slim_bram_xy_addr;
- mgr_slim_bram_x_din <= tb_slim_bram_x_din;
- mgr_slim_bram_y_din <= tb_slim_bram_y_din;
- end else if (recomb_slim_bram_xy_dout_valid) begin // forward a valid recombinator word
- mgr_slim_bram_xy_ena <= 1'b1;
- mgr_slim_bram_xy_bank <= recomb_slim_bram_xy_bank;
- mgr_slim_bram_xy_addr <= recomb_slim_bram_xy_addr;
- mgr_slim_bram_x_din <= recomb_slim_bram_x_dout;
- mgr_slim_bram_y_din <= recomb_slim_bram_y_dout;
- end else begin // idle: enable low, everything else driven to X
- mgr_slim_bram_xy_ena <= 1'b0;
- mgr_slim_bram_xy_bank <= 3'bXXX;
- mgr_slim_bram_xy_addr <= 8'hXX;
- mgr_slim_bram_x_din <= {18{1'bX}};
- mgr_slim_bram_y_din <= {18{1'bX}};
- end
-
-
- task verify_ab; // compare all 64 captured AB_READ words with the reference array AB and print a verdict
- reg verify_ab_ok; // stays 1 only while every word matches
- begin
- verify_ab_ok = 1;
- for (i=0; i<64; i=i+1) // loop variable i is declared outside this task
- if (AB_READ[i] === AB[i]) // case equality, so X/Z bits must match too
- $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x", i, AB[i], AB_READ[i]);
- else begin
- $display("AB / AB_READ [%02d] = 0x%05x / 0x%05x <???>", i, AB[i], AB_READ[i]); // mismatching word is tagged in the log
- verify_ab_ok = 0;
- end
- if (verify_ab_ok)
- $display("AB is OK.");
- else
- $display("AB is WRONG!");
- end
- endtask
-
-
- task verify_q; // compare all 33 captured Q_READ words with the reference array Q and print a verdict
- reg verify_q_ok; // stays 1 only while every word matches
- begin
- verify_q_ok = 1;
- for (i=0; i<33; i=i+1) // loop variable i is declared outside this task
- if (Q_READ[i] === Q[i]) // case equality, so X/Z bits must match too
- $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x", i, Q[i], Q_READ[i]);
- else begin
- $display("Q / Q_READ [%02d] = 0x%05x / 0x%05x <???>", i, Q[i], Q_READ[i]); // mismatching word is tagged in the log
- verify_q_ok = 0;
- end
- if (verify_q_ok)
- $display("Q is OK.");
- else
- $display("Q is WRONG!");
- end
- endtask
-
-
- task verify_m; // compare all 65 captured M_READ words with the reference array M and print a verdict
- reg verify_m_ok; // stays 1 only while every word matches
- begin
- verify_m_ok = 1;
- for (i=0; i<65; i=i+1) // loop variable i is declared outside this task
- if (M_READ[i] === M[i]) // case equality, so X/Z bits must match too
- $display("M / M_READ [%02d] = 0x%05x / 0x%05x", i, M[i], M_READ[i]);
- else begin
- $display("M / M_READ [%02d] = 0x%05x / 0x%05x <???>", i, M[i], M_READ[i]); // mismatching word is tagged in the log
- verify_m_ok = 0;
- end
- if (verify_m_ok)
- $display("M is OK.");
- else
- $display("M is WRONG!");
- end
- endtask
-
-
- wire mult_square_addr_done = mult_square_addr_surely_done_flop; // square phase always uses the "surely done" flop
- wire mult_triangle_addr_done = !col_is_last ? mult_triangle_addr_surely_done_flop : mult_triangle_addr_tardy_done_flop; // triangle phase uses the "tardy" flop on the last column only
- wire mult_rectangle_addr_done = mult_rectangle_addr_tardy_done_flop; // rectangle phase always uses the "tardy" flop
-
-
- always @* begin
- // combinational next-state logic: IDLE -> square -> triangle -> rectangle -> STOP
- fsm_state_next = FSM_STATE_IDLE;
- // each phase runs INIT -> TRIG -> BUSY per column and finishes in a HOLDOFF state that waits for recomb_rdy
- case (fsm_state)
- FSM_STATE_IDLE: fsm_state_next = ena ? FSM_STATE_MULT_SQUARE_COL_0_INIT : FSM_STATE_IDLE;
-
- FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
- FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = mult_square_addr_done ? FSM_STATE_MULT_SQUARE_COL_N_INIT : FSM_STATE_MULT_SQUARE_COL_0_BUSY;
-
- FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
- FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
- FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = mult_square_addr_done ? fsm_state_after_mult_square : FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-
- FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_TRIANGLE_COL_0_INIT : FSM_STATE_MULT_SQUARE_HOLDOFF;
-
- FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
- FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
- FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = mult_triangle_addr_done ? FSM_STATE_MULT_TRIANGLE_COL_N_INIT : FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
-
- FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
- FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
- FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = mult_triangle_addr_done ? fsm_state_after_mult_triangle : FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
-
- FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_MULT_RECTANGLE_COL_0_INIT : FSM_STATE_MULT_TRIANGLE_HOLDOFF;
-
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
- FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
- FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = mult_rectangle_addr_done ? FSM_STATE_MULT_RECTANGLE_COL_N_INIT : FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
-
- FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
- FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
- FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = mult_rectangle_addr_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
-
- FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = recomb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
-
- default: fsm_state_next = FSM_STATE_IDLE ;
-
- endcase
- // FSM_STATE_STOP has no arm of its own, so it falls to the default and returns to IDLE
- end
-
-
-
- //
- // Reductor
- //
- reg reductor_ena = 1'b0; // gates the reductor accumulation further down
-
- always @(posedge clk)
- // set when the rectangle phase initialises its first column; nothing in this block clears it again
- if (!reductor_ena)
- case (fsm_state)
- FSM_STATE_MULT_RECTANGLE_COL_0_INIT: reductor_ena <= 1'b1;
- endcase
- else begin
-
-
- end
-
-
- reg recomb_fat_bram_xy_dout_valid_dly1; // valid flag delayed by 1, 2 and 3 clocks
- reg recomb_fat_bram_xy_dout_valid_dly2;
- reg recomb_fat_bram_xy_dout_valid_dly3;
-
- reg [2:0] recomb_fat_bram_xy_bank_dly1; // bank travelling alongside the valid flag
- reg [2:0] recomb_fat_bram_xy_bank_dly2;
- reg [2:0] recomb_fat_bram_xy_bank_dly3;
-
- reg [7:0] recomb_fat_bram_xy_addr_dly1; // address travelling alongside the valid flag
- reg [7:0] recomb_fat_bram_xy_addr_dly2;
- reg [7:0] recomb_fat_bram_xy_addr_dly3;
-
- always @(posedge clk) begin
- // the valid flag shifts unconditionally every clock
- recomb_fat_bram_xy_dout_valid_dly1 <= recomb_fat_bram_xy_dout_valid;
- recomb_fat_bram_xy_dout_valid_dly2 <= recomb_fat_bram_xy_dout_valid_dly1;
- recomb_fat_bram_xy_dout_valid_dly3 <= recomb_fat_bram_xy_dout_valid_dly2;
- //
- end
-
- reg [17:0] recomb_fat_bram_x_dout_dly1; // X data word travelling alongside the valid flag
- reg [17:0] recomb_fat_bram_x_dout_dly2;
- reg [17:0] recomb_fat_bram_x_dout_dly3;
-
- always @(posedge clk) begin
- // each data stage loads only when the stage feeding it holds a valid word; otherwise it keeps its value
- if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_x_dout_dly1 <= recomb_fat_bram_x_dout;
- if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_x_dout_dly2 <= recomb_fat_bram_x_dout_dly1;
- if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_x_dout_dly3 <= recomb_fat_bram_x_dout_dly2;
- //
- end
-
- always @(posedge clk) begin
- // bank uses the same gating as the data word
- if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank;
- if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1;
- if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_xy_bank_dly3 <= recomb_fat_bram_xy_bank_dly2;
- //
- end
-
- always @(posedge clk) begin
- // address uses the same gating as the data word
- if (recomb_fat_bram_xy_dout_valid) recomb_fat_bram_xy_addr_dly1 <= recomb_fat_bram_xy_addr;
- if (recomb_fat_bram_xy_dout_valid_dly1) recomb_fat_bram_xy_addr_dly2 <= recomb_fat_bram_xy_addr_dly1;
- if (recomb_fat_bram_xy_dout_valid_dly2) recomb_fat_bram_xy_addr_dly3 <= recomb_fat_bram_xy_addr_dly2;
- //
- end
-
-
- reg [ 1:0] reductor_fat_bram_x_lsb_carry; // upper 2 bits of the 18-bit running sum, fed back into the next addition
- reg [15:0] reductor_fat_bram_x_lsb_dummy; // lower 16 bits of that sum; not read anywhere in this excerpt
- reg [17:0] reductor_fat_bram_x_lsb_dout; // declared but not assigned anywhere in this excerpt
-
- reg [17:0] reductor_fat_bram_x_msb_dout; // result word for the MH (address >= 1) and EXT banks
-
- always @(posedge clk)
- // acts on recombinator words delayed by three clocks; the carry is held at zero until reductor_ena is set
- if (!reductor_ena) begin
- reductor_fat_bram_x_lsb_carry <= 2'b00;
- end else if (recomb_fat_bram_xy_dout_valid_dly3) begin
-
- case (recomb_fat_bram_xy_bank_dly3)
- BANK_FAT_ML: {reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry; // add with carry-in, keep the new carry
- BANK_FAT_MH:
- if (recomb_fat_bram_xy_addr_dly3 == 8'd0) // address 0 is still accumulated into the carry chain
- {reductor_fat_bram_x_lsb_carry, reductor_fat_bram_x_lsb_dummy} <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry;
- else if (recomb_fat_bram_xy_addr_dly3 == 8'd1) // address 1 absorbs the accumulated carry
- reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_lsb_carry;
- else // higher addresses are added without any carry-in
- reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3 + mac_fat_bram_x_dout_aux;
- BANK_FAT_EXT:
- reductor_fat_bram_x_msb_dout <= recomb_fat_bram_x_dout_dly3; // passed through unchanged
- endcase
- //
- end
- /*
-
-
- reg [17:0] recomb_fat_bram_x_dout_dly1;
- reg [17:0] recomb_fat_bram_x_dout_dly2;
-
- reg [ 2:0] recomb_fat_bram_xy_bank_dly1;
- reg [ 2:0] recomb_fat_bram_xy_bank_dly2;
-
- reg [1:0] reductor_fat_bram_x_carry;
-
- reg [15:0] reductor_fat_bram_x_dummy;
- reg [17:0] reductor_fat_bram_x_dout;
- reg reductor_fat_bram_xy_dout_valid;
-
- always @(posedge clk)
- //
- if (reductor_ena) begin
-
- if (recomb_fat_bram_xy_dout_valid) begin
- recomb_fat_bram_x_dout_dly1 <= recomb_fat_bram_x_dout;
- recomb_fat_bram_xy_bank_dly1 <= recomb_fat_bram_xy_bank;
- end
-
- if (mac_fat_bram_xy_ena_aux) begin
- recomb_fat_bram_x_dout_dly2 <= recomb_fat_bram_x_dout_dly1;
- recomb_fat_bram_xy_bank_dly2 <= recomb_fat_bram_xy_bank_dly1;
- end
-
- if (mac_fat_bram_xy_reg_ena_aux)
- case (recomb_fat_bram_xy_bank_dly2)
- BANK_FAT_ML: {reductor_fat_bram_x_carry, reductor_fat_bram_x_dummy} <= recomb_fat_bram_x_dout_dly2 + mac_fat_bram_x_dout_aux + reductor_fat_bram_x_carry;
- endcase
-
- //reductor_fat_bram_xy_dout_valid <= mac_fat_bram_xy_reg_ena_aux;
-
- end else begin
-
- reductor_fat_bram_x_carry <= 2'b00;
- reductor_fat_bram_xy_dout_valid <= 1'b0;
-
- end
- */
-
-
-
-endmodule
-
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
index c78a969..dea7f0a 100644
--- a/rtl/modexpng_core_top.v
+++ b/rtl/modexpng_core_top.v
@@ -87,7 +87,9 @@ module modexpng_core_top
wire uop_opcode_is_wrk = (uop_data_opcode == UOP_OPCODE_PROPAGATE_CARRIES ) ||
(uop_data_opcode == UOP_OPCODE_COPY_CRT_Y2X ) ||
(uop_data_opcode == UOP_OPCODE_MODULAR_REDUCE_INIT ) ||
- (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ;
+ (uop_data_opcode == UOP_OPCODE_COPY_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_CROSS_LADDERS_X2Y ) ||
+ (uop_data_opcode == UOP_OPCODE_MODULAR_SUBTRACT ) ;
wire uop_loop_now;
@@ -1113,8 +1115,15 @@ module modexpng_core_top
wrk_sel_narrow_out <= uop_data_sel_narrow_out;
end
//
+ UOP_OPCODE_MODULAR_SUBTRACT: begin
+ wrk_sel_wide_out <= uop_data_sel_wide_out;
+ wrk_sel_narrow_in <= uop_data_sel_narrow_in;
+ wrk_sel_narrow_out <= uop_data_sel_narrow_out;
+ end
+ //
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
wrk_sel_wide_in <= uop_data_sel_wide_in;
wrk_sel_wide_out <= uop_data_sel_wide_out;
wrk_sel_narrow_in <= uop_data_sel_narrow_in;
@@ -1157,7 +1166,8 @@ module modexpng_core_top
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_COPY_LADDERS_X2Y:
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
//
UOP_OPCODE_MODULAR_REDUCE_INIT: begin
@@ -1171,6 +1181,10 @@ module modexpng_core_top
{rdct_word_index_last_x, rdct_word_index_last_y } <= {2{word_index_last_pq }};
end
//
+ UOP_OPCODE_MODULAR_SUBTRACT: begin
+ wrk_word_index_last <= uop_npq_is_n ? word_index_last_n : word_index_last_pq;
+ end
+ //
UOP_OPCODE_LADDER_INIT: begin
io_mgr_word_index_last <= OP_ADDR_LADDER_LAST;
io_mgr_ladder_steps <= crt_mode ? bit_index_last_pq : bit_index_last_n;
diff --git a/rtl/modexpng_general_worker.v b/rtl/modexpng_general_worker.v
index 269ef98..74c939b 100644
--- a/rtl/modexpng_general_worker.v
+++ b/rtl/modexpng_general_worker.v
@@ -1,70 +1,22 @@
module modexpng_general_worker
(
- clk,
- rst,
-
- ena,
- rdy,
-
- sel_narrow_in,
- sel_narrow_out,
- sel_wide_in,
- sel_wide_out,
-
+ clk, rst,
+ ena, rdy,
+ sel_narrow_in, sel_narrow_out,
+ sel_wide_in, sel_wide_out,
opcode,
-
- word_index_last,
- word_index_last_half,
-
- wrk_rd_wide_xy_ena_x,
- wrk_rd_wide_xy_bank_x,
- wrk_rd_wide_xy_addr_x,
- wrk_rd_wide_x_din_x,
- wrk_rd_wide_y_din_x,
-
- wrk_rd_narrow_xy_ena_x,
- wrk_rd_narrow_xy_bank_x,
- wrk_rd_narrow_xy_addr_x,
- wrk_rd_narrow_x_din_x,
- wrk_rd_narrow_y_din_x,
-
- wrk_rd_wide_xy_ena_y,
- wrk_rd_wide_xy_bank_y,
- wrk_rd_wide_xy_addr_y,
- wrk_rd_wide_x_din_y,
- wrk_rd_wide_y_din_y,
-
- wrk_rd_narrow_xy_ena_y,
- wrk_rd_narrow_xy_bank_y,
- wrk_rd_narrow_xy_addr_y,
- wrk_rd_narrow_x_din_y,
- wrk_rd_narrow_y_din_y,
-
- wrk_wr_wide_xy_ena_x,
- wrk_wr_wide_xy_bank_x,
- wrk_wr_wide_xy_addr_x,
- wrk_wr_wide_x_dout_x,
- wrk_wr_wide_y_dout_x,
-
- wrk_wr_narrow_xy_ena_x,
- wrk_wr_narrow_xy_bank_x,
- wrk_wr_narrow_xy_addr_x,
- wrk_wr_narrow_x_dout_x,
- wrk_wr_narrow_y_dout_x,
-
- wrk_wr_wide_xy_ena_y,
- wrk_wr_wide_xy_bank_y,
- wrk_wr_wide_xy_addr_y,
- wrk_wr_wide_x_dout_y,
- wrk_wr_wide_y_dout_y,
-
- wrk_wr_narrow_xy_ena_y,
- wrk_wr_narrow_xy_bank_y,
- wrk_wr_narrow_xy_addr_y,
- wrk_wr_narrow_x_dout_y,
- wrk_wr_narrow_y_dout_y
+ word_index_last, word_index_last_half,
+ wrk_rd_wide_xy_ena_x, wrk_rd_wide_xy_bank_x, wrk_rd_wide_xy_addr_x, wrk_rd_wide_x_din_x, wrk_rd_wide_y_din_x,
+ wrk_rd_narrow_xy_ena_x, wrk_rd_narrow_xy_bank_x, wrk_rd_narrow_xy_addr_x, wrk_rd_narrow_x_din_x, wrk_rd_narrow_y_din_x,
+ wrk_rd_wide_xy_ena_y, wrk_rd_wide_xy_bank_y, wrk_rd_wide_xy_addr_y, wrk_rd_wide_x_din_y, wrk_rd_wide_y_din_y,
+ wrk_rd_narrow_xy_ena_y, wrk_rd_narrow_xy_bank_y, wrk_rd_narrow_xy_addr_y, wrk_rd_narrow_x_din_y, wrk_rd_narrow_y_din_y,
+ wrk_wr_wide_xy_ena_x, wrk_wr_wide_xy_bank_x, wrk_wr_wide_xy_addr_x, wrk_wr_wide_x_dout_x, wrk_wr_wide_y_dout_x,
+ wrk_wr_narrow_xy_ena_x, wrk_wr_narrow_xy_bank_x, wrk_wr_narrow_xy_addr_x, wrk_wr_narrow_x_dout_x, wrk_wr_narrow_y_dout_x,
+ wrk_wr_wide_xy_ena_y, wrk_wr_wide_xy_bank_y, wrk_wr_wide_xy_addr_y, wrk_wr_wide_x_dout_y, wrk_wr_wide_y_dout_y,
+ wrk_wr_narrow_xy_ena_y, wrk_wr_narrow_xy_bank_y, wrk_wr_narrow_xy_addr_y, wrk_wr_narrow_x_dout_y, wrk_wr_narrow_y_dout_y
);
+
//
// Headers
//
@@ -143,30 +95,44 @@ module modexpng_general_worker
//
// FSM Declaration
//
- localparam [4:0] WRK_FSM_STATE_IDLE = 5'h00;
+ localparam [5:0] WRK_FSM_STATE_IDLE = 6'h00;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1 = 5'h01;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2 = 5'h02;
- localparam [4:0] WRK_FSM_STATE_BUSY = 5'h03;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1 = 5'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2 = 5'h06;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1 = 6'h01;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2 = 6'h02;
+ localparam [5:0] WRK_FSM_STATE_BUSY = 6'h03;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1 = 6'h05; // NOTE: 4 is skipped to match the numbering in IO_MANAGER to ease debug!
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2 = 6'h06;
- localparam [4:0] WRK_FSM_STATE_STOP = 5'h07;
+ localparam [5:0] WRK_FSM_STATE_STOP = 6'h07;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 5'h10;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 5'h11;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 5'h12;
- localparam [4:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 5'h13;
- localparam [4:0] WRK_FSM_STATE_BUSY_M1 = 5'h14;
- localparam [4:0] WRK_FSM_STATE_BUSY_M2 = 5'h15;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 5'h16;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 5'h17;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 5'h18;
- localparam [4:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 5'h19;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M1 = 6'h10;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_M2 = 6'h11;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M1 = 6'h12;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_M2 = 6'h13;
+ localparam [5:0] WRK_FSM_STATE_BUSY_M1 = 6'h14;
+ localparam [5:0] WRK_FSM_STATE_BUSY_M2 = 6'h15;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M1 = 6'h16;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_M2 = 6'h17;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M1 = 6'h18;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_M2 = 6'h19;
+
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE1_TP = 6'h20;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE2_TP = 6'h21;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE3_TP = 6'h22;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_PRE4_TP = 6'h23;
+ localparam [5:0] WRK_FSM_STATE_BUSY_TP = 6'h24;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST1_TP = 6'h25;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST2_TP = 6'h26;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST3_TP = 6'h27;
+ localparam [5:0] WRK_FSM_STATE_LATENCY_POST4_TP = 6'h28;
+ localparam [5:0] WRK_FSM_STATE_HOLDOFF_TP = 6'h29;
- reg [4:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
- reg [4:0] wrk_fsm_state_next_one_pass; // single address space sweep
- reg [4:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ reg [5:0] wrk_fsm_state = WRK_FSM_STATE_IDLE;
+ reg [5:0] wrk_fsm_state_next_one_pass; // single address space sweep
+ reg [5:0] wrk_fsm_state_next_one_pass_meander; // single address space sweep with interleaving source/destination banks (needed by copy_ladders_x2y)
+ reg [5:0] wrk_fsm_state_next_two_pass; // two address space sweeps
+ reg wrk_fsm_two_pass_pass; // 0=first pass, 1=second pass
+ reg wrk_fsm_two_pass_pass_dly; // 0=first pass, 1=second pass
// TODO: Comment on how narrow/wide address increment works (narrow is one long sweep, wide is two twice shorter sweeps)
@@ -292,37 +258,36 @@ module modexpng_general_worker
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly1;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_x_dly3;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_x_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly1;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly2;
reg [WORD_EXT_W -1:0] wrk_rd_narrow_x_din_y_dly3;
-
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly1;
+ reg [WORD_EXT_W -1:0] wrk_rd_narrow_y_din_y_dly2;
always @(posedge clk) begin
//
- {rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
- {rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
- //
- {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2};
- {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2};
- //
- {rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
- {rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
+ {rd_wide_xy_addr_x_dly4, rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1} <= {rd_wide_xy_addr_x_dly3, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_x_dly1, rd_wide_xy_addr_x};
+ {rd_wide_xy_addr_y_dly4, rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1} <= {rd_wide_xy_addr_y_dly3, rd_wide_xy_addr_y_dly2, rd_wide_xy_addr_y_dly1, rd_wide_xy_addr_y};
//
- {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2};
- {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2};
+ {rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1} <= {rd_narrow_xy_addr_x_dly3, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_x_dly1, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_addr_y_dly4, rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1} <= {rd_narrow_xy_addr_y_dly3, rd_narrow_xy_addr_y_dly2, rd_narrow_xy_addr_y_dly1, rd_narrow_xy_addr_y};
//
{wrk_rd_wide_x_din_x_dly3, wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1} <= {wrk_rd_wide_x_din_x_dly2, wrk_rd_wide_x_din_x_dly1, wrk_rd_wide_x_din_x};
{wrk_rd_wide_x_din_y_dly3, wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1} <= {wrk_rd_wide_x_din_y_dly2, wrk_rd_wide_x_din_y_dly1, wrk_rd_wide_x_din_y};
//
{wrk_rd_narrow_x_din_x_dly3, wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1} <= {wrk_rd_narrow_x_din_x_dly2, wrk_rd_narrow_x_din_x_dly1, wrk_rd_narrow_x_din_x};
- {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ {wrk_rd_narrow_y_din_x_dly2, wrk_rd_narrow_y_din_x_dly1} <= {wrk_rd_narrow_y_din_x_dly1, wrk_rd_narrow_y_din_x};
+ {wrk_rd_narrow_x_din_y_dly3, wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1} <= {wrk_rd_narrow_x_din_y_dly2, wrk_rd_narrow_x_din_y_dly1, wrk_rd_narrow_x_din_y};
+ {wrk_rd_narrow_y_din_y_dly2, wrk_rd_narrow_y_din_y_dly1} <= {wrk_rd_narrow_y_din_y_dly1, wrk_rd_narrow_y_din_y};
//
end
//
- // Read Enable Logic
+ // Source Read Enable Logic
//
task _update_wide_xy_rd_en; input _en; {rd_wide_xy_ena_x, rd_wide_xy_ena_y } <= {2{_en}}; endtask
@@ -340,48 +305,54 @@ module modexpng_general_worker
//
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
- /*
- rd_wide_xy_ena_x <= 1'b0;
- rd_wide_xy_ena_y <= 1'b0;
- rd_narrow_xy_ena_x <= 1'b0;
- rd_narrow_xy_ena_y <= 1'b0;
- */
+ //
end else begin
//
disable_wide_xy_rd_en;
disable_narrow_xy_rd_en;
//
- //rd_wide_xy_ena_x <= 1'b0;
- //rd_wide_xy_ena_y <= 1'b0;
- //rd_narrow_xy_ena_x <= 1'b0;
- //rd_narrow_xy_ena_y <= 1'b0;
+ // one_pass
//
- case (opcode)
+ case (wrk_fsm_state_next_one_pass)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_MODULAR_REDUCE_INIT:
+ WRK_FSM_STATE_LATENCY_PRE1,
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1,
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY:
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_narrow_xy_rd_en;
- //{rd_narrow_xy_ena_x, rd_narrow_xy_ena_y} <= {2{1'b1}};
//
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ //
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ //
+ end
//
endcase
- //
//
- UOP_OPCODE_COPY_CRT_Y2X:
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1,
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M1,
+ WRK_FSM_STATE_BUSY_M2:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1,
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_rd_en;
enable_narrow_xy_rd_en;
@@ -389,24 +360,29 @@ module modexpng_general_worker
end
//
endcase
+ //
+ endcase
+ //
+ // two_pass
+ //
+ case (wrk_fsm_state_next_two_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_TP,
+ WRK_FSM_STATE_LATENCY_PRE2_TP,
+ WRK_FSM_STATE_LATENCY_PRE3_TP,
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1,
- WRK_FSM_STATE_LATENCY_PRE1_M2,
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M1,
- WRK_FSM_STATE_BUSY_M2: begin
- //
- enable_wide_xy_rd_en;
- enable_narrow_xy_rd_en;
+ case (opcode)
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- end
+ if (!wrk_fsm_two_pass_pass) begin
+ enable_wide_xy_rd_en;
+ enable_narrow_xy_rd_en;
+ end else
+ enable_narrow_xy_rd_en;
//
- endcase
+ endcase
//
endcase
//
@@ -414,7 +390,7 @@ module modexpng_general_worker
//
- // Write Enable Logic
+ // Destination Write Enable Logic
//
task _update_wide_xy_wr_en; input _en; {wr_wide_xy_ena_x, wr_wide_xy_ena_y } <= {2{_en}}; endtask
@@ -432,71 +408,53 @@ module modexpng_general_worker
//
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
- //wr_wide_xy_ena_x <= 1'b0;
- //wr_wide_xy_ena_y <= 1'b0;
- //wr_narrow_xy_ena_x <= 1'b0;
- //wr_narrow_xy_ena_y <= 1'b0;
//
end else begin
//
disable_wide_xy_wr_en;
disable_narrow_xy_wr_en;
//
- //wr_wide_xy_ena_x <= 1'b0;
- //wr_wide_xy_ena_y <= 1'b0;
- //wr_narrow_xy_ena_x <= 1'b0;
- //wr_narrow_xy_ena_y <= 1'b0;
+ // one_pass
//
- case (opcode)
+ case (wrk_fsm_state)
//
- UOP_OPCODE_PROPAGATE_CARRIES:
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_PROPAGATE_CARRIES:
//
enable_narrow_xy_wr_en;
//
- //
- endcase
- //
- UOP_OPCODE_COPY_CRT_Y2X:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_COPY_CRT_Y2X: begin
//
enable_wide_xy_wr_en;
- enable_narrow_xy_wr_en;
+ enable_narrow_xy_wr_en;
//
end
//
- endcase
- //
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2:
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
enable_wide_xy_wr_en;
- //
//
endcase
+ //
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
enable_wide_xy_wr_en;
enable_narrow_xy_wr_en;
@@ -507,12 +465,42 @@ module modexpng_general_worker
//
endcase
//
+ // two_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
+ //
+ if (!wrk_fsm_two_pass_pass)
+ enable_narrow_xy_wr_en;
+ else begin
+ enable_wide_xy_wr_en;
+ enable_narrow_xy_wr_en;
+ end
+ //
+ endcase
+ //
+ endcase
+ //
end
//
- // Data Logic
+ // Source to Destination Data Logic
+ //
+
+ //
+ // UOP_OPCODE_PROPAGATE_CARRIES
//
+
reg [CARRY_W -1:0] rd_narrow_x_din_x_cry_r;
reg [CARRY_W -1:0] rd_narrow_y_din_x_cry_r;
reg [CARRY_W -1:0] rd_narrow_x_din_y_cry_r;
@@ -523,112 +511,300 @@ module modexpng_general_worker
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry = wrk_rd_narrow_x_din_y + {{WORD_W{1'b0}}, rd_narrow_x_din_y_cry_r};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry = wrk_rd_narrow_y_din_y + {{WORD_W{1'b0}}, rd_narrow_y_din_y_cry_r};
+ wire [CARRY_W -1:0] rd_narrow_x_din_x_w_cry_msb = rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_y_din_x_w_cry_msb = rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_x_din_y_w_cry_msb = rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ wire [CARRY_W -1:0] rd_narrow_y_din_y_w_cry_msb = rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+
wire [WORD_EXT_W -1:0] rd_narrow_x_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_x_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_x_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_x_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_x_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_x_din_y_w_cry[WORD_W -1:0]};
wire [WORD_EXT_W -1:0] rd_narrow_y_din_y_w_cry_reduced = {{CARRY_W{1'b0}}, rd_narrow_y_din_y_w_cry[WORD_W -1:0]};
+ task update_wide_dout; // loads all four wide write-data registers with one non-blocking assignment
+ input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; // new values, listed in the same order as the registers below
+ {wr_wide_x_dout_x, wr_wide_y_dout_x, wr_wide_x_dout_y, wr_wide_y_dout_y} <=
+ { x_x, y_x, x_y, y_y };
+ endtask
+
+ task update_narrow_dout; // loads all four narrow write-data registers with one non-blocking assignment
+ input [WORD_EXT_W-1:0] x_x, y_x, x_y, y_y; // new values, listed in the same order as the registers below
+ {wr_narrow_x_dout_x, wr_narrow_y_dout_x, wr_narrow_x_dout_y, wr_narrow_y_dout_y} <=
+ { x_x, y_x, x_y, y_y };
+ endtask
+
+ task update_narrow_carries; // loads the four carry registers that are added to the narrow read data
+ input [CARRY_W-1:0] x_x_cry, y_x_cry, x_y_cry, y_y_cry; // new carries, listed in the same order as the registers below
+ {rd_narrow_x_din_x_cry_r, rd_narrow_y_din_x_cry_r, rd_narrow_x_din_y_cry_r, rd_narrow_y_din_y_cry_r} <=
+ { x_x_cry, y_x_cry, x_y_cry, y_y_cry };
+ endtask
+
+
+ always @(posedge clk)
+ // Carry registers are only touched while the opcode is UOP_OPCODE_PROPAGATE_CARRIES.
+ if (opcode == UOP_OPCODE_PROPAGATE_CARRIES)
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_LATENCY_PRE2:
+ // clear all four carries before the first word is processed
+ update_narrow_carries(CARRY_ZERO, CARRY_ZERO, CARRY_ZERO, CARRY_ZERO);
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1:
+ // keep the bits above WORD_W of each (read data + carry) sum as the next carry
+ update_narrow_carries(rd_narrow_x_din_x_w_cry_msb,
+ rd_narrow_y_din_x_w_cry_msb,
+ rd_narrow_x_din_y_w_cry_msb,
+ rd_narrow_y_din_y_w_cry_msb);
+ // any other state (LATENCY_POST2 included) leaves the carries unchanged
+ endcase
+
+
+ //
+ // UOP_OPCODE_MODULAR_SUBTRACT
+ //
+
+ reg [WORD_W:0] modsub_x_ab; // word-wise narrow_x - narrow_y difference on the X side; bit WORD_W is fed back as the borrow
+ reg [WORD_W:0] modsub_y_ab; // same on the Y side
+
+ reg [WORD_W:0] modsub_x_ab_dly; // modsub_x_ab delayed by one clock
+ reg [WORD_W:0] modsub_y_ab_dly; // modsub_y_ab delayed by one clock
+
+ reg [WORD_W:0] modsub_x_abn; // difference plus the wide-bank word; bit WORD_W is fed back as the carry
+ reg [WORD_W:0] modsub_y_abn; // same on the Y side
+
+ reg modsub_x_ab_mask_now; // when set, the fed-back borrow is forced to zero
+ reg modsub_y_ab_mask_now;
+
+ reg modsub_x_abn_mask_now; // when set, the fed-back carry is forced to zero
+ reg modsub_y_abn_mask_now;
+
+ reg modsub_x_borrow_r; // borrow latched at the end of the first pass, selects the result in the second pass
+ reg modsub_y_borrow_r;
+
+ wire modsub_x_ab_masked = modsub_x_ab_mask_now ? 1'b0 : modsub_x_ab[WORD_W];
+ wire modsub_y_ab_masked = modsub_y_ab_mask_now ? 1'b0 : modsub_y_ab[WORD_W];
+
+ wire modsub_x_abn_masked = modsub_x_abn_mask_now ? 1'b0 : modsub_x_abn[WORD_W];
+ wire modsub_y_abn_masked = modsub_y_abn_mask_now ? 1'b0 : modsub_y_abn[WORD_W];
+
+ wire [WORD_W:0] modsub_x_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_x_din_x[WORD_W-1:0]}; // low WORD_W bits, zero-extended by one bit
+ wire [WORD_W:0] modsub_y_narrow_x_lsb_pad = {1'b0, wrk_rd_narrow_y_din_x[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_x_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_x_din_y[WORD_W-1:0]};
+ wire [WORD_W:0] modsub_y_narrow_y_lsb_pad = {1'b0, wrk_rd_narrow_y_din_y[WORD_W-1:0]};
+
+ wire [WORD_W:0] modsub_x_wide_x_lsb_pad = {1'b0, wrk_rd_wide_x_din_x_dly1[WORD_W-1:0]}; // wide read data delayed by one clock (presumably the modulus word, since BANK_WIDE_N is read -- confirm)
+ wire [WORD_W:0] modsub_x_wide_y_lsb_pad = {1'b0, wrk_rd_wide_x_din_y_dly1[WORD_W-1:0]}; // NOTE(review): feeds modsub_y_abn although it carries the modsub_x_ prefix -- confirm the name is intended
+
+ wire [WORD_EXT_W -1:0] modsub_x_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_x_ab_dly[WORD_W-1:0]}; // borrow bit dropped, padded to the write-data width
+ wire [WORD_EXT_W -1:0] modsub_y_ab_dly_trunc = {{CARRY_W{1'b0}}, modsub_y_ab_dly[WORD_W-1:0]};
+
+ wire [WORD_EXT_W -1:0] modsub_x_abn_trunc = {{CARRY_W{1'b0}}, modsub_x_abn[WORD_W-1:0]}; // carry bit dropped, padded to the write-data width
+ wire [WORD_EXT_W -1:0] modsub_y_abn_trunc = {{CARRY_W{1'b0}}, modsub_y_abn[WORD_W-1:0]};
+
+ wire [WORD_EXT_W -1:0] modsub_x_mux = !modsub_x_borrow_r ? wrk_rd_narrow_x_din_x_dly2 : wrk_rd_narrow_y_din_x_dly2; // no borrow: narrow X-port word, borrow: narrow Y-port word
+ wire [WORD_EXT_W -1:0] modsub_y_mux = !modsub_y_borrow_r ? wrk_rd_narrow_x_din_y_dly2 : wrk_rd_narrow_y_din_y_dly2;
+
+ wire [WORD_W:0] modsub_x_ab_lsb_pad = {1'b0, modsub_x_ab[WORD_W-1:0]}; // difference without its borrow bit, zero-extended
+ wire [WORD_W:0] modsub_y_ab_lsb_pad = {1'b0, modsub_y_ab[WORD_W-1:0]};
+
+ task update_modsub_ab; // one word of the subtraction: X-port word minus Y-port word minus the (maskable) previous borrow
+ begin
+ modsub_x_ab <= modsub_x_narrow_x_lsb_pad - modsub_y_narrow_x_lsb_pad - modsub_x_ab_masked;
+ modsub_y_ab <= modsub_x_narrow_y_lsb_pad - modsub_y_narrow_y_lsb_pad - modsub_y_ab_masked;
+ end
+ endtask
+
+ task update_modsub_abn; // one word of the add-back: previous difference word plus wide word plus the (maskable) previous carry
+ begin
+ modsub_x_abn <= modsub_x_ab_lsb_pad + modsub_x_wide_x_lsb_pad + modsub_x_abn_masked;
+ modsub_y_abn <= modsub_y_ab_lsb_pad + modsub_x_wide_y_lsb_pad + modsub_y_abn_masked;
+ end
+ endtask
+
+ always @(posedge clk)
+ // Latch the top bit of the delayed differences once, in LATENCY_POST4_TP of the first pass.
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ //
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_POST4_TP:
+ if (!wrk_fsm_two_pass_pass) // skipped in the second pass, so the latched borrows stay valid for the muxes
+ {modsub_x_borrow_r, modsub_y_borrow_r} <= {modsub_x_ab_dly[WORD_W], modsub_y_ab_dly[WORD_W]};
+ endcase
+
+ always @(posedge clk) begin // unconditional one-clock delay of both differences
+ modsub_x_ab_dly <= modsub_x_ab;
+ modsub_y_ab_dly <= modsub_y_ab;
+ end
+
always @(posedge clk) begin
//
- wr_wide_x_dout_x <= WORD_EXT_DNC;
- wr_wide_y_dout_x <= WORD_EXT_DNC;
- wr_wide_x_dout_y <= WORD_EXT_DNC;
- wr_wide_y_dout_y <= WORD_EXT_DNC;
- wr_narrow_x_dout_x <= WORD_EXT_DNC;
- wr_narrow_y_dout_x <= WORD_EXT_DNC;
- wr_narrow_x_dout_y <= WORD_EXT_DNC;
- wr_narrow_y_dout_y <= WORD_EXT_DNC;
+ modsub_x_ab <= {1'bX, WORD_DNC}; // default value, replaced below in the states that compute a difference
+ modsub_y_ab <= {1'bX, WORD_DNC};
//
- case (opcode)
+ modsub_x_abn <= {1'bX, WORD_DNC}; // default value, replaced below in the states that compute an add-back
+ modsub_y_abn <= {1'bX, WORD_DNC};
+ // the add-back runs one state behind the difference because it consumes the registered difference
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
//
- UOP_OPCODE_PROPAGATE_CARRIES:
+ case (wrk_fsm_state)
//
- case (wrk_fsm_state)
+ WRK_FSM_STATE_LATENCY_PRE3_TP:
+ update_modsub_ab;
+
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP: begin
+ update_modsub_ab;
+ update_modsub_abn;
+ end
+ //
+ WRK_FSM_STATE_LATENCY_POST3_TP:
//
- WRK_FSM_STATE_LATENCY_PRE2: begin
- rd_narrow_x_din_x_cry_r <= CARRY_ZERO;
- rd_narrow_y_din_x_cry_r <= CARRY_ZERO;
- rd_narrow_x_din_y_cry_r <= CARRY_ZERO;
- rd_narrow_y_din_y_cry_r <= CARRY_ZERO;
- end
+ update_modsub_abn;
+ //
+ endcase
+ //
+ end
+
+ always @(posedge clk) begin
+ // All four mask flags default to 0 and are raised for a single clock each.
+ modsub_x_ab_mask_now <= 1'b0;
+ modsub_y_ab_mask_now <= 1'b0;
+ //
+ modsub_x_abn_mask_now <= 1'b0;
+ modsub_y_abn_mask_now <= 1'b0;
+ //
+ if (opcode == UOP_OPCODE_MODULAR_SUBTRACT)
+ //
+ case (wrk_fsm_state)
+ // set in PRE2 so the borrow is masked during PRE3, the first state that computes a difference
+ WRK_FSM_STATE_LATENCY_PRE2_TP: begin
+ modsub_x_ab_mask_now <= 1'b1;
+ modsub_y_ab_mask_now <= 1'b1;
+ end
+ // set in PRE3 so the carry is masked during PRE4, the first state that computes an add-back
+ WRK_FSM_STATE_LATENCY_PRE3_TP: begin
+ modsub_x_abn_mask_now <= 1'b1;
+ modsub_y_abn_mask_now <= 1'b1;
+ end
+ //
+ endcase
+ //
+ end
+
+ always @(posedge clk) begin
+ //
+ update_wide_dout (WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
+ update_narrow_dout(WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC, WORD_EXT_DNC);
+ //
+ // one_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_PROPAGATE_CARRIES:
+ //
+ update_narrow_dout(rd_narrow_x_din_x_w_cry_reduced,
+ rd_narrow_y_din_x_w_cry_reduced,
+ rd_narrow_x_din_y_w_cry_reduced,
+ rd_narrow_y_din_y_w_cry_reduced);
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin // TODO: post2 doesn't need update of carry, since that's the last word
+ UOP_OPCODE_COPY_CRT_Y2X: begin
//
- rd_narrow_x_din_x_cry_r <= rd_narrow_x_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_y_din_x_cry_r <= rd_narrow_y_din_x_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_x_din_y_cry_r <= rd_narrow_x_din_y_w_cry[WORD_EXT_W -1:WORD_W];
- rd_narrow_y_din_y_cry_r <= rd_narrow_y_din_y_w_cry[WORD_EXT_W -1:WORD_W];
+ update_wide_dout(wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y,
+ wrk_rd_wide_x_din_y,
+ wrk_rd_wide_y_din_y);
//
- wr_narrow_x_dout_x <= rd_narrow_x_din_x_w_cry_reduced;
- wr_narrow_y_dout_x <= rd_narrow_y_din_x_w_cry_reduced;
- wr_narrow_x_dout_y <= rd_narrow_x_din_y_w_cry_reduced;
- wr_narrow_y_dout_y <= rd_narrow_y_din_y_w_cry_reduced;
+ update_narrow_dout(wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y);
//
end
//
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ //
+ update_wide_dout(wrk_rd_narrow_x_din_x,
+ wrk_rd_narrow_y_din_x,
+ wrk_rd_narrow_x_din_y,
+ wrk_rd_narrow_y_din_y);
+ //
endcase
+ //
+ endcase
+ //
+ // one_pass_meander
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
//
- UOP_OPCODE_COPY_CRT_Y2X:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y: begin
//
- wr_wide_x_dout_x <= wrk_rd_wide_x_din_y;
- wr_wide_y_dout_x <= wrk_rd_wide_y_din_y;
- wr_wide_x_dout_y <= wrk_rd_wide_x_din_y;
- wr_wide_y_dout_y <= wrk_rd_wide_y_din_y;
+ update_wide_dout(wrk_rd_wide_x_din_x_dly3,
+ wrk_rd_wide_x_din_x_dly2,
+ wrk_rd_wide_x_din_y_dly3,
+ wrk_rd_wide_x_din_y_dly2);
//
- wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_y;
- wr_narrow_y_dout_x <= wrk_rd_narrow_y_din_y;
- wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y;
- wr_narrow_y_dout_y <= wrk_rd_narrow_y_din_y;
+ update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
+ wrk_rd_narrow_x_din_x_dly2,
+ wrk_rd_narrow_x_din_y_dly3,
+ wrk_rd_narrow_x_din_y_dly2);
//
end
//
- endcase
- //
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
- //
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
//
- wr_wide_x_dout_x <= wrk_rd_wide_x_din_x_dly3;
- wr_wide_y_dout_x <= wrk_rd_wide_x_din_x_dly2;
- wr_wide_x_dout_y <= wrk_rd_wide_x_din_y_dly3;
- wr_wide_y_dout_y <= wrk_rd_wide_x_din_y_dly2;
+ update_wide_dout(wrk_rd_wide_x_din_x_dly3,
+ wrk_rd_wide_x_din_y_dly2,
+ wrk_rd_wide_x_din_y_dly3,
+ wrk_rd_wide_x_din_x_dly2);
//
- wr_narrow_x_dout_x <= wrk_rd_narrow_x_din_x_dly3;
- wr_narrow_y_dout_x <= wrk_rd_narrow_x_din_x_dly2;
- wr_narrow_x_dout_y <= wrk_rd_narrow_x_din_y_dly3;
- wr_narrow_y_dout_y <= wrk_rd_narrow_x_din_y_dly2;
+ update_narrow_dout(wrk_rd_narrow_x_din_x_dly3,
+ wrk_rd_narrow_x_din_y_dly2,
+ wrk_rd_narrow_x_din_y_dly3,
+ wrk_rd_narrow_x_din_x_dly2);
//
end
//
endcase
+ //
+ endcase
+ //
+ // two_pass
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- wr_wide_x_dout_x <= wrk_rd_narrow_x_din_x;
- wr_wide_y_dout_x <= wrk_rd_narrow_y_din_x;
- wr_wide_x_dout_y <= wrk_rd_narrow_x_din_y;
- wr_wide_y_dout_y <= wrk_rd_narrow_y_din_y;
+ if (!wrk_fsm_two_pass_pass)
+ update_narrow_dout(modsub_x_ab_dly_trunc, modsub_x_abn_trunc, modsub_y_ab_dly_trunc, modsub_y_abn_trunc);
+ else begin
+ update_wide_dout (modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
+ update_narrow_dout(modsub_x_mux, modsub_x_mux, modsub_y_mux, modsub_y_mux);
+ end
//
- end
- //
endcase
//
endcase
@@ -637,254 +813,307 @@ module modexpng_general_worker
//
- // Write Address Logic
+ // Source Read Address Logic
//
- wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
- wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
+
+ reg [OP_ADDR_W -1:0] rd_wide_xy_addr_xy_next; // next wide read address, shared by the X and Y sides
+ reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_xy_next; // next narrow read address, shared by the X and Y sides
+
+ wire rd_wide_xy_addr_xy_next_is_last = rd_wide_xy_addr_xy_next == word_index_last_half; // wide counter compares against the half-length index
+ wire rd_narrow_xy_addr_xy_next_is_last = rd_narrow_xy_addr_xy_next == word_index_last; // narrow counter compares against the full-length index
+
+ task update_rd_wide_bank_addr; // drives the same bank and address onto both wide read ports
+ input [BANK_ADDR_W -1:0] bank;
+ input [ OP_ADDR_W -1:0] addr;
+ begin
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, addr};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, addr};
+ end
+ endtask
+
+ task update_rd_wide_bank; // switches the wide read bank; each address register is re-assigned its current value
+ input [BANK_ADDR_W -1:0] bank;
+ begin
+ {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {bank, rd_wide_xy_addr_x};
+ {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {bank, rd_wide_xy_addr_y};
+ end
+ endtask
+
+ task update_rd_narrow_bank_addr; // drives the same bank and address onto both narrow read ports
+ input [BANK_ADDR_W -1:0] bank;
+ input [ OP_ADDR_W -1:0] addr;
+ begin
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, addr};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, addr};
+ end
+ endtask
+
+ task update_rd_narrow_bank; // switches the narrow read bank; each address register is re-assigned its current value
+ input [BANK_ADDR_W -1:0] bank;
+ begin
+ {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {bank, rd_narrow_xy_addr_x};
+ {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {bank, rd_narrow_xy_addr_y};
+ end
+ endtask
+
+ task update_rd_wide_addr_next; // loads the wide next-address counter with an explicit value
+ input [OP_ADDR_W -1:0] addr;
+ rd_wide_xy_addr_xy_next <= addr;
+ endtask
+
+ task update_rd_narrow_addr_next; // loads the narrow next-address counter with an explicit value
+ input [OP_ADDR_W -1:0] addr;
+ rd_narrow_xy_addr_xy_next <= addr;
+ endtask
+
+ task advance_rd_wide_addr_next; // increments the wide counter, wrapping to OP_ADDR_ZERO after word_index_last_half
+ rd_wide_xy_addr_xy_next <= !rd_wide_xy_addr_xy_next_is_last ? rd_wide_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ endtask
+
+ task advance_rd_narrow_addr_next; // increments the narrow counter, wrapping to OP_ADDR_ZERO after word_index_last
+ rd_narrow_xy_addr_xy_next <= !rd_narrow_xy_addr_xy_next_is_last ? rd_narrow_xy_addr_xy_next + 1'b1 : OP_ADDR_ZERO;
+ endtask
always @(posedge clk) begin
//
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ update_rd_wide_bank_addr (BANK_DNC, OP_ADDR_DNC); // defaults; a later assignment in this block takes precedence
+ update_rd_narrow_bank_addr(BANK_DNC, OP_ADDR_DNC);
//
- case (opcode)
+ // one_pass: keyed on the NEXT state, so the address is registered on the edge that enters that state
+ //
+ case (wrk_fsm_state_next_one_pass)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_COPY_CRT_Y2X:
+ WRK_FSM_STATE_LATENCY_PRE1:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
- //
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly2};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly2};
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X,
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly2};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly2};
+ update_rd_wide_bank_addr (sel_wide_in, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
+ update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
//
end
//
endcase
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
+ WRK_FSM_STATE_LATENCY_PRE2,
+ WRK_FSM_STATE_BUSY:
//
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY,
- WRK_FSM_STATE_LATENCY_POST1,
- WRK_FSM_STATE_LATENCY_POST2: begin
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_OUTPUT_FROM_NARROW,
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ // both ports are addressed with the narrow counter here
+ update_rd_wide_bank_addr (sel_wide_in, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
//
- wr_wide_xy_bank_x <= uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
- wr_wide_xy_bank_y <= uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
+ end
+ //
+ UOP_OPCODE_MODULAR_REDUCE_INIT: begin
//
- wr_wide_xy_addr_x <= rd_wide_xy_addr_x_dly2;
- wr_wide_xy_addr_y <= rd_wide_xy_addr_y_dly2;
+ update_rd_wide_bank_addr (sel_wide_in, rd_wide_xy_addr_xy_next ); advance_rd_wide_addr_next ;
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
//
end
//
endcase
+ //
+ endcase
+ //
+ // one_pass_meander: the M1 states read the *_out banks, the M2 states switch to the *_in banks and keep the address
+ //
+ case (wrk_fsm_state_next_one_pass_meander)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M1:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank_addr (sel_wide_out, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
+ update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
+ end
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_M1,
+ WRK_FSM_STATE_BUSY_M1:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank_addr (sel_wide_out, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
+ update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
+ //
+ end
+ //
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_M2,
+ WRK_FSM_STATE_LATENCY_PRE2_M2,
+ WRK_FSM_STATE_BUSY_M2:
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_rd_wide_bank (sel_wide_in );
+ update_rd_narrow_bank(sel_narrow_in);
+ end
+ endcase
+ //
+ endcase
+ //
+ // two_pass: pass 1 reads BANK_WIDE_N and sel_narrow_in; pass 2 reads only sel_narrow_out (wide ports keep the defaults)
+ //
+ case (wrk_fsm_state_next_two_pass)
+ //
+ WRK_FSM_STATE_LATENCY_PRE1_TP:
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
- //
- case (wrk_fsm_state)
+ case (opcode)
//
- WRK_FSM_STATE_BUSY_M2,
- WRK_FSM_STATE_LATENCY_POST1_M2,
- WRK_FSM_STATE_LATENCY_POST2_M2: begin
- //
- {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_dly4};
- {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_dly4};
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_dly4};
- {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_dly4};
+ if (!wrk_fsm_two_pass_pass) begin
+ update_rd_wide_bank_addr (BANK_WIDE_N, OP_ADDR_ZERO); update_rd_wide_addr_next (OP_ADDR_ONE);
+ update_rd_narrow_bank_addr(sel_narrow_in, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
+ end else begin
+ update_rd_narrow_bank_addr(sel_narrow_out, OP_ADDR_ZERO); update_rd_narrow_addr_next(OP_ADDR_ONE);
+ end
+ //
+ endcase
+ //
+ WRK_FSM_STATE_LATENCY_PRE2_TP,
+ WRK_FSM_STATE_LATENCY_PRE3_TP,
+ WRK_FSM_STATE_LATENCY_PRE4_TP,
+ WRK_FSM_STATE_BUSY_TP:
+ //
+ case (opcode)
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- end
+ if (!wrk_fsm_two_pass_pass) begin
+ update_rd_wide_bank_addr (BANK_WIDE_N, rd_narrow_xy_addr_xy_next); advance_rd_wide_addr_next ;
+ update_rd_narrow_bank_addr(sel_narrow_in, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
+ end else begin
+ update_rd_narrow_bank_addr(sel_narrow_out, rd_narrow_xy_addr_xy_next); advance_rd_narrow_addr_next;
+ end
//
endcase
//
- //
endcase
//
end
//
- // Read Address Logic
+ // Destination Write Address Logic
//
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_x_next;
- reg [OP_ADDR_W -1:0] rd_wide_xy_addr_y_next;
-
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_x_next;
- reg [OP_ADDR_W -1:0] rd_narrow_xy_addr_y_next;
+
+ wire uop_modular_reduce_init_feed_lsb_x = rd_narrow_xy_addr_x_dly2 <= word_index_last_half;
+ wire uop_modular_reduce_init_feed_lsb_y = rd_narrow_xy_addr_y_dly2 <= word_index_last_half;
- wire rd_wide_xy_addr_x_next_is_last = rd_wide_xy_addr_x_next == word_index_last_half;
- wire rd_wide_xy_addr_y_next_is_last = rd_wide_xy_addr_y_next == word_index_last_half;
+ wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_x = uop_modular_reduce_init_feed_lsb_x ? BANK_WIDE_L : BANK_WIDE_H;
+ wire [BANK_ADDR_W -1:0] uop_modular_reduce_init_bank_y = uop_modular_reduce_init_feed_lsb_y ? BANK_WIDE_L : BANK_WIDE_H;
- wire rd_narrow_xy_addr_x_next_is_last = rd_narrow_xy_addr_x_next == word_index_last;
- wire rd_narrow_xy_addr_y_next_is_last = rd_narrow_xy_addr_y_next == word_index_last;
+ task update_wr_wide_bank_addr; // drives the wide write bank/address pair of each side independently
+ input [BANK_ADDR_W -1:0] x_bank;
+ input [BANK_ADDR_W -1:0] y_bank;
+ input [ OP_ADDR_W -1:0] x_addr;
+ input [ OP_ADDR_W -1:0] y_addr;
+ begin
+ {wr_wide_xy_bank_x, wr_wide_xy_addr_x} <= {x_bank, x_addr};
+ {wr_wide_xy_bank_y, wr_wide_xy_addr_y} <= {y_bank, y_addr};
+ end
+ endtask
- always @(posedge clk) begin // TODO: Maybe split into two blocks (read address / next address)??
+ task update_wr_narrow_bank_addr; // drives the narrow write bank/address pair of each side independently
+ input [BANK_ADDR_W -1:0] x_bank;
+ input [BANK_ADDR_W -1:0] y_bank;
+ input [ OP_ADDR_W -1:0] x_addr;
+ input [ OP_ADDR_W -1:0] y_addr;
+ begin
+ {wr_narrow_xy_bank_x, wr_narrow_xy_addr_x} <= {x_bank, x_addr};
+ {wr_narrow_xy_bank_y, wr_narrow_xy_addr_y} <= {y_bank, y_addr};
+ end
+ endtask
+
+ always @(posedge clk) begin
//
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x } <= {BANK_DNC, OP_ADDR_DNC}; // TODO: Add same default path for io_manager ??
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y } <= {BANK_DNC, OP_ADDR_DNC};
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {BANK_DNC, OP_ADDR_DNC};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {BANK_DNC, OP_ADDR_DNC};
+ update_wr_wide_bank_addr (BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC); // defaults; a later assignment in this block takes precedence
+ update_wr_narrow_bank_addr(BANK_DNC, BANK_DNC, OP_ADDR_DNC, OP_ADDR_DNC);
//
- case (opcode)
+ // one_pass: write addresses are the read addresses delayed by two clocks
+ //
+ case (wrk_fsm_state)
//
- UOP_OPCODE_PROPAGATE_CARRIES,
- UOP_OPCODE_OUTPUT_FROM_NARROW,
- UOP_OPCODE_COPY_CRT_Y2X:
+ WRK_FSM_STATE_BUSY,
+ WRK_FSM_STATE_LATENCY_POST1,
+ WRK_FSM_STATE_LATENCY_POST2:
//
- case (wrk_fsm_state_next_one_pass)
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
+ UOP_OPCODE_PROPAGATE_CARRIES,
+ UOP_OPCODE_COPY_CRT_Y2X: begin
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly2, rd_narrow_xy_addr_y_dly2);
end
//
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_narrow_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_narrow_xy_addr_y_next};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
- //
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
- end
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
+ update_wr_wide_bank_addr(uop_modular_reduce_init_bank_x, uop_modular_reduce_init_bank_y, rd_wide_xy_addr_x_dly2, rd_wide_xy_addr_y_dly2);
//
endcase
//
- UOP_OPCODE_MODULAR_REDUCE_INIT:
- //
- case (wrk_fsm_state_next_one_pass)
- //
- WRK_FSM_STATE_LATENCY_PRE1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2,
- WRK_FSM_STATE_BUSY: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y_next};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y_next};
- //
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
+ endcase
+ //
+ // one_pass_meander: write addresses are the narrow read addresses delayed by four clocks
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_M2,
+ WRK_FSM_STATE_LATENCY_POST1_M2,
+ WRK_FSM_STATE_LATENCY_POST2_M2:
+ //
+ case (opcode)
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: begin
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
end
- //
endcase
//
- UOP_OPCODE_COPY_LADDERS_X2Y:
+ endcase
+ //
+ // two_pass: pass 1 addresses only the narrow destination, pass 2 addresses wide and narrow; both use the four-clock delay
+ //
+ case (wrk_fsm_state)
+ //
+ WRK_FSM_STATE_BUSY_TP,
+ WRK_FSM_STATE_LATENCY_POST1_TP,
+ WRK_FSM_STATE_LATENCY_POST2_TP,
+ WRK_FSM_STATE_LATENCY_POST3_TP,
+ WRK_FSM_STATE_LATENCY_POST4_TP:
//
- case (wrk_fsm_state_next_one_pass_meander)
- //
- WRK_FSM_STATE_LATENCY_PRE1_M1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, OP_ADDR_ZERO};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, OP_ADDR_ZERO};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, OP_ADDR_ZERO};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, OP_ADDR_ZERO};
- //
- rd_wide_xy_addr_x_next <= OP_ADDR_ONE;
- rd_wide_xy_addr_y_next <= OP_ADDR_ONE;
- //
- rd_narrow_xy_addr_x_next <= OP_ADDR_ONE;
- rd_narrow_xy_addr_y_next <= OP_ADDR_ONE;
- //
- end
+ case (opcode)
//
- WRK_FSM_STATE_LATENCY_PRE1_M2: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2_M1,
- WRK_FSM_STATE_BUSY_M1: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_out, rd_narrow_xy_addr_x_next};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_out, rd_narrow_xy_addr_y_next};
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_out, rd_narrow_xy_addr_x_next};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_out, rd_narrow_xy_addr_y_next};
+ if (!wrk_fsm_two_pass_pass) begin
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ end else begin
+ update_wr_wide_bank_addr (sel_wide_out, sel_wide_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ update_wr_narrow_bank_addr(sel_narrow_out, sel_narrow_out, rd_narrow_xy_addr_x_dly4, rd_narrow_xy_addr_y_dly4);
+ end
//
- rd_wide_xy_addr_x_next <= !rd_wide_xy_addr_x_next_is_last ? rd_wide_xy_addr_x_next + 1'b1: OP_ADDR_ZERO;
- rd_wide_xy_addr_y_next <= !rd_wide_xy_addr_y_next_is_last ? rd_wide_xy_addr_y_next + 1'b1: OP_ADDR_ZERO;
- //
- rd_narrow_xy_addr_x_next <= rd_narrow_xy_addr_x_next + 1'b1;
- rd_narrow_xy_addr_y_next <= rd_narrow_xy_addr_y_next + 1'b1;
- //
- end
- //
- WRK_FSM_STATE_LATENCY_PRE2_M2,
- WRK_FSM_STATE_BUSY_M2: begin
- //
- {rd_wide_xy_bank_x, rd_wide_xy_addr_x} <= {sel_wide_in, rd_wide_xy_addr_x};
- {rd_wide_xy_bank_y, rd_wide_xy_addr_y} <= {sel_wide_in, rd_wide_xy_addr_y};
- //
- {rd_narrow_xy_bank_x, rd_narrow_xy_addr_x} <= {sel_narrow_in, rd_narrow_xy_addr_x};
- {rd_narrow_xy_bank_y, rd_narrow_xy_addr_y} <= {sel_narrow_in, rd_narrow_xy_addr_y};
- //
- end
- //
- endcase
+ endcase
//
- //
- endcase
+ endcase
//
end
-
+
//
// FSM Process
//
+
always @(posedge clk)
//
if (rst) wrk_fsm_state <= WRK_FSM_STATE_IDLE;
@@ -893,7 +1122,9 @@ module modexpng_general_worker
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
UOP_OPCODE_MODULAR_REDUCE_INIT: wrk_fsm_state <= wrk_fsm_state_next_one_pass;
- UOP_OPCODE_COPY_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y: wrk_fsm_state <= wrk_fsm_state_next_one_pass_meander;
+ UOP_OPCODE_MODULAR_SUBTRACT: wrk_fsm_state <= wrk_fsm_state_next_two_pass;
default: wrk_fsm_state <= WRK_FSM_STATE_IDLE;
endcase
@@ -901,49 +1132,64 @@ module modexpng_general_worker
//
// Busy Exit Logic
//
- reg wrk_fsm_done_one_pass = 1'b0;
+
+ reg wrk_fsm_done_one_pass = 1'b0;
reg wrk_fsm_done_one_pass_meander = 1'b0;
+ reg wrk_fsm_done_two_pass = 1'b0;
always @(posedge clk) begin
//
wrk_fsm_done_one_pass <= 1'b0;
wrk_fsm_done_one_pass_meander <= 1'b0;
+ wrk_fsm_done_two_pass <= 1'b0; // like the other done flags: defaults to 0 every clock
//
case (opcode)
//
UOP_OPCODE_PROPAGATE_CARRIES,
UOP_OPCODE_OUTPUT_FROM_NARROW,
UOP_OPCODE_COPY_CRT_Y2X,
- UOP_OPCODE_MODULAR_REDUCE_INIT: begin
+ UOP_OPCODE_MODULAR_REDUCE_INIT:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY) begin
- //
- if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // TODO: Check, whether both are necessary...
- if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass <= 1'b1;
- //
- end
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass <= 1'b1; // raised when the narrow counter points at the last word
+ endcase
//
- end
- //
- UOP_OPCODE_COPY_LADDERS_X2Y: begin
+ UOP_OPCODE_COPY_LADDERS_X2Y,
+ UOP_OPCODE_CROSS_LADDERS_X2Y:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M2) begin
- //
- if (rd_narrow_xy_addr_x_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1; // TODO: Check, whether both are necessary...
- if (rd_narrow_xy_addr_y_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
- //
- end
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY_M2:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_one_pass_meander <= 1'b1;
+ WRK_FSM_STATE_BUSY_M1:
+ wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander; // hold: overrides the default clear above
+ endcase
+ //
+ UOP_OPCODE_MODULAR_SUBTRACT:
//
- if (wrk_fsm_state == WRK_FSM_STATE_BUSY_M1)
- wrk_fsm_done_one_pass_meander <= wrk_fsm_done_one_pass_meander;
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_BUSY_TP:
+ if (rd_narrow_xy_addr_xy_next_is_last) wrk_fsm_done_two_pass <= 1'b1; // same condition for each of the two passes
+ endcase
//
- end
//
endcase
//
end
-
+
+ //
+ // FSM Helper Logic
+ //
+ always @(posedge clk)
+ // Two-pass flags. NOTE(review): they are cleared only on the IDLE edge where ena is sampled; logic keyed on the next state samples wrk_fsm_two_pass_pass on that same edge and would see the value left by a previous operation -- confirm it is cleared elsewhere or that this is harmless.
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: if (ena) {wrk_fsm_two_pass_pass, wrk_fsm_two_pass_pass_dly} <= {1'b0, 1'b0}; // start of an operation: pass 1
+ WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_two_pass_pass <= 1'b1; // end of a pass: subsequent states belong to pass 2
+ WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_two_pass_pass_dly <= 1'b1; // set while leaving the first hold-off, so the second hold-off goes to STOP
+ endcase
+
+
//
// FSM Transition Logic
//
@@ -985,7 +1231,27 @@ module modexpng_general_worker
//
end
-
+ always @* begin
+ // Combinational next-state function for the two-pass sequence: PRE1..PRE4 -> BUSY -> POST1..POST4 -> HOLDOFF, then repeat or stop
+ case (wrk_fsm_state)
+ WRK_FSM_STATE_IDLE: wrk_fsm_state_next_two_pass = ena ? WRK_FSM_STATE_LATENCY_PRE1_TP : WRK_FSM_STATE_IDLE; // wait for ena
+ WRK_FSM_STATE_LATENCY_PRE1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE2_TP ;
+ WRK_FSM_STATE_LATENCY_PRE2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE3_TP ;
+ WRK_FSM_STATE_LATENCY_PRE3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_PRE4_TP ;
+ WRK_FSM_STATE_LATENCY_PRE4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_BUSY_TP ;
+ WRK_FSM_STATE_BUSY_TP: wrk_fsm_state_next_two_pass = wrk_fsm_done_two_pass ? WRK_FSM_STATE_LATENCY_POST1_TP : WRK_FSM_STATE_BUSY_TP; // stay busy until the done flag is raised
+ WRK_FSM_STATE_LATENCY_POST1_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST2_TP ;
+ WRK_FSM_STATE_LATENCY_POST2_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST3_TP ;
+ WRK_FSM_STATE_LATENCY_POST3_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_LATENCY_POST4_TP ;
+ WRK_FSM_STATE_LATENCY_POST4_TP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_HOLDOFF_TP ;
+ WRK_FSM_STATE_HOLDOFF_TP: wrk_fsm_state_next_two_pass = wrk_fsm_two_pass_pass_dly ? WRK_FSM_STATE_STOP : WRK_FSM_STATE_LATENCY_PRE1_TP; // flag clear: restart at PRE1 for another pass; flag set: stop
+ WRK_FSM_STATE_STOP: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ;
+ default: wrk_fsm_state_next_two_pass = WRK_FSM_STATE_IDLE ; // any state outside the two-pass sequence falls back to IDLE
+ endcase
+ // NOTE(review): presumably selected as the next state only for two-pass opcodes; the selecting mux is outside this hunk - confirm
+ end
+
+
//
// Ready Logic
//
diff --git a/rtl/modexpng_microcode.vh b/rtl/modexpng_microcode.vh
index f68c559..3493e26 100644
--- a/rtl/modexpng_microcode.vh
+++ b/rtl/modexpng_microcode.vh
@@ -39,8 +39,9 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_OUTPUT_FROM_NARROW = 5'd3;
* source and destination WIDE are don't care
*/
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
-localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_CRT_Y2X = 5'd4;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_COPY_LADDERS_X2Y = 5'd5;
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_CROSS_LADDERS_X2Y = 5'd7;
/* CRT is don't care
* NPQ specifies the width of the operand
* AUX is don't care
@@ -53,6 +54,13 @@ localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_MULTIPLY = 5'd8;
* AUX = AUX_2 forces B input to 1 (AUX_1 reads from source NARROW as usual)
* LADDER specifies Montgomery ladder mode
*/
+localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_SUBTRACT = 5'd9; // general worker raises its two-pass done flag for this opcode (in WRK_FSM_STATE_BUSY_TP)
+/* CRT is don't care
+ * NPQ specifies the width of the operand
+ * AUX is don't care
+ * LADDER is don't care
+ */
+
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_INIT = 5'd10;
localparam [UOP_OPCODE_W -1:0] UOP_OPCODE_MODULAR_REDUCE_PROC = 5'd11;
/* CRT
diff --git a/rtl/modexpng_uop_rom.v b/rtl/modexpng_uop_rom.v
index 04f0c83..adc657a 100644
--- a/rtl/modexpng_uop_rom.v
+++ b/rtl/modexpng_uop_rom.v
@@ -21,35 +21,35 @@ module modexpng_uop_rom
6'd03: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_Y, BANK_WIDE_A, BANK_DNC }; //
6'd04: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
6'd05: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_WIDE_E, BANK_DNC }; //
- //
+ //
6'd06: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd07: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd08: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
6'd09: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_N_FACTOR, BANK_DNC, BANK_NARROW_A }; //
6'd10: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
6'd11: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_DNC, BANK_DNC, BANK_IN_1_M, BANK_DNC, BANK_NARROW_E }; //
- //
+ //
6'd12: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_A, BANK_NARROW_A, BANK_WIDE_B, BANK_NARROW_B }; //
6'd13: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_B, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
6'd14: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
- //
+ //
6'd15: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
- //
+ //
6'd16: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_X, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_XM }; //
6'd17: data <= {UOP_OPCODE_OUTPUT_FROM_NARROW, UOP_CRT_Y, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_OUT_YM }; //
- //
+ //
6'd18: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_1, UOP_LADDER_11, BANK_WIDE_E, BANK_NARROW_B, BANK_WIDE_C, BANK_NARROW_C }; //
- //
+ //
6'd19: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_C, BANK_DNC, BANK_NARROW_C }; //
- //
+ //
6'd20: data <= {UOP_OPCODE_COPY_CRT_Y2X, UOP_CRT_DNC, UOP_NPQ_N, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
- //
+ //
6'd21: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P, BANK_WIDE_N, BANK_DNC }; //
6'd22: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q, BANK_WIDE_N, BANK_DNC }; //
6'd23: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_WIDE_A, BANK_DNC }; //
6'd24: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_FACTOR, BANK_WIDE_A, BANK_DNC }; //
6'd25: data <= {UOP_OPCODE_INPUT_TO_WIDE, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_QINV, BANK_WIDE_E, BANK_DNC }; //
- //
+ //
6'd26: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd27: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_Y, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_Q_COEFF, BANK_DNC, BANK_NARROW_COEFF}; //
6'd28: data <= {UOP_OPCODE_INPUT_TO_NARROW, UOP_CRT_X, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_DNC, BANK_DNC, BANK_IN_2_P_FACTOR, BANK_DNC, BANK_NARROW_A }; //
@@ -70,6 +70,14 @@ module modexpng_uop_rom
6'd38: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_1, UOP_LADDER_PQ, BANK_WIDE_C, BANK_NARROW_C, BANK_WIDE_C, BANK_NARROW_C }; //
6'd39: data <= {UOP_OPCODE_LADDER_STEP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
//
+ 6'd40: data <= {UOP_OPCODE_MODULAR_MULTIPLY, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_2, UOP_LADDER_11, BANK_WIDE_C, BANK_DNC, BANK_WIDE_D, BANK_NARROW_D }; //
+ //
+ 6'd41: data <= {UOP_OPCODE_PROPAGATE_CARRIES, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_DNC, BANK_NARROW_D }; //
+ //
+ 6'd42: data <= {UOP_OPCODE_CROSS_LADDERS_X2Y, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_WIDE_D, BANK_NARROW_D, BANK_WIDE_D, BANK_NARROW_D }; //
+ //
+ 6'd43: data <= {UOP_OPCODE_MODULAR_SUBTRACT, UOP_CRT_DNC, UOP_NPQ_PQ, UOP_AUX_DNC, UOP_LADDER_DNC, BANK_DNC, BANK_NARROW_D, BANK_WIDE_C, BANK_NARROW_C }; //
+ //
default: data <= {UOP_OPCODE_STOP, UOP_CRT_DNC, UOP_NPQ_DNC, UOP_AUX_DNC, UOP_LADDER_DNC, UOP_SEL_DNC_ALL }; //
endcase