aboutsummaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'rtl')
-rw-r--r--rtl/_modexpng_mmm_dual_x8.v961
-rw-r--r--rtl/_modexpng_recombinator_block.v1225
-rw-r--r--rtl/_modexpng_storage_block.v219
-rw-r--r--rtl/_modexpng_storage_manager.v199
-rw-r--r--rtl/modexpng_core_top.v41
-rw-r--r--rtl/modexpng_mmm_dual.v28
-rw-r--r--rtl/modexpng_parameters.vh3
-rw-r--r--rtl/modexpng_recombinator_block.v5
-rw-r--r--rtl/modexpng_sdp_36k_wrapper.v2
-rw-r--r--rtl/modexpng_sdp_36k_x16_x32_wrapper.v75
-rw-r--r--rtl/modexpng_sdp_36k_x32_x16_wrapper.v73
-rw-r--r--rtl/modexpng_storage_block.v196
-rw-r--r--rtl/modexpng_tdp_36k_x16_x32_wrapper.v88
13 files changed, 469 insertions, 2646 deletions
diff --git a/rtl/_modexpng_mmm_dual_x8.v b/rtl/_modexpng_mmm_dual_x8.v
deleted file mode 100644
index ffd5ccf..0000000
--- a/rtl/_modexpng_mmm_dual_x8.v
+++ /dev/null
@@ -1,961 +0,0 @@
-module modexpng_mmm_dual_x8
-(
- clk, rst,
-
- ena, rdy,
-
- ladder_mode,
- word_index_last,
- word_index_last_minus1,
-
- sel_wide_in,
- sel_narrow_in,
- sel_wide_out,
- sel_narrow_out,
-
- rd_wide_xy_ena,
- rd_wide_xy_ena_aux,
- rd_wide_xy_bank,
- rd_wide_xy_bank_aux,
- rd_wide_xy_addr,
- rd_wide_xy_addr_aux,
- rd_wide_x_dout,
- rd_wide_y_dout,
- rd_wide_x_dout_aux,
- rd_wide_y_dout_aux,
-
- rd_narrow_xy_ena,
- rd_narrow_xy_bank,
- rd_narrow_xy_addr,
- rd_narrow_x_dout,
- rd_narrow_y_dout,
-
- rcmb_wide_xy_bank,
- rcmb_wide_xy_addr,
- rcmb_wide_x_dout,
- rcmb_wide_y_dout,
- rcmb_wide_xy_valid,
-
- rcmb_narrow_xy_bank,
- rcmb_narrow_xy_addr,
- rcmb_narrow_x_dout,
- rcmb_narrow_y_dout,
- rcmb_narrow_xy_valid,
-
- rcmb_xy_bank,
- rcmb_xy_addr,
- rcmb_x_dout,
- rcmb_y_dout,
- rcmb_xy_valid,
-
- rdct_ena, rdct_rdy
-);
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_mmm_fsm.vh"
- `include "../rtl/modexpng_parameters.vh"
-
-
- //
- // Ports
- //
- input clk; // clock: all sequential blocks in this module are triggered on its rising edge
- input rst; // reset: tested inside the posedge-clk blocks (synchronous), active high
-
- input ena; // start request: moves the FSM from IDLE to MULT_SQUARE_COL_0_INIT
- // NOTE(review): no assignment to rdy is visible anywhere in this module body,
- // so this output appears to be undriven -- confirm how completion is reported.
- output rdy;
-
- input ladder_mode;
- input [ OP_ADDR_W -1:0] word_index_last;
- input [ OP_ADDR_W -1:0] word_index_last_minus1;
-
- input [ BANK_ADDR_W -1:0] sel_wide_in;
- input [ BANK_ADDR_W -1:0] sel_narrow_in;
- input [ BANK_ADDR_W -1:0] sel_wide_out;
- input [ BANK_ADDR_W -1:0] sel_narrow_out;
-
- output rd_wide_xy_ena;
- output rd_wide_xy_ena_aux;
- output [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
- output [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
- output [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
- output [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
- input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
- input [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
- input [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
- input [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
-
- output rd_narrow_xy_ena;
- output [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
- output [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] rd_narrow_x_dout;
- input [ WORD_EXT_W -1:0] rd_narrow_y_dout;
-
- output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
- output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
- output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
- output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
- output rcmb_wide_xy_valid;
-
- output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
- output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
- output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
- output rcmb_narrow_xy_valid;
-
- output [ BANK_ADDR_W -1:0] rcmb_xy_bank;
- output [ OP_ADDR_W -1:0] rcmb_xy_addr;
- output [ WORD_EXT_W -1:0] rcmb_x_dout;
- output [ WORD_EXT_W -1:0] rcmb_y_dout;
- output rcmb_xy_valid;
-
- output rdct_ena;
- input rdct_rdy;
-
-
- //
- // FSM Declaration
- //
- reg [MMM_FSM_STATE_W-1:0] fsm_state = MMM_FSM_STATE_IDLE;
- reg [MMM_FSM_STATE_W-1:0] fsm_state_next;
-
- wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_square;
- wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_triangle;
- wire [MMM_FSM_STATE_W-1:0] fsm_state_after_mult_rectangle;
-
-
- //
- // FSM Process
- //
- // State register: a synchronous reset forces IDLE, otherwise the
- // combinationally computed fsm_state_next is registered on each rising edge.
- always @(posedge clk) begin
-     if (rst) begin
-         fsm_state <= MMM_FSM_STATE_IDLE;
-     end else begin
-         fsm_state <= fsm_state_next;
-     end
- end
-
-
- //
- // Storage Control Interface
- //
- reg wide_xy_ena = 1'b0;
- reg wide_xy_ena_aux = 1'b0;
- reg [ BANK_ADDR_W -1:0] wide_xy_bank;
- reg [ BANK_ADDR_W -1:0] wide_xy_bank_aux;
- reg [OP_ADDR_W -1:0] wide_xy_addr[0:3];
- reg [OP_ADDR_W -1:0] wide_xy_addr_aux;
-
- reg narrow_xy_ena = 1'b0;
- reg [ BANK_ADDR_W -1:0] narrow_xy_bank;
- reg [OP_ADDR_W -1:0] narrow_xy_addr;
- reg [OP_ADDR_W -1:0] narrow_xy_addr_dly;
-
- assign rd_wide_xy_ena = wide_xy_ena;
- assign rd_wide_xy_ena_aux = wide_xy_ena_aux;
- assign rd_wide_xy_bank = wide_xy_bank;
- assign rd_wide_xy_bank_aux = wide_xy_bank_aux;
- assign rd_wide_xy_addr_aux = wide_xy_addr_aux;
-
- assign rd_narrow_xy_ena = narrow_xy_ena;
- assign rd_narrow_xy_bank = narrow_xy_bank;
- assign rd_narrow_xy_addr = narrow_xy_addr;
-
- genvar z;
- generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
- begin : gen_rd_wide_xy_addr
- assign rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W] = wide_xy_addr[z];
- end
- endgenerate
-
- //
- // Column Counter
- //
- reg [4:0] col_index; // current column index
- reg [4:0] col_index_prev; // delayed column index value
- reg [4:0] col_index_last; // index of the very last column
- reg [4:0] col_index_next; // precomputed next column index
- reg col_is_last; // flag set during the very last column
-
- always @(posedge clk)
- //
- col_index_prev <= col_index;
-
- //
- // Column Counter Increment Logic
- //
- // Column bookkeeping, updated on the clock edge that enters an *_INIT state
- // (the case selector is fsm_state_next, not fsm_state). No default item:
- // all four registers hold their value in every other state. No reset.
- always @(posedge clk)
- //
- case (fsm_state_next)
- //
- // First column of any of the three multiplication phases: restart counting
- // and capture the index of the last column from the upper five bits of
- // word_index_last.
- // NOTE(review): col_is_last is forced to 0 here even when
- // word_index_last[7:3] is 0, so a single-column operand is never flagged
- // as last -- confirm such operand sizes are excluded by the caller.
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: begin
- col_index <= 5'd0;
- col_index_last <= word_index_last[7:3];
- col_index_next <= 5'd1;
- col_is_last <= 1'b0;
-
- end
- //
- // Subsequent columns: advance to the precomputed index, flag the last
- // column, and precompute the following index (wrapping to 0 after the last).
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: begin
- col_index <= col_index_next;
- col_is_last <= col_index_next == col_index_last;
- col_index_next <= col_index_next == col_index_last ? 5'd0 : col_index_next + 5'd1;
- end
- //
- endcase
-
-
- //
- // Completion Flags
- //
- wire square_almost_done_comb;
- reg square_almost_done_flop = 1'b0;
- reg square_surely_done_flop = 1'b0;
-
- wire triangle_almost_done_comb;
- reg triangle_almost_done_flop = 1'b0;
- reg triangle_surely_done_flop = 1'b0;
- reg triangle_tardy_done_flop = 1'b0;
-
- wire rectangle_almost_done_comb;
- reg rectangle_almost_done_flop = 1'b0;
- reg rectangle_surely_done_flop = 1'b0;
- reg rectangle_tardy_done_flop = 1'b0;
-
- assign square_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
- assign triangle_almost_done_comb = (narrow_xy_addr[2:0] == word_index_last_minus1[2:0]) && (narrow_xy_addr[7:3] == col_index);
- assign rectangle_almost_done_comb = narrow_xy_addr == word_index_last_minus1;
-
- //
- // Square Completion Flags
- //
- // Square phase: *_almost_done_flop samples the combinational address compare
- // while the FSM is in a square BUSY state and is cleared in every other
- // state; *_surely_done_flop is the same flag delayed by one more clock.
- always @(posedge clk) begin
- //
- case (fsm_state)
- //
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY:
- square_almost_done_flop <= square_almost_done_comb;
- //
- default:
- square_almost_done_flop <= 1'b0;
- //
- endcase
- //
- square_surely_done_flop <= square_almost_done_flop;
- //
- end
-
- //
- // Triangle Completion Flags
- //
- // Same scheme as the square flags, with one extra delay stage
- // (triangle_tardy_done_flop) that follows triangle_surely_done_flop by a clock.
- always @(posedge clk) begin
- //
- case (fsm_state)
- //
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY:
- triangle_almost_done_flop <= triangle_almost_done_comb;
- //
- default:
- triangle_almost_done_flop <= 1'b0;
- //
- endcase
- //
- triangle_surely_done_flop <= triangle_almost_done_flop;
- triangle_tardy_done_flop <= triangle_surely_done_flop;
- //
- end
-
- //
- // Rectangle Completion Flags
- //
- // Three-stage chain (almost -> surely -> tardy), gated by the rectangle
- // BUSY states in the same way as the triangle flags.
- always @(posedge clk) begin
- //
- case (fsm_state)
- //
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY:
- rectangle_almost_done_flop <= rectangle_almost_done_comb;
- //
- default:
- rectangle_almost_done_flop <= 1'b0;
- //
- endcase
- //
- rectangle_surely_done_flop <= rectangle_almost_done_flop;
- rectangle_tardy_done_flop <= rectangle_surely_done_flop;
- //
- end
-
-
- //
- // Narrow Storage Control Logic
- //
- always @(posedge clk)
- //
- if (rst) narrow_xy_ena <= 1'b0;
- else begin
- //
- // Narrow Address
- //
- case (fsm_state_next)
- //
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: narrow_xy_addr <= 8'd0;
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_addr <= !square_almost_done_flop ? narrow_xy_addr + 1'b1 : 8'd0;
- //
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_addr <= triangle_almost_done_flop || (col_is_last && triangle_surely_done_flop) ?
- 8'd0 : narrow_xy_addr + 1'b1;
- //
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: narrow_xy_addr <= 8'd0;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_addr <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
- 8'd1 : narrow_xy_addr + 1'b1;
- //
- default: narrow_xy_addr <= 8'dX;
- //
- endcase
- //
- // Narrow Bank
- //
- // Narrow bank select for the state about to be entered (fsm_state_next).
- case (fsm_state_next)
- //
- // Square phase: bank is chosen by the sel_narrow_in input.
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_bank <= sel_narrow_in;
- //
- // Triangle phase: COEFF bank, switching to EXT at the tail of the last column.
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_bank <= col_is_last && (triangle_almost_done_flop || triangle_surely_done_flop) ?
- BANK_NARROW_EXT : BANK_NARROW_COEFF;
- //
- // Rectangle phase: Q bank, switching to EXT at the tail of every column.
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_bank <= rectangle_almost_done_flop || rectangle_surely_done_flop ?
- BANK_NARROW_EXT : BANK_NARROW_Q;
- //
- // Don't-care in all other states. The X pattern is replicated to the
- // declared register width: a fixed-width literal narrower than
- // BANK_ADDR_W would be zero-extended and drive the upper bit(s) to 0
- // instead of leaving them don't-care.
- default: narrow_xy_bank <= {BANK_ADDR_W{1'bX}};
- //
- endcase
- //
- case (fsm_state_next)
- //
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: narrow_xy_ena <= ~square_almost_done_flop;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: narrow_xy_ena <= !col_is_last ? ~triangle_almost_done_flop : ~triangle_surely_done_flop;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: narrow_xy_ena <= 1'b1;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: narrow_xy_ena <= ~rectangle_surely_done_flop;
- //
- default: narrow_xy_ena <= 1'b0;
- //
- endcase
- //
- end
-
-
- //
- // Wide Storage Control Logic
- //
-
- wire [2:0] wide_offset_rom[0:3];
-
- generate for (z=1; z<NUM_MULTS; z=z+2)
- begin : gen_wide_offset_rom
- assign wide_offset_rom[(z-1)/2] = z[2:0];
- end
- endgenerate
-
- // Next wide-operand word address: counts down by one, and reloads the
- // supplied last index once the current address is no longer above zero.
- function [7:0] wide_xy_addr_next;
-     input [7:0] wide_xy_addr_current;
-     input [7:0] wide_xy_addr_last;
-     begin
-         // start from the wrap-around value ...
-         wide_xy_addr_next = wide_xy_addr_last;
-         // ... and override it while there is still room to count down
-         if (wide_xy_addr_current > 8'd0)
-             wide_xy_addr_next = wide_xy_addr_current - 1'b1;
-     end
- endfunction
-
- integer j;
- always @(posedge clk)
- //
- if (rst) begin
- wide_xy_ena <= 1'b0;
- wide_xy_ena_aux <= 1'b0;
- end else begin
- //
- // Wide Address
- //
- for (j=0; j<(NUM_MULTS/2); j=j+1)
- //
- case (fsm_state_next)
- //
- // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
- //
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
- //
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
- //
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr[j] <= {5'd0, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: wide_xy_addr[j] <= {col_index_next, wide_offset_rom[j]};
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_addr[j] <= wide_xy_addr_next(wide_xy_addr[j], word_index_last);
- //
- default: wide_xy_addr[j] <= 8'dX;
- endcase
- //
- // Wide Aux Address
- //
- case (fsm_state_next)
- //
- // this can be reworked by having 8 address regs instead of 4 and using shifts instead of subtractions!
- //
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
- //
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: wide_xy_addr_aux <= {5'd0, 3'd1};
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_addr_aux <= wide_xy_addr_next(wide_xy_addr_aux, word_index_last);
- //
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_addr_aux <= 8'dX;//{5'd0, 3'd0};
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_addr_aux <= rcmb_xy_valid ? rcmb_xy_addr : 8'dX;
- //recomb_fat_bram_xy_dout_valid && (recomb_fat_bram_xy_bank == BANK_FAT_ML) ?
- //mac_fat_bram_xy_addr[4] + 1'b1 : mac_fat_bram_xy_addr[4];
- //
- default: wide_xy_addr_aux <= 8'dX;
- endcase
- //
- // Wide Bank
- //
- // Wide bank select for the state about to be entered: one item group
- // per multiplication phase, don't-care everywhere else.
- case (fsm_state_next)
-     // square phase -> bank chosen by sel_wide_in
-     MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
-     MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
-     MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
-     MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
-     MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
-     MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
-         wide_xy_bank <= sel_wide_in;
-     end
-     // triangle phase (INIT, TRIG and BUSY alike) -> L bank
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
-     MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
-         wide_xy_bank <= BANK_WIDE_L;
-     end
-     // rectangle phase -> N bank
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
-     MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
-         wide_xy_bank <= BANK_WIDE_N;
-     end
-     default: begin
-         wide_xy_bank <= 3'bXXX;
-     end
- endcase
- //
- // Wide Aux Bank
- //
- case (fsm_state_next)
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: wide_xy_bank_aux <= sel_wide_in;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: wide_xy_bank_aux <= BANK_WIDE_H;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_bank_aux <= BANK_WIDE_L;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: if (rcmb_xy_valid) // rewrite using "Kolya-style" here (get rid of too many xxx's)
- case (rcmb_xy_bank)
- BANK_RCMB_ML: wide_xy_bank_aux <= BANK_WIDE_L;
- BANK_RCMB_MH: wide_xy_bank_aux <= BANK_WIDE_H;
- //BANK_RDCT_EXT: wide_xy_bank_aux <= BANK_WIDE_EXT; '3bXXX
- default: wide_xy_bank_aux <= 3'bXXX;
- endcase
- else wide_xy_bank_aux <= 3'bXXX;
- default: wide_xy_bank_aux <= 3'bXXX;
- endcase
- //
- // Wide Enable
- //
- case (fsm_state_next)
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: wide_xy_ena <= 1'b1;
- default: wide_xy_ena <= 1'b0;
- endcase
- //
- // Wide Aux Enable
- //
- case (fsm_state_next)
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: wide_xy_ena_aux <= 1'b1;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: wide_xy_ena_aux <= 1'b0;//1'b1;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: wide_xy_ena_aux <= rcmb_xy_valid;// && (recomb_fat_bram_xy_bank == BANK_FAT_ML);
- default: wide_xy_ena_aux <= 1'b0;
- endcase
- //
- end
-
-
- //
- // Delay Lines
- //
- always @(posedge clk)
- //
- narrow_xy_addr_dly <= narrow_xy_addr;
-
-
- //
- // DSP Array Logic
- //
- reg dsp_xy_ce_a = 1'b0;
- reg dsp_xy_ce_b = 1'b0;
- reg dsp_xy_ce_b_dly = 1'b0;
- reg dsp_xy_ce_m = 1'b0;
- reg dsp_xy_ce_p = 1'b0;
- reg dsp_xy_ce_mode = 1'b0;
-
- reg [9 -1:0] dsp_xy_mode_z = {9{1'b1}};
-
- wire [5*18-1:0] dsp_x_a;
- wire [5*18-1:0] dsp_y_a;
-
- reg [1*16-1:0] dsp_x_b;
- reg [1*16-1:0] dsp_y_b;
-
- reg [ 1:0] dsp_xy_b_carry;
-
- wire [9*47-1:0] dsp_x_p;
- wire [9*47-1:0] dsp_y_p;
-
- //generate for (z=0; z<(NUM_MULTS/2); z=z+1)
- //begin : gen_dsp_xy_a_split
- //assign dsp_x_a[18*z+:18] = rd_wide_x_dout[z];
- //assign dsp_y_a[18*z+:18] = rd_wide_y_dout[z];
- //end
- //endgenerate
-
- assign dsp_x_a = {rd_wide_x_dout_aux, rd_wide_x_dout};
- assign dsp_y_a = {rd_wide_y_dout_aux, rd_wide_y_dout};
-
- //assign dsp_x_a[18*4+:18] = rd_wide_x_dout_aux;
- //assign dsp_y_a[18*4+:18] = rd_wide_y_dout_aux;
-
- always @(posedge clk)
- //
- dsp_xy_ce_b_dly <= dsp_xy_ce_b;
-
-
- modexpng_dsp_array_block dsp_array_block_x
- (
- .clk (clk),
-
- .ce_a (dsp_xy_ce_a),
- .ce_b (dsp_xy_ce_b),
- .ce_m (dsp_xy_ce_m),
- .ce_p (dsp_xy_ce_p),
- .ce_mode (dsp_xy_ce_mode),
-
- .mode_z (dsp_xy_mode_z),
-
- .a (dsp_x_a),
- .b (dsp_x_b),
- .p (dsp_x_p)
- );
-
- modexpng_dsp_array_block dsp_array_block_y
- (
- .clk (clk),
-
- .ce_a (dsp_xy_ce_a),
- .ce_b (dsp_xy_ce_b),
- .ce_m (dsp_xy_ce_m),
- .ce_p (dsp_xy_ce_p),
- .ce_mode (dsp_xy_ce_mode),
-
- .mode_z (dsp_xy_mode_z),
-
- .a (dsp_y_a),
- .b (dsp_y_b),
- .p (dsp_y_p)
- );
-
-
-
-
- //
- // DSP Control Logic
- //
- reg narrow_xy_ena_dly1 = 1'b0;
- reg narrow_xy_ena_dly2 = 1'b0;
-
- // Clock-enable pipeline for the two DSP array blocks. All enables are
- // derived from delayed copies of narrow_xy_ena; everything here is cleared
- // by the synchronous reset.
- always @(posedge clk)
- //
- if (rst) begin
- //
- narrow_xy_ena_dly1 <= 1'b0;
- narrow_xy_ena_dly2 <= 1'b0;
- //
- dsp_xy_ce_a <= 1'b0;
- dsp_xy_ce_b <= 1'b0;
- dsp_xy_ce_m <= 1'b0;
- dsp_xy_ce_p <= 1'b0;
- dsp_xy_ce_mode <= 1'b0;
- //
- end else begin
- //
- // one- and two-clock delayed copies of the narrow read enable
- narrow_xy_ena_dly1 <= narrow_xy_ena;
- narrow_xy_ena_dly2 <= narrow_xy_ena_dly1;
- //
- // ce_a is asserted while either delayed copy is high, ce_b follows only
- // the second copy; ce_m and ce_mode follow dsp_xy_ce_b_dly (a registered
- // copy of ce_b kept in a separate always block without reset), and ce_p
- // trails ce_m by one clock.
- dsp_xy_ce_a <= narrow_xy_ena_dly1 | narrow_xy_ena_dly2;
- dsp_xy_ce_b <= narrow_xy_ena_dly2;
- dsp_xy_ce_m <= dsp_xy_ce_b_dly;
- dsp_xy_ce_p <= dsp_xy_ce_m;
- dsp_xy_ce_mode <= dsp_xy_ce_b_dly;
- //
- end
-
- //
- // DSP Feed Logic
- //
- reg dsp_merge_xy_b;
-
- // dsp_merge_xy_b selects how the DSP B operands are fed: it is set while the
- // FSM is in MULT_SQUARE_COL_0_TRIG and cleared in MULT_TRIANGLE_COL_0_TRIG.
- // There is no default item and no reset, so the flag holds its value in all
- // other states; the register is declared without an initial value, so it is
- // undefined in simulation until the first square phase starts.
- always @(posedge clk)
- //
- case (fsm_state)
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: dsp_merge_xy_b <= 1'b1;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: dsp_merge_xy_b <= 1'b0;
- endcase
-
- //
- // On-the-fly Carry Recombination
- //
- wire [17:0] rd_narrow_x_dout_carry = rd_narrow_x_dout + {{16{1'b0}}, dsp_xy_b_carry};
- wire [17:0] rd_narrow_y_dout_carry = rd_narrow_y_dout + {{16{1'b0}}, dsp_xy_b_carry};
- wire [17:0] rd_narrow_xy_dout_carry_mux = ladder_mode ? rd_narrow_y_dout_carry : rd_narrow_x_dout_carry;
-
- // Registers the 16-bit B operands for both DSP arrays together with the
- // 2-bit carry that is added to the next narrow word.
- always @(posedge clk)
- //
- if (narrow_xy_ena_dly2) begin // rewrite
- //
- if (!dsp_merge_xy_b) begin
- // plain mode: x and y take the low 16 bits of their own narrow word,
- // no carry is propagated
- dsp_x_b <= rd_narrow_x_dout[15:0];
- dsp_y_b <= rd_narrow_y_dout[15:0];
- dsp_xy_b_carry <= 2'b00;
- end else begin
- // merge mode: both x and y take the same carry-adjusted word (the y
- // word when ladder_mode is set, the x word otherwise); bits [17:16]
- // of that sum become the carry for the next word
- dsp_x_b <= rd_narrow_xy_dout_carry_mux[15:0];
- dsp_y_b <= rd_narrow_xy_dout_carry_mux[15:0];
- dsp_xy_b_carry <= rd_narrow_xy_dout_carry_mux[17:16];
- end
- //
- end else begin
- //
- // no valid narrow word this cycle: operands are don't-care, carry is cleared
- dsp_x_b <= {16{1'bX}};
- dsp_y_b <= {16{1'bX}};
- //
- dsp_xy_b_carry <= 2'b00;
- //
- end
-
-
- reg [9 -1:0] dsp_xy_mode_z_adv1 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv2 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv3 = {9{1'b1}};
- reg [9 -1:0] dsp_xy_mode_z_adv4 = {9{1'b1}};
-
- // Builds the mode_z mask for the square phase. When the upper five bits of
- // the narrow address equal the given column index, the bit selected by the
- // low three address bits is cleared and every other bit is set; otherwise
- // the mask is all ones.
- function [NUM_MULTS:0] calc_mac_mode_z_square;
-     input [ 4:0] col_index_value;
-     input [ 7:0] narrow_xy_addr_value;
-     begin
-         if (narrow_xy_addr_value[7:3] == col_index_value) begin
-             case (narrow_xy_addr_value[2:0])
-                 3'd0: calc_mac_mode_z_square = 9'b1_1111_1110;
-                 3'd1: calc_mac_mode_z_square = 9'b1_1111_1101;
-                 3'd2: calc_mac_mode_z_square = 9'b1_1111_1011;
-                 3'd3: calc_mac_mode_z_square = 9'b1_1111_0111;
-                 3'd4: calc_mac_mode_z_square = 9'b1_1110_1111;
-                 3'd5: calc_mac_mode_z_square = 9'b1_1101_1111;
-                 3'd6: calc_mac_mode_z_square = 9'b1_1011_1111;
-                 3'd7: calc_mac_mode_z_square = 9'b1_0111_1111;
-             endcase
-         end else begin
-             calc_mac_mode_z_square = {(NUM_MULTS+1){1'b1}};
-         end
-     end
- endfunction
-
- // Builds the mode_z mask for the rectangle phase. When the upper five bits
- // of the narrow address equal the given column index, the bit selected by
- // the low three address bits is cleared and every other bit is set;
- // otherwise the mask is all ones.
- function [NUM_MULTS:0] calc_mac_mode_z_rectangle;
-     input [ 4:0] col_index_value;
-     input [ 7:0] narrow_xy_addr_value;
-     begin
-         if (narrow_xy_addr_value[7:3] == col_index_value) begin
-             case (narrow_xy_addr_value[2:0])
-                 3'd0: calc_mac_mode_z_rectangle = 9'b1_1111_1110;
-                 3'd1: calc_mac_mode_z_rectangle = 9'b1_1111_1101;
-                 3'd2: calc_mac_mode_z_rectangle = 9'b1_1111_1011;
-                 3'd3: calc_mac_mode_z_rectangle = 9'b1_1111_0111;
-                 3'd4: calc_mac_mode_z_rectangle = 9'b1_1110_1111;
-                 3'd5: calc_mac_mode_z_rectangle = 9'b1_1101_1111;
-                 3'd6: calc_mac_mode_z_rectangle = 9'b1_1011_1111;
-                 3'd7: calc_mac_mode_z_rectangle = 9'b1_0111_1111;
-             endcase
-         end else begin
-             calc_mac_mode_z_rectangle = {(NUM_MULTS+1){1'b1}};
-         end
-     end
- endfunction
-
- always @(posedge clk)
- //
- case (fsm_state_next)
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}};
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_square(col_index_prev, narrow_xy_addr_dly);
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= {9{1'b1}};
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: dsp_xy_mode_z_adv4 <= {9{1'b0}}; // so easy
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: dsp_xy_mode_z_adv4 <= calc_mac_mode_z_rectangle(col_index_prev, narrow_xy_addr_dly);
- default: dsp_xy_mode_z_adv4 <= {9{1'b1}};
- endcase
-
- // Four-stage delay line: the value written to dsp_xy_mode_z_adv4 reaches
- // dsp_xy_mode_z four clocks later (adv4 -> adv3 -> adv2 -> adv1 -> mode_z).
- always @(posedge clk) begin
-     dsp_xy_mode_z_adv3 <= dsp_xy_mode_z_adv4;
-     dsp_xy_mode_z_adv2 <= dsp_xy_mode_z_adv3;
-     dsp_xy_mode_z_adv1 <= dsp_xy_mode_z_adv2;
-     // final stage drives the mode_z port of both DSP array blocks
-     dsp_xy_mode_z      <= dsp_xy_mode_z_adv1;
- end
-
-
-
-
-
- //
- // Recombinator
- //
- reg rcmb_ena = 1'b0;
- wire rcmb_rdy;
-
- modexpng_recombinator_block recombinator_block
- (
- .clk (clk),
- .rst (rst),
-
- .ena (rcmb_ena),
- .rdy (rcmb_rdy),
-
- .mmm_fsm_state_next (fsm_state_next),
-
- .word_index_last (word_index_last),
-
- .dsp_xy_ce_p (dsp_xy_ce_p),
- .dsp_x_p (dsp_x_p),
- .dsp_y_p (dsp_y_p),
-
- .col_index (col_index),
- .col_index_last (col_index_last),
-
- .rd_narrow_xy_addr (narrow_xy_addr),
- .rd_narrow_xy_bank (narrow_xy_bank),
-
- .rcmb_wide_xy_bank (rcmb_wide_xy_bank),
- .rcmb_wide_xy_addr (rcmb_wide_xy_addr),
- .rcmb_wide_x_dout (rcmb_wide_x_dout),
- .rcmb_wide_y_dout (rcmb_wide_y_dout),
- .rcmb_wide_xy_valid (rcmb_wide_xy_valid),
-
- .rcmb_narrow_xy_bank (rcmb_narrow_xy_bank),
- .rcmb_narrow_xy_addr (rcmb_narrow_xy_addr),
- .rcmb_narrow_x_dout (rcmb_narrow_x_dout),
- .rcmb_narrow_y_dout (rcmb_narrow_y_dout),
- .rcmb_narrow_xy_valid (rcmb_narrow_xy_valid),
-
- .rdct_narrow_xy_bank (rcmb_xy_bank),
- .rdct_narrow_xy_addr (rcmb_xy_addr),
- .rdct_narrow_x_dout (rcmb_x_dout),
- .rdct_narrow_y_dout (rcmb_y_dout),
- .rdct_narrow_xy_valid (rcmb_xy_valid)
-
- );
-
-
- //
- // Recombinator Enable Logic
- //
- // rcmb_ena is registered high for the cycle after ce_a is asserted while
- // ce_b, ce_m and ce_p are all still low, i.e. the enable pattern seen when
- // the DSP clock-enable pipeline has just started filling. Cleared by the
- // synchronous reset.
- always @(posedge clk)
- //
- if (rst) rcmb_ena <= 1'b0;
- else rcmb_ena <= dsp_xy_ce_a && !dsp_xy_ce_b && !dsp_xy_ce_m && !dsp_xy_ce_p;
-
-
- //
- // Handy Completion Flags
- //
- wire square_done = square_surely_done_flop;
- wire triangle_done = !col_is_last ? triangle_surely_done_flop : triangle_tardy_done_flop;
- wire rectangle_done = rectangle_tardy_done_flop;
-
-
- //
- // FSM Transition Logic
- //
- assign fsm_state_after_mult_square = col_is_last ? MMM_FSM_STATE_MULT_SQUARE_HOLDOFF : MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT;
- assign fsm_state_after_mult_triangle = col_is_last ? MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT;
- assign fsm_state_after_mult_rectangle = col_is_last ? MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT;
-
- // Combinational next-state function. The three phases (square, triangle,
- // rectangle) share one shape: COL_0 INIT -> TRIG -> BUSY, then
- // COL_N INIT -> TRIG -> BUSY repeated per column, then a HOLDOFF state that
- // waits for the recombinator (rcmb_rdy) before the next phase starts.
- always @* begin
- //
- // pre-assignment keeps fsm_state_next defined on every path
- fsm_state_next = MMM_FSM_STATE_IDLE;
- //
- case (fsm_state)
- MMM_FSM_STATE_IDLE: fsm_state_next = ena ? MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT : MMM_FSM_STATE_IDLE;
-
- // column 0 always continues into COL_N_INIT once the phase's done flag rises
- MMM_FSM_STATE_MULT_SQUARE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG ;
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY ;
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: fsm_state_next = square_done ? MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT : MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY;
-
- // later columns loop back to COL_N_INIT, or leave to HOLDOFF when col_is_last
- MMM_FSM_STATE_MULT_SQUARE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG ;
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY ;
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: fsm_state_next = square_done ? fsm_state_after_mult_square : MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY;
-
- MMM_FSM_STATE_MULT_SQUARE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_SQUARE_HOLDOFF;
-
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG ;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY ;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: fsm_state_next = triangle_done ? MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY;
-
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG ;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY ;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: fsm_state_next = triangle_done ? fsm_state_after_mult_triangle : MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY;
-
- MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT : MMM_FSM_STATE_MULT_TRIANGLE_HOLDOFF;
-
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG ;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY ;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: fsm_state_next = rectangle_done ? MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT : MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY;
-
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_INIT: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG ;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
-
- MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? MMM_FSM_STATE_STOP : MMM_FSM_STATE_MULT_RECTANGLE_HOLDOFF;
-
- // MMM_FSM_STATE_STOP has no item of its own, so it lands here and the
- // FSM returns to IDLE on the following clock
- default: fsm_state_next = MMM_FSM_STATE_IDLE ;
-
- endcase
- //
- end
-
-
- //
- // Reductor Control Logic
- //
- reg rdct_ena_reg = 1'b0;
-
- assign rdct_ena = rdct_ena_reg;
-
- // Reductor start strobe: rdct_ena_reg is high for the clock cycle that
- // follows fsm_state == MULT_RECTANGLE_COL_0_INIT and low otherwise.
- // A synchronous reset keeps the strobe low while rst is asserted, so a
- // reset arriving during MULT_RECTANGLE_COL_0_INIT cannot start the
- // reductor.
- always @(posedge clk)
- //
- if (rst) rdct_ena_reg <= 1'b0;
- else
- case (fsm_state)
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
- default: rdct_ena_reg <= 1'b0;
- endcase
-
-
-
-endmodule
diff --git a/rtl/_modexpng_recombinator_block.v b/rtl/_modexpng_recombinator_block.v
deleted file mode 100644
index 61bf734..0000000
--- a/rtl/_modexpng_recombinator_block.v
+++ /dev/null
@@ -1,1225 +0,0 @@
-module modexpng_recombinator_block
-(
- clk, rst,
- ena, rdy,
- mmm_fsm_state_next,
- word_index_last,
- dsp_xy_ce_p,
- dsp_x_p, dsp_y_p,
- col_index, col_index_last,
- rd_narrow_xy_addr, rd_narrow_xy_bank,
- rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_dout, rcmb_wide_y_dout, rcmb_wide_xy_valid,
- rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_dout, rcmb_narrow_y_dout, rcmb_narrow_xy_valid,
- rdct_narrow_xy_bank, rdct_narrow_xy_addr, rdct_narrow_x_dout, rdct_narrow_y_dout, rdct_narrow_xy_valid
-);
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_parameters.vh"
- `include "../rtl/modexpng_mmm_fsm.vh"
-
-
- input clk;
- input rst;
- input ena;
- output rdy;
- input [ MMM_FSM_STATE_W -1:0] mmm_fsm_state_next;
- input [ OP_ADDR_W -1:0] word_index_last;
- input dsp_xy_ce_p;
- input [(NUM_MULTS+1) * MAC_W -1:0] dsp_x_p;
- input [(NUM_MULTS+1) * MAC_W -1:0] dsp_y_p;
- input [ COL_INDEX_W -1:0] col_index;
- input [ COL_INDEX_W -1:0] col_index_last;
-
- input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
-
- output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
- output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
- output [ WORD_EXT_W -1:0] rcmb_wide_x_dout;
- output [ WORD_EXT_W -1:0] rcmb_wide_y_dout;
- output rcmb_wide_xy_valid;
-
- output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
- output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout;
- output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout;
- output rcmb_narrow_xy_valid;
-
- output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank;
- output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] rdct_narrow_x_dout;
- output [ WORD_EXT_W -1:0] rdct_narrow_y_dout;
- output rdct_narrow_xy_valid;
-
-
- //
- // Latches
- //
- reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS];
- reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS];
-
-
- //
- // Mapping
- //
- wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS];
- wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS];
-
- genvar z;
- generate for (z=0; z<NUM_MULTS_AUX; z=z+1) // NOTE(review): the split arrays are declared [0:NUM_MULTS]; presumably NUM_MULTS_AUX == NUM_MULTS+1 -- confirm in modexpng_parameters.vh
- begin : gen_dsp_xy_p_split
- assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W]; // z-th MAC_W-bit slice of the packed dsp_x_p bus
- assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W]; // z-th MAC_W-bit slice of the packed dsp_y_p bus
- end
- endgenerate
-
-
- //
- // Delays
- //
- reg dsp_xy_ce_p_dly1 = 1'b0; // dsp_xy_ce_p delayed by one clock
-
- always @(posedge clk)
- // synchronous reset clears the delayed strobe; otherwise it tracks dsp_xy_ce_p
- if (rst) dsp_xy_ce_p_dly1 <= 1'b0;
- else dsp_xy_ce_p_dly1 <= dsp_xy_ce_p;
-
-
- //
- // Registers
- //
-
- // valid
- reg xy_valid_lsb = 1'b0;
- reg xy_aux_lsb = 1'b0;
- reg xy_valid_msb = 1'b0;
-
- // bitmap
- reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}};
- reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}};
-
- // index
- reg [2:0] xy_index_lsb = 3'dX;
-
- // purge
- reg xy_purge_lsb = 1'b0;
- reg xy_purge_msb = 1'b0;
-
- // valid - latch
- reg xy_valid_latch_lsb = 1'b0;
-
- // aux - latch
- reg xy_aux_latch_lsb = 1'b0;
-
- // bitmap - latch
- reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = MULT_BITMAP_ZEROES;
- reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = MULT_BITMAP_ZEROES;
-
- // index - latch
- reg [MAC_INDEX_W-1:0] xy_index_latch_lsb = MAC_INDEX_DONT_CARE;
-
- // purge - latch
- reg xy_purge_latch_lsb = 1'b0;
- reg xy_purge_latch_msb = 1'b0;
-
- //
- reg xy_valid_lsb_adv[1:6];
- reg xy_valid_msb_adv[1:6];
- reg xy_aux_lsb_adv[1:6];
- reg [NUM_MULTS-1:0] xy_bitmap_lsb_adv[1:6];
- reg [NUM_MULTS-1:0] xy_bitmap_msb_adv[1:6];
- reg [MAC_INDEX_W-1:0] xy_index_lsb_adv[1:6];
- reg [MAC_INDEX_W-1:0] xy_index_msb_adv[1:6];
- reg xy_purge_lsb_adv[1:6];
- reg xy_purge_msb_adv[1:6];
-
- reg [1:0] rcmb_mode; // 2'd1 / 2'd2 / 2'd3 dispatch to recombine_square / recombine_triangle / recombine_rectangle; 2'd0 dispatches to nothing
-
- always @(posedge clk)
- // the mode is only re-sampled in cycles where ena is high
- if (ena)
- // decode from the multiplier FSM's next state; any other state yields mode 0
- case (mmm_fsm_state_next)
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1;
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2;
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3;
- default: rcmb_mode <= 2'd0;
- endcase
-
-
- integer i;
- // Power-up values for the look-ahead ("_adv") pipeline stages.
- // The arrays are declared [1:6], so the bound is inclusive: the previous
- // "i<6" left stage 6 uninitialized, and its X value was copied into stage 5
- // by the first shift and then walked down the whole pipeline in simulation.
- initial for (i=1; i<=6; i=i+1) begin
- xy_valid_lsb_adv[i] = 1'b0;
- xy_valid_msb_adv[i] = 1'b0;
- xy_aux_lsb_adv[i] = 1'b0;
- xy_bitmap_lsb_adv[i] = {8{1'b0}};
- xy_bitmap_msb_adv[i] = {8{1'b0}};
- xy_index_lsb_adv[i] = 3'dX; // index is a don't-care while the matching valid flag is 0
- xy_index_msb_adv[i] = 3'dX;
- xy_purge_lsb_adv[i] = 1'b0;
- xy_purge_msb_adv[i] = 1'b0;
- end
-
- function calc_square_triangle_valid_lsb;
- // Returns 1 when bits [7:3] of the narrow address equal the column index, else 0.
- // col_index_last_value and narrow_xy_bank_value are accepted but not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [ OP_ADDR_W-1:0] narrow_xy_addr_value;
- begin
- // start from "not valid" and raise the flag only on a column match
- calc_square_triangle_valid_lsb = 1'b0;
- if (narrow_xy_addr_value[7:3] == col_index_value)
- calc_square_triangle_valid_lsb = 1'b1;
- end
- endfunction
-
- function calc_square_valid_lsb; // alias used for the SQUARE states: forwards every argument to calc_square_triangle_valid_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [ OP_ADDR_W-1:0] narrow_xy_addr_value;
- begin
- calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function calc_triangle_valid_lsb; // alias used for the TRIANGLE states: forwards every argument to calc_square_triangle_valid_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value; // fixed 8-bit address here, whereas the shared helper declares OP_ADDR_W bits
- begin
- calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function calc_rectangle_valid_lsb;
- // Valid flag for the rectangle phase: on a column match (address bits [7:3]
- // equal the column index) the result is "bank is not BANK_NARROW_EXT";
- // without a match it is 0. col_index_last_value is not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- // default: no match, not valid
- calc_rectangle_valid_lsb = 1'b0;
- if (narrow_xy_addr_value[7:3] == col_index_value)
- calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT;
- end
- endfunction
-
- function calc_triangle_aux_lsb;
- // Returns 1 exactly when the narrow bank equals BANK_NARROW_EXT.
- // The column indices and the address are accepted but not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- // default to 0, override when the extension bank is addressed
- calc_triangle_aux_lsb = 1'b0;
- if (narrow_xy_bank_value == BANK_NARROW_EXT)
- calc_triangle_aux_lsb = 1'b1;
- end
- endfunction
-
- function [7:0] calc_square_triangle_bitmap_lsb; // one-hot 8-bit bitmap selected by the narrow address, or all zeroes
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value; // not used in the body
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used in the body
- input [7:0] narrow_xy_addr_value;
- begin
- // a column match is "address bits [7:3] equal the column index"
- if (narrow_xy_addr_value[7:3] == col_index_value)
- // on a match, set the single bit whose position is address bits [2:0]
- case (narrow_xy_addr_value[2:0])
- 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001;
- 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010;
- 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100;
- 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000;
- 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000;
- 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000;
- 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000;
- 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000;
- endcase
- // without a match no bit is set
- else
- calc_square_triangle_bitmap_lsb = {8{1'b0}};
- //
- end
- endfunction
-
- function [7:0] calc_square_bitmap_lsb; // alias used for the SQUARE states: forwards every argument to calc_square_triangle_bitmap_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function [7:0] calc_triangle_bitmap_lsb; // alias used for the TRIANGLE states: forwards every argument to calc_square_triangle_bitmap_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function [7:0] calc_rectangle_bitmap_lsb; // one-hot 8-bit bitmap for the rectangle phase, or all zeroes
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value; // not used in the body
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- // unlike the square/triangle variant, the bank must also differ from BANK_NARROW_EXT
- if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
- // set the single bit whose position is address bits [2:0]
- case (narrow_xy_addr_value[2:0])
- 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001;
- 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010;
- 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100;
- 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000;
- 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000;
- 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000;
- 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000;
- 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000;
- endcase
- // condition not met: no bit is set
- else
- calc_rectangle_bitmap_lsb = {8{1'b0}};
- //
- end
- endfunction
-
- /*
- * These can be simplified (the difference between square/triangle and
- * rectangle is that the bank is checked or not). A universal function would
- * accept a parameter that tells it whether it should check the bank or not.
- * Let's do it later, too early to optimize now, it seems.
- *
- *
- */
-
- function [2:0] calc_square_triangle_index_lsb; // 3-bit index taken from the narrow address, or X when the column does not match
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value; // not used in the body
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // not used in the body
- input [7:0] narrow_xy_addr_value;
- begin
- // a column match is "address bits [7:3] equal the column index"
- if (narrow_xy_addr_value[7:3] == col_index_value)
- // on a match the result is numerically equal to address bits [2:0]
- case (narrow_xy_addr_value[2:0])
- 3'b000: calc_square_triangle_index_lsb = 3'd0;
- 3'b001: calc_square_triangle_index_lsb = 3'd1;
- 3'b010: calc_square_triangle_index_lsb = 3'd2;
- 3'b011: calc_square_triangle_index_lsb = 3'd3;
- 3'b100: calc_square_triangle_index_lsb = 3'd4;
- 3'b101: calc_square_triangle_index_lsb = 3'd5;
- 3'b110: calc_square_triangle_index_lsb = 3'd6;
- 3'b111: calc_square_triangle_index_lsb = 3'd7;
- endcase
- // without a match the index is explicitly left as X
- else
- calc_square_triangle_index_lsb = 3'dX;
- //
- end
- endfunction
-
- function [2:0] calc_square_index_lsb; // alias used for the SQUARE states: forwards every argument to calc_square_triangle_index_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function [2:0] calc_triangle_index_lsb; // alias used for the TRIANGLE states: forwards every argument to calc_square_triangle_index_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function [2:0] calc_rectangle_index_lsb; // 3-bit index for the rectangle phase, or X when the condition below is not met
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value; // not used in the body
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value; // renamed from slim_bram_xy_bank_value to match the sibling functions (calls are positional)
- input [7:0] narrow_xy_addr_value; // renamed from slim_bram_xy_addr_value, same reason
- begin
- // requires a column match (address bits [7:3]) and a bank other than BANK_NARROW_EXT
- if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT))
- // the result is numerically equal to address bits [2:0]
- case (narrow_xy_addr_value[2:0])
- 3'b000: calc_rectangle_index_lsb = 3'd0;
- 3'b001: calc_rectangle_index_lsb = 3'd1;
- 3'b010: calc_rectangle_index_lsb = 3'd2;
- 3'b011: calc_rectangle_index_lsb = 3'd3;
- 3'b100: calc_rectangle_index_lsb = 3'd4;
- 3'b101: calc_rectangle_index_lsb = 3'd5;
- 3'b110: calc_rectangle_index_lsb = 3'd6;
- 3'b111: calc_rectangle_index_lsb = 3'd7;
- endcase
- // condition not met: the index is explicitly left as X
- else
- calc_rectangle_index_lsb = 3'dX;
- //
- end
- endfunction
-
- function calc_square_rectangle_purge_lsb;
- // Purge flag shared by the square and rectangle phases: on a column match
- // (address bits [7:3] equal the column index) the result is "those same
- // address bits equal the last column index"; otherwise it is 0.
- // narrow_xy_bank_value is not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- // default: no purge
- calc_square_rectangle_purge_lsb = 1'b0;
- if (narrow_xy_addr_value[7:3] == col_index_value)
- calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value;
- end
- endfunction
-
- function calc_square_purge_lsb; // alias used for the SQUARE states: forwards every argument to calc_square_rectangle_purge_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function calc_rectangle_purge_lsb; // alias used for the RECTANGLE states: forwards every argument to calc_square_rectangle_purge_lsb
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- begin
- calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value);
- end
- endfunction
-
- function calc_square_valid_msb;
- // Returns 1 when the full 8-bit narrow address equals index_last_value, else 0.
- // The column indices and the bank are accepted but not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default to "not valid", override on the last-word address
- calc_square_valid_msb = 1'b0;
- if (narrow_xy_addr_value == index_last_value)
- calc_square_valid_msb = 1'b1;
- end
- endfunction
-
- function calc_rectangle_valid_msb;
- // Returns 1 when the narrow address is 1 and the bank is BANK_NARROW_EXT, else 0.
- // The column indices and index_last_value are accepted but not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default to "not valid", override on word 1 of the extension bank
- calc_rectangle_valid_msb = 1'b0;
- if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
- calc_rectangle_valid_msb = 1'b1;
- end
- endfunction
-
- function [7:0] calc_square_bitmap_msb;
- // MSB bitmap for the square phase. When the narrow address equals
- // index_last_value, bits [6:0] are all ones and bit 7 is 1 only if the
- // column index differs from the last column index; otherwise the bitmap
- // is all zeroes. narrow_xy_bank_value is not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default: empty bitmap
- calc_square_bitmap_msb = 8'b00000000;
- if (narrow_xy_addr_value == index_last_value)
- calc_square_bitmap_msb = {col_index_value != col_index_last_value, 7'b1111111};
- end
- endfunction
-
- function [7:0] calc_rectangle_bitmap_msb;
- // MSB bitmap for the rectangle phase: all ones when the narrow address is 1
- // and the bank is BANK_NARROW_EXT, all zeroes otherwise.
- // The column indices and index_last_value are accepted but not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default: empty bitmap
- calc_rectangle_bitmap_msb = 8'b00000000;
- if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
- calc_rectangle_bitmap_msb = 8'b11111111;
- end
- endfunction
-
- function calc_square_purge_msb;
- // MSB purge flag for the square phase: when the narrow address equals
- // index_last_value the result is "column index equals last column index";
- // otherwise it is 0. narrow_xy_bank_value is not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default: no purge
- calc_square_purge_msb = 1'b0;
- if (narrow_xy_addr_value == index_last_value)
- calc_square_purge_msb = col_index_value == col_index_last_value;
- end
- endfunction
-
- function calc_rectangle_purge_msb;
- // MSB purge flag for the rectangle phase: when the narrow address is 1 and
- // the bank is BANK_NARROW_EXT the result is "column index equals last column
- // index"; otherwise it is 0. index_last_value is not used.
- input [COL_INDEX_W-1:0] col_index_value;
- input [COL_INDEX_W-1:0] col_index_last_value;
- input [BANK_ADDR_W-1:0] narrow_xy_bank_value;
- input [7:0] narrow_xy_addr_value;
- input [7:0] index_last_value;
- begin
- // default: no purge
- calc_rectangle_purge_msb = 1'b0;
- if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT))
- calc_rectangle_purge_msb = col_index_value == col_index_last_value;
- end
- endfunction
-
-
- reg rcmb_xy_lsb_ce = 1'b0;
- reg rcmb_xy_lsb_ce_aux;
- reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000;
- wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0];
- reg rcmb_xy_lsb_clr;
-
- reg [46:0] rcmb_x_lsb_din;
- reg [46:0] rcmb_y_lsb_din;
- wire [15:0] rcmb_x_lsb_dout;
- wire [15:0] rcmb_y_lsb_dout;
-
- reg rcmb_xy_msb_ce = 1'b0;
- reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00;
- wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0];
- reg rcmb_xy_msb_clr;
-
- reg [46:0] rcmb_x_msb_din;
- reg [46:0] rcmb_y_msb_din;
- wire [15:0] rcmb_x_msb_dout;
- wire [15:0] rcmb_y_msb_dout;
-
- modexpng_recombinator_cell recomb_x_lsb
- (
- .clk (clk),
- .ce (rcmb_xy_lsb_ce_combined),
- .clr (rcmb_xy_lsb_clr),
- .din (rcmb_x_lsb_din),
- .dout (rcmb_x_lsb_dout)
- );
- modexpng_recombinator_cell recomb_y_lsb
- (
- .clk (clk),
- .ce (rcmb_xy_lsb_ce_combined),
- .clr (rcmb_xy_lsb_clr),
- .din (rcmb_y_lsb_din),
- .dout (rcmb_y_lsb_dout)
- );
-
- modexpng_recombinator_cell recomb_x_msb
- (
- .clk (clk),
- .ce (rcmb_xy_msb_ce_combined),
- .clr (rcmb_xy_msb_clr),
- .din (rcmb_x_msb_din),
- .dout (rcmb_x_msb_dout)
- );
-
- modexpng_recombinator_cell recomb_y_msb
- (
- .clk (clk),
- .ce (rcmb_xy_msb_ce_combined),
- .clr (rcmb_xy_msb_clr),
- .din (rcmb_y_msb_din),
- .dout (rcmb_y_msb_dout)
- );
-
- always @(posedge clk) begin
- // the clock-enable strobes follow the latched valid/aux/bitmap flags by one cycle
- rcmb_xy_lsb_ce <= xy_valid_latch_lsb;
- rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb;
- rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0];
- // LSB purge: load three ones on xy_purge_latch_lsb, otherwise shift right with zero fill
- if (xy_purge_latch_lsb)
- rcmb_xy_lsb_ce_purge <= 3'b111;
- else
- rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]};
- // MSB purge: load two ones when purge is latched and only bit 0 of the latched
- // MSB bitmap (of bits [1:0]) is set, otherwise shift right with zero fill.
- // The load must be nonblocking like every other assignment to this register:
- // the former blocking "=" raced with the clocked process that samples
- // rcmb_xy_msb_ce_combined on the same edge.
- if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1])
- rcmb_xy_msb_ce_purge <= 2'b11;
- else
- rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]};
- //
- end
-
-
- always @(posedge clk)
- // ena sets both clear flags; afterwards each flag is dropped by its own ce strobe
- if (ena) begin
- rcmb_xy_lsb_clr <= 1'b1;
- rcmb_xy_msb_clr <= 1'b1;
- end else begin
- if (rcmb_xy_lsb_ce) rcmb_xy_lsb_clr <= 1'b0; // only the plain lsb ce (not aux or purge) drops the flag
- if (rcmb_xy_msb_ce) rcmb_xy_msb_clr <= 1'b0;
- end
-
- always @(posedge clk)
- // input mux of the LSB recombinator cells; the valid flag has priority over the aux flag
- if (xy_valid_latch_lsb) begin
- rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb]; // latched product picked by the latched index
- rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb];
- end else if (xy_aux_latch_lsb) begin
- rcmb_x_lsb_din <= dsp_x_p_latch[8]; // entry 8 is the one loaded under xy_aux_lsb
- rcmb_y_lsb_din <= dsp_y_p_latch[8];
- end else begin
- rcmb_x_lsb_din <= {47{1'b0}}; // neither flag set: feed zeroes
- rcmb_y_lsb_din <= {47{1'b0}};
- end
-
- always @(posedge clk)
- // input mux of the MSB recombinator cells: latch entry 0 while bit 0 of the latched MSB bitmap is set, zeroes otherwise
- if (xy_bitmap_latch_msb[0]) begin
- rcmb_x_msb_din <= dsp_x_p_latch[0];
- rcmb_y_msb_din <= dsp_y_p_latch[0];
- end else begin
- rcmb_x_msb_din <= {47{1'b0}};
- rcmb_y_msb_din <= {47{1'b0}};
- end
-
-
- always @(posedge clk)
- // Loads stage 6 (the entry point) of every "_adv" pipeline from the current narrow read bank/address.
- case (mmm_fsm_state_next)
- // square phase: both LSB and MSB flags are computed, aux is never set
- MMM_FSM_STATE_MULT_SQUARE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_SQUARE_COL_N_BUSY: begin
- //
- xy_valid_lsb_adv [6] <= calc_square_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_aux_lsb_adv [6] <= 1'b0;
- xy_bitmap_lsb_adv[6] <= calc_square_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_index_lsb_adv [6] <= calc_square_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_purge_lsb_adv [6] <= calc_square_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- // word_index_last is passed as the 8-bit index_last_value argument
- xy_valid_msb_adv [6] <= calc_square_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- xy_bitmap_msb_adv[6] <= calc_square_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- xy_purge_msb_adv [6] <= calc_square_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- //
- end
- // triangle phase: LSB flags plus aux; purge and all MSB flags are forced to zero
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_TRIANGLE_COL_N_BUSY: begin
- //
- xy_valid_lsb_adv [6] <= calc_triangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_aux_lsb_adv [6] <= calc_triangle_aux_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_bitmap_lsb_adv[6] <= calc_triangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_index_lsb_adv [6] <= calc_triangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_purge_lsb_adv [6] <= 1'b0;
- //
- xy_valid_msb_adv [6] <= 1'b0;
- xy_bitmap_msb_adv[6] <= {8{1'b0}};
- xy_purge_msb_adv [6] <= 1'b0;
- //
- end
- // rectangle phase: both LSB and MSB flags are computed, aux is never set
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_TRIG,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY,
- MMM_FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: begin
- //
- xy_valid_lsb_adv [6] <= calc_rectangle_valid_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_aux_lsb_adv [6] <= 1'b0;
- xy_bitmap_lsb_adv[6] <= calc_rectangle_bitmap_lsb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_index_lsb_adv [6] <= calc_rectangle_index_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- xy_purge_lsb_adv [6] <= calc_rectangle_purge_lsb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr);
- // the rectangle MSB helpers accept word_index_last but do not read it
- xy_valid_msb_adv [6] <= calc_rectangle_valid_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- xy_bitmap_msb_adv[6] <= calc_rectangle_bitmap_msb(col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- xy_purge_msb_adv [6] <= calc_rectangle_purge_msb (col_index, col_index_last, rd_narrow_xy_bank, rd_narrow_xy_addr, word_index_last);
- //
- end
- // any other state: feed an idle entry (flags zero, index don't-care)
- default: begin
- //
- xy_valid_lsb_adv [6] <= 1'b0;
- xy_aux_lsb_adv [6] <= 1'b0;
- xy_bitmap_lsb_adv[6] <= {8{1'b0}};
- xy_index_lsb_adv [6] <= 3'dX;
- xy_purge_lsb_adv [6] <= 1'b0;
- //
- xy_valid_msb_adv [6] <= 1'b0;
- xy_bitmap_msb_adv[6] <= {8{1'b0}};
- xy_purge_msb_adv [6] <= 1'b0;
- //
- end
- //
- endcase
-
-
- always @(posedge clk) begin
- // stage 1 of the "_adv" pipeline feeds the working LSB registers
- xy_valid_lsb <= xy_valid_lsb_adv [1];
- xy_aux_lsb <= xy_aux_lsb_adv [1];
- xy_bitmap_lsb <= xy_bitmap_lsb_adv[1];
- xy_index_lsb <= xy_index_lsb_adv [1];
- xy_purge_lsb <= xy_purge_lsb_adv [1];
- // the "_latch" LSB registers are the working registers delayed by one more cycle
- xy_valid_latch_lsb <= xy_valid_lsb;
- xy_aux_latch_lsb <= xy_aux_lsb;
- xy_bitmap_latch_lsb <= xy_bitmap_lsb;
- xy_index_latch_lsb <= xy_index_lsb;
- xy_purge_latch_lsb <= xy_purge_lsb;
- // stage 1 also feeds the working MSB registers
- xy_valid_msb <= xy_valid_msb_adv[1];
- xy_bitmap_msb <= xy_bitmap_msb_adv[1];
- xy_purge_msb <= xy_purge_msb_adv[1];
- // MSB latch: load bitmap and purge on xy_valid_msb, otherwise shift the bitmap right (purge keeps its value)
- if (xy_valid_msb) begin
- xy_bitmap_latch_msb <= xy_bitmap_msb;
- xy_purge_latch_msb <= xy_purge_msb;
- end else begin
- xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]};
- end
- //
- // advance the look-ahead pipeline by one stage: stage i takes the value of stage i+1
- for (i=1; i<6; i=i+1) begin
- xy_valid_lsb_adv [i] <= xy_valid_lsb_adv [i+1];
- xy_aux_lsb_adv [i] <= xy_aux_lsb_adv [i+1];
- xy_bitmap_lsb_adv[i] <= xy_bitmap_lsb_adv[i+1];
- xy_index_lsb_adv [i] <= xy_index_lsb_adv [i+1];
- xy_purge_lsb_adv [i] <= xy_purge_lsb_adv [i+1];
- // xy_index_msb_adv is not shifted here
- xy_valid_msb_adv [i] <= xy_valid_msb_adv [i+1];
- xy_bitmap_msb_adv[i] <= xy_bitmap_msb_adv[i+1];
- xy_purge_msb_adv [i] <= xy_purge_msb_adv [i+1];
- end
- //
- end
-
- always @(posedge clk)
- // Product latches: the shift branch has priority over the capture branch.
- if (xy_bitmap_latch_msb[1]) // only shift 7 times
- // shift entries 0..7 down by one position
- for (i=0; i<8; i=i+1)
- // entry 7 has no upper neighbour inside this loop and is filled with X
- if (i < 7) begin
- dsp_x_p_latch[i] <= dsp_x_p_latch[i+1];
- dsp_y_p_latch[i] <= dsp_y_p_latch[i+1];
- end else begin
- dsp_x_p_latch[i] <= {47{1'bX}};
- dsp_y_p_latch[i] <= {47{1'bX}};
- end
- // capture branch: runs one cycle after dsp_xy_ce_p (dsp_xy_ce_p_dly1 is its one-clock delay)
- else if (dsp_xy_ce_p_dly1) begin
- // entries 0..7 are captured when selected by the LSB bitmap, or by the MSB bitmap while xy_valid_msb is set
- for (i=0; i<8; i=i+1)
- // both branches load the same split product; unselected entries keep their value
- if (xy_bitmap_lsb[i]) begin
- dsp_x_p_latch[i] <= dsp_x_p_split[i];
- dsp_y_p_latch[i] <= dsp_y_p_split[i];
- end else if (xy_valid_msb && xy_bitmap_msb[i]) begin
- dsp_x_p_latch[i] <= dsp_x_p_split[i];
- dsp_y_p_latch[i] <= dsp_y_p_split[i];
- end
- // entry 8 is captured only under xy_aux_lsb
- if (xy_aux_lsb) begin
- dsp_x_p_latch[8] <= dsp_x_p_split[8];
- dsp_y_p_latch[8] <= dsp_y_p_split[8];
- end
- //
- end
-
- reg rcmb_xy_lsb_valid = 1'b0;
- reg rcmb_xy_msb_valid = 1'b0;
-
- always @(posedge clk)
- // the valid flags are the combined clock enables delayed by one clock, cleared synchronously by rst
- if (rst) begin
- rcmb_xy_lsb_valid <= 1'b0;
- rcmb_xy_msb_valid <= 1'b0;
- end else begin
- rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined;
- rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined;
- end
-
-
- reg [BANK_ADDR_W-1:0] wide_xy_bank;
- reg [ 7:0] wide_xy_addr;
- reg [17:0] wide_x_dout;
- reg [17:0] wide_y_dout;
- reg wide_xy_valid = 1'b0;
-
- reg [BANK_ADDR_W-1:0] narrow_xy_bank;
- reg [ 7:0] narrow_xy_addr;
- reg [17:0] narrow_x_dout;
- reg [17:0] narrow_y_dout;
- reg narrow_xy_valid = 1'b0;
-
- reg [BANK_ADDR_W-1:0] rdct_xy_bank;
- reg [ 7:0] rdct_xy_addr;
- reg [17:0] rdct_x_dout;
- reg [17:0] rdct_y_dout;
- reg rdct_xy_valid = 1'b0;
-
- reg [ 7:0] cnt_lsb;
- reg [ 7:0] cnt_msb;
-
- reg cnt_lsb_wrapped;
- reg cnt_msb_wrapped;
-
- reg [31:0] rcmb_xy_msb_carry_0;
- reg [31:0] rcmb_xy_msb_carry_1;
-
- reg [31:0] rcmb_xy_msb_delay_0;
- reg [31:0] rcmb_xy_msb_delay_1;
- reg [31:0] rcmb_xy_msb_delay_2;
-
- reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0;
- reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0;
- reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0;
-
- reg rcmb_msb_flag_delay_0;
- reg rcmb_msb_flag_delay_1;
- reg rcmb_msb_flag_delay_2;
-
- assign rcmb_wide_xy_bank = wide_xy_bank;
- assign rcmb_wide_xy_addr = wide_xy_addr;
- assign rcmb_wide_x_dout = wide_x_dout;
- assign rcmb_wide_y_dout = wide_y_dout;
- assign rcmb_wide_xy_valid = wide_xy_valid;
-
- assign rcmb_narrow_xy_bank = narrow_xy_bank;
- assign rcmb_narrow_xy_addr = narrow_xy_addr;
- assign rcmb_narrow_x_dout = narrow_x_dout;
- assign rcmb_narrow_y_dout = narrow_y_dout;
- assign rcmb_narrow_xy_valid = narrow_xy_valid;
-
- assign rdct_narrow_xy_bank = rdct_xy_bank;
- assign rdct_narrow_xy_addr = rdct_xy_addr;
- assign rdct_narrow_x_dout = rdct_x_dout;
- assign rdct_narrow_y_dout = rdct_y_dout;
- assign rdct_narrow_xy_valid = rdct_xy_valid;
-
- reg rdy_reg = 1'b1;
- reg rdy_adv = 1'b1;
-
- assign rdy = rdy_reg;
-
-
- always @(posedge clk)
- // ena drops rdy immediately; otherwise rdy follows rdy_adv with one clock of delay
- if (ena) rdy_reg <= 1'b0;
- else rdy_reg <= rdy_adv;
-
- task advance_rcmb_msb_delay; // pushes one entry into the three-stage MSB delay line (data, counter and flag move together)
- input [15:0] dout_x;
- input [15:0] dout_y;
- input [ 7:0] cnt;
- input flag;
- begin
- // data: Y goes to the upper 16 bits, X to the lower 16 bits
- rcmb_xy_msb_delay_0 <= {dout_y, dout_x};
- rcmb_xy_msb_delay_1 <= rcmb_xy_msb_delay_0;
- rcmb_xy_msb_delay_2 <= rcmb_xy_msb_delay_1;
- // counter value that travels with the data
- rcmb_msb_cnt_delay_0 <= cnt;
- rcmb_msb_cnt_delay_1 <= rcmb_msb_cnt_delay_0;
- rcmb_msb_cnt_delay_2 <= rcmb_msb_cnt_delay_1;
- // flag that travels with the data
- rcmb_msb_flag_delay_0 <= flag;
- rcmb_msb_flag_delay_1 <= rcmb_msb_flag_delay_0;
- rcmb_msb_flag_delay_2 <= rcmb_msb_flag_delay_1;
- //
- end
- endtask
-
- task shift_rcmb_msb_carry; // pushes {dout_y, dout_x} into the two-stage carry register pair
- input [15:0] dout_x;
- input [15:0] dout_y;
- begin
- rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; // Y in bits [31:16], X in bits [15:0]
- rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; // previous entry moves to the second stage
- end
- endtask
-
- task _update_wide; // low-level helper: writes all five registers behind the rcmb_wide_* outputs in one go
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
- begin
- wide_xy_bank <= bank;
- wide_xy_addr <= addr;
- wide_x_dout <= dout_x;
- wide_y_dout <= dout_y;
- wide_xy_valid <= valid;
- end
- endtask
-
- task _update_narrow; // low-level helper: writes all five registers behind the rcmb_narrow_* outputs in one go
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
- begin
- narrow_xy_bank <= bank;
- narrow_xy_addr <= addr;
- narrow_x_dout <= dout_x;
- narrow_y_dout <= dout_y;
- narrow_xy_valid <= valid;
- end
- endtask
-
- task _update_rdct; // low-level helper: writes all five registers behind the rdct_narrow_* outputs in one go
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- input valid;
- begin
- rdct_xy_bank <= bank;
- rdct_xy_addr <= addr;
- rdct_x_dout <= dout_x;
- rdct_y_dout <= dout_y;
- rdct_xy_valid <= valid;
- end
- endtask
-
- task set_wide; // drives the wide output registers with the given bank/address/data and valid = 1
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- begin
- _update_wide(bank, addr, dout_x, dout_y, 1'b1);
- end
- endtask
-
- task set_narrow; // drives the narrow output registers with the given bank/address/data and valid = 1
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- begin
- _update_narrow(bank, addr, dout_x, dout_y, 1'b1);
- end
- endtask
-
- task set_rdct; // drives the reductor output registers with the given bank/address/data and valid = 1
- input [BANK_ADDR_W-1:0] bank;
- input [ 7:0] addr;
- input [17:0] dout_x;
- input [17:0] dout_y;
- begin
- _update_rdct(bank, addr, dout_x, dout_y, 1'b1);
- end
- endtask
-
- task clear_wide; // de-asserts wide valid; bank, address and data become don't-care (X)
- begin
- _update_wide(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
- end
- endtask
-
- task clear_narrow; // de-asserts narrow valid; bank, address and data become don't-care (X)
- begin
- _update_narrow(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
- end
- endtask
-
- task clear_rdct; // de-asserts reductor valid; bank, address and data become don't-care (X)
- begin
- _update_rdct(BANK_DONT_CARE, 8'hXX, {18{1'bX}}, {18{1'bX}}, 1'b0);
- end
- endtask
-
- task _set_cnt_lsb; // low-level helper: writes the LSB word counter together with its wrapped flag
- input [7:0] cnt;
- input wrapped;
- begin
- cnt_lsb <= cnt;
- cnt_lsb_wrapped <= wrapped;
- end
- endtask
-
- task _set_cnt_msb; // low-level helper: writes the MSB word counter together with its wrapped flag
- input [7:0] cnt;
- input wrapped;
- begin
- cnt_msb <= cnt;
- cnt_msb_wrapped <= wrapped;
- end
- endtask
-
- task inc_cnt_lsb; // increments cnt_lsb; at word_index_last it restarts from 0 and sets the wrapped flag
- begin
- if (cnt_lsb == word_index_last)
- _set_cnt_lsb(8'd0, 1'b1);
- else
- _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); // wrapped flag keeps its current value
- end
- endtask
-
- task inc_cnt_both; // increments the LSB and the MSB word counters in the same cycle
- begin
- inc_cnt_lsb;
- inc_cnt_msb;
- end
- endtask
-
- task inc_cnt_msb; // increments cnt_msb; at word_index_last it restarts from 0 and sets the wrapped flag
- begin
- if (cnt_msb == word_index_last)
- _set_cnt_msb(8'd0, 1'b1);
- else
- _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); // wrapped flag keeps its current value
- end
- endtask
-
- task clr_cnt_lsb; // resets the LSB word counter to 0 and clears its wrapped flag
- begin
- _set_cnt_lsb(8'd0, 1'b0);
- end
- endtask
-
- task clr_cnt_msb; // resets the MSB word counter to 0 and clears its wrapped flag
- begin
- _set_cnt_msb(8'd0, 1'b0);
- end
- endtask
-
-
-
- wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid};
-
- always @(posedge clk)
- // ena restarts both word counters; while rdy is low the task selected by rcmb_mode runs every clock
- if (ena) begin
- clr_cnt_lsb();
- clr_cnt_msb();
- end else if (!rdy)
- // mode 0 has no case item, so nothing is updated for it
- case (rcmb_mode)
- 2'd1: recombine_square();
- 2'd2: recombine_triangle();
- 2'd3: recombine_rectangle();
- endcase
-
- wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout};
- wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout};
-
- wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout};
- wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout};
-
- wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]};
- wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]};
-
- wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}};
- wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}};
-
-
- task recombine_square;
- // Per-clock update for mode 1; rcmb_xy_valid is {msb_valid, lsb_valid}.
- begin
- // step 1: advance the word counter(s) whose recombinator output is valid
- case (rcmb_xy_valid)
- //
- 2'b01: inc_cnt_lsb;
- 2'b10: inc_cnt_msb;
- 2'b11: inc_cnt_both;
- //
- endcase
- // step 2: choose what is presented on the wide output this cycle
- case (rcmb_xy_valid)
- // nothing valid: emit the oldest delay-line entry into BANK_WIDE_H if its flag is set, else go idle
- 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad);
- else clear_wide;
- // LSB only: the LSB word goes to BANK_WIDE_L at cnt_lsb
- 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- // MSB only: no output while cnt_msb is 0 or 1, afterwards the MSB word goes to BANK_WIDE_H
- 2'b10: if (cnt_msb < 8'd2) clear_wide;
- else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
- // both valid: after cnt_lsb has wrapped, LSB + carry_1 goes to BANK_WIDE_H, otherwise the LSB word goes to BANK_WIDE_L
- 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad);
- else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- //
- endcase
- // step 3: maintain the MSB delay line and the carry registers
- case (rcmb_xy_valid)
- // 2'b00 drains the delay line with an unflagged entry; 2'b10 stores the MSB word as carry while cnt_msb < 2
- 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0);
- 2'b10: if (cnt_msb < 8'd2) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout);
- // both valid: queue the MSB word (flagged) and, once wrapped, shift an X entry into the carry pair
- 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1);
- if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}});
- end
- //
- endcase
- //
- end
- //
- endtask
-
-
- task recombine_triangle;
- // Per-clock update for mode 2; only the LSB-only case (2'b01) produces output.
- begin
- // step 1: the LSB counter advances only when the LSB output alone is valid
- case (rcmb_xy_valid)
- //
- 2'b01: inc_cnt_lsb();
- //
- endcase
- // step 2: choose what is presented on the narrow output this cycle
- case (rcmb_xy_valid)
- // before the wrap the word goes to BANK_NARROW_Q at cnt_lsb, after the wrap to address 1 of BANK_NARROW_EXT
- 2'b00: clear_narrow;
- 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- else set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad);
- 2'b10: clear_narrow;
- 2'b11: clear_narrow;
- //
- endcase
- //
- end
- //
- endtask
-
-
- task recombine_rectangle;
- // Per-clock update for mode 3; results go to the reductor (rdct) output registers.
- begin
- // step 1: advance the word counter(s) whose recombinator output is valid
- case (rcmb_xy_valid)
- //
- 2'b01: inc_cnt_lsb;
- 2'b10: inc_cnt_msb;
- 2'b11: inc_cnt_both;
- //
- endcase
-// //
- case (rcmb_xy_valid)
-// //
- 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); // drain the oldest flagged delay-line entry
- else clear_rdct;
- 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // LSB only: word goes to BANK_RCMB_ML
- 2'b10: if (!cnt_msb_wrapped) begin
- if (cnt_msb < 8'd2) clear_rdct; // no output while cnt_msb is 0 or 1
- else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad);
- end else set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // after the MSB wrap the word goes to address 0 of BANK_RCMB_EXT
-
- 2'b11: set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); // both valid: LSB + carry_1 goes to BANK_RCMB_MH (no wrapped check, unlike the square task)
-// //
- endcase
-// //
- case (rcmb_xy_valid)
-// //
- 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); // drain the delay line with an unflagged entry
- 2'b10: begin
- if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); // store the MSB word as carry while cnt_msb < 2
- if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); // after the wrap, push an unflagged entry
- end
-// //
- 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); // queue the MSB word, flagged
- shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); // shift an X entry into the carry pair
- end
-// //
- endcase
- //
- end
- //
- endtask
-
-
- always @(posedge clk)
- // rdy_adv is the look-ahead of rdy_reg: ena clears it, and it is only re-evaluated while rdy_reg is low
- if (ena) begin
- rdy_adv <= 1'b0;
- end else if (!rdy_reg) begin
- // the completion condition depends on the recombination mode
- case (rcmb_mode)
- // mode 1 (square): evaluated only while neither recombinator output is valid
- 2'd1: case (rcmb_xy_valid)
- //
- 2'b00: begin
- //
- if (rcmb_msb_flag_delay_2) begin
- // set when the entry in stage 2 is flagged and the one in stage 1 behind it is not
- rdy_adv <= ~rcmb_msb_flag_delay_1;
- //
- end
- //
- end
- //
- endcase
- // mode 2 (triangle): evaluated when the LSB output alone is valid
- 2'd2: case (rcmb_xy_valid)
- //
- 2'b01: rdy_adv <= cnt_lsb_wrapped; //
- //
- endcase
- // mode 3 (rectangle): same condition as mode 1
- 2'd3: case (rcmb_xy_valid)
- //
- 2'b00: begin
- //
- if (rcmb_msb_flag_delay_2) begin
- //
- rdy_adv <= ~rcmb_msb_flag_delay_1;
- //
- end
- //
- end
- //
- endcase
- //
- endcase
- //
- end
-
-
-
- // add ready for mode=3
-endmodule
diff --git a/rtl/_modexpng_storage_block.v b/rtl/_modexpng_storage_block.v
deleted file mode 100644
index d6ef1ee..0000000
--- a/rtl/_modexpng_storage_block.v
+++ /dev/null
@@ -1,219 +0,0 @@
-module modexpng_storage_block
-(
- clk, rst,
-
- wr_wide_xy_ena,
- wr_wide_xy_bank,
- wr_wide_xy_addr,
- wr_wide_x_din,
- wr_wide_y_din,
-
- wr_narrow_xy_ena,
- wr_narrow_xy_bank,
- wr_narrow_xy_addr,
- wr_narrow_x_din,
- wr_narrow_y_din,
-
- rd_wide_xy_ena,
- rd_wide_xy_ena_aux,
- rd_wide_xy_bank,
- rd_wide_xy_bank_aux,
- rd_wide_xy_addr,
- rd_wide_xy_addr_aux,
- rd_wide_x_dout,
- rd_wide_y_dout,
- rd_wide_x_dout_aux,
- rd_wide_y_dout_aux,
-
- rd_narrow_xy_ena,
- rd_narrow_xy_bank,
- rd_narrow_xy_addr,
- rd_narrow_x_dout,
- rd_narrow_y_dout
-);
-
-
- //
- // Headers
- //
- `include "modexpng_parameters.vh"
-
-
- //
- // Ports
- //
- input clk;
- input rst;
-
- input wr_wide_xy_ena;
- input [BANK_ADDR_W -1:0] wr_wide_xy_bank;
- input [ OP_ADDR_W -1:0] wr_wide_xy_addr;
- input [ WORD_EXT_W -1:0] wr_wide_x_din;
- input [ WORD_EXT_W -1:0] wr_wide_y_din;
-
- input wr_narrow_xy_ena;
- input [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] wr_narrow_x_din;
- input [ WORD_EXT_W -1:0] wr_narrow_y_din;
-
- input rd_wide_xy_ena;
- input rd_wide_xy_ena_aux;
- input [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
- input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
- input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
- input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
- output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
- output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
- output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
- output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
-
- input rd_narrow_xy_ena;
- input [BANK_ADDR_W -1:0] rd_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
- output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
-
-
- //
- // Internal Registers
- //
- reg rd_wide_xy_reg_ena = 1'b0;
- reg rd_wide_xy_reg_ena_aux = 1'b0;
- reg rd_narrow_xy_reg_ena = 1'b0;
-
- always @(posedge clk) begin
- //
- rd_wide_xy_reg_ena <= rst ? 1'b0 : rd_wide_xy_ena;
- rd_wide_xy_reg_ena_aux <= rst ? 1'b0 : rd_wide_xy_ena_aux;
- rd_narrow_xy_reg_ena <= rst ? 1'b0 : rd_narrow_xy_ena;
- //
- end
-
-
- //
- // Helper Signals
- //
- wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_wide_xy_offset;
- wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset[0:NUM_MULTS_HALF-1];
- wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_wide_xy_offset_aux;
- wire [BANK_ADDR_W + OP_ADDR_W -1:0] wr_narrow_xy_offset;
- wire [BANK_ADDR_W + OP_ADDR_W -1:0] rd_narrow_xy_offset;
-
- assign wr_wide_xy_offset = {wr_wide_xy_bank, wr_wide_xy_addr};
- assign rd_wide_xy_offset_aux = {rd_wide_xy_bank_aux, rd_wide_xy_addr_aux};
- assign wr_narrow_xy_offset = {wr_narrow_xy_bank, wr_narrow_xy_addr};
- assign rd_narrow_xy_offset = {rd_narrow_xy_bank, rd_narrow_xy_addr};
-
- //
- // "Wide" Storage
- //
- genvar z;
- generate for (z=0; z<NUM_MULTS_HALF; z=z+1)
- begin : gen_wide_bram
- //
- assign rd_wide_xy_offset[z] = {rd_wide_xy_bank, rd_wide_xy_addr[z*OP_ADDR_W +: OP_ADDR_W]};
- //
- modexpng_sdp_36k_wrapper wide_bram_x
- (
- .clk (clk),
-
- .ena (wr_wide_xy_ena),
- .wea (wr_wide_xy_ena),
- .addra (wr_wide_xy_offset),
- .dina (wr_wide_x_din),
-
- .enb (rd_wide_xy_ena),
- .regceb (rd_wide_xy_reg_ena),
- .addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_x_dout[z*WORD_EXT_W +: WORD_EXT_W])
- );
- //
- modexpng_sdp_36k_wrapper wide_bram_y
- (
- .clk (clk),
-
- .ena (wr_wide_xy_ena),
- .wea (wr_wide_xy_ena),
- .addra (wr_wide_xy_offset),
- .dina (wr_wide_y_din),
-
- .enb (rd_wide_xy_ena),
- .regceb (rd_wide_xy_reg_ena),
- .addrb (rd_wide_xy_offset[z]),
- .doutb (rd_wide_y_dout[z*WORD_EXT_W +: WORD_EXT_W])
- );
- //
- end
- endgenerate
-
-
- //
- // Auxilary Storage
- //
- modexpng_sdp_36k_wrapper wide_bram_x_aux
- (
- .clk (clk),
-
- .ena (wr_wide_xy_ena),
- .wea (wr_wide_xy_ena),
- .addra (wr_wide_xy_offset),
- .dina (wr_wide_x_din),
-
- .enb (rd_wide_xy_ena_aux),
- .regceb (rd_wide_xy_reg_ena_aux),
- .addrb (rd_wide_xy_offset_aux),
- .doutb (rd_wide_x_dout_aux)
- );
- //
- modexpng_sdp_36k_wrapper wide_bram_y_aux
- (
- .clk (clk),
-
- .ena (wr_wide_xy_ena),
- .wea (wr_wide_xy_ena),
- .addra (wr_wide_xy_offset),
- .dina (wr_wide_y_din),
-
- .enb (rd_wide_xy_ena_aux),
- .regceb (rd_wide_xy_reg_ena_aux),
- .addrb (rd_wide_xy_offset_aux),
- .doutb (rd_wide_y_dout_aux)
- );
-
-
- //
- // "Narrow" Storage
- //
- modexpng_sdp_36k_wrapper narrow_bram_x
- (
- .clk (clk),
-
- .ena (wr_narrow_xy_ena),
- .wea (wr_narrow_xy_ena),
- .addra (wr_narrow_xy_offset),
- .dina (wr_narrow_x_din),
-
- .enb (rd_narrow_xy_ena),
- .regceb (rd_narrow_xy_reg_ena),
- .addrb (rd_narrow_xy_offset),
- .doutb (rd_narrow_x_dout)
- );
-
- modexpng_sdp_36k_wrapper narrow_bram_y
- (
- .clk (clk),
-
- .ena (wr_narrow_xy_ena),
- .wea (wr_narrow_xy_ena),
- .addra (wr_narrow_xy_offset),
- .dina (wr_narrow_y_din),
-
- .enb (rd_narrow_xy_ena),
- .regceb (rd_narrow_xy_reg_ena),
- .addrb (rd_narrow_xy_offset),
- .doutb (rd_narrow_y_dout)
- );
-
-
-endmodule
diff --git a/rtl/_modexpng_storage_manager.v b/rtl/_modexpng_storage_manager.v
deleted file mode 100644
index 958596a..0000000
--- a/rtl/_modexpng_storage_manager.v
+++ /dev/null
@@ -1,199 +0,0 @@
-module modexpng_storage_manager
-(
- clk, rst,
-
- wr_wide_xy_ena,
- wr_wide_xy_bank,
- wr_wide_xy_addr,
- wr_wide_x_din,
- wr_wide_y_din,
-
- wr_narrow_xy_ena,
- wr_narrow_xy_bank,
- wr_narrow_xy_addr,
- wr_narrow_x_din,
- wr_narrow_y_din,
-
- ext_wide_xy_ena,
- ext_wide_xy_bank,
- ext_wide_xy_addr,
- ext_wide_x_din,
- ext_wide_y_din,
-
- ext_narrow_xy_ena,
- ext_narrow_xy_bank,
- ext_narrow_xy_addr,
- ext_narrow_x_din,
- ext_narrow_y_din,
-
- rcmb_wide_xy_ena,
- rcmb_wide_xy_bank,
- rcmb_wide_xy_addr,
- rcmb_wide_x_din,
- rcmb_wide_y_din,
-
- rcmb_narrow_xy_ena,
- rcmb_narrow_xy_bank,
- rcmb_narrow_xy_addr,
- rcmb_narrow_x_din,
- rcmb_narrow_y_din
-);
-
-
- //
- // Headers
- //
- `include "../rtl/modexpng_parameters.vh"
-
-
- //
- // Ports
- //
- input clk;
- input rst;
-
- output wr_wide_xy_ena;
- output [BANK_ADDR_W -1:0] wr_wide_xy_bank;
- output [ OP_ADDR_W -1:0] wr_wide_xy_addr;
- output [ WORD_EXT_W -1:0] wr_wide_x_din;
- output [ WORD_EXT_W -1:0] wr_wide_y_din;
-
- output wr_narrow_xy_ena;
- output [BANK_ADDR_W -1:0] wr_narrow_xy_bank;
- output [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] wr_narrow_x_din;
- output [ WORD_EXT_W -1:0] wr_narrow_y_din;
-
- input ext_wide_xy_ena;
- input [BANK_ADDR_W -1:0] ext_wide_xy_bank;
- input [ OP_ADDR_W -1:0] ext_wide_xy_addr;
- input [ WORD_EXT_W -1:0] ext_wide_x_din;
- input [ WORD_EXT_W -1:0] ext_wide_y_din;
-
- input ext_narrow_xy_ena;
- input [BANK_ADDR_W -1:0] ext_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] ext_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] ext_narrow_x_din;
- input [ WORD_EXT_W -1:0] ext_narrow_y_din;
-
- input rcmb_wide_xy_ena;
- input [BANK_ADDR_W -1:0] rcmb_wide_xy_bank;
- input [ OP_ADDR_W -1:0] rcmb_wide_xy_addr;
- input [ WORD_EXT_W -1:0] rcmb_wide_x_din;
- input [ WORD_EXT_W -1:0] rcmb_wide_y_din;
-
- input rcmb_narrow_xy_ena;
- input [BANK_ADDR_W -1:0] rcmb_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] rcmb_narrow_x_din;
- input [ WORD_EXT_W -1:0] rcmb_narrow_y_din;
-
- reg wr_wide_xy_ena_reg = 1'b0;
- reg [BANK_ADDR_W -1:0] wr_wide_xy_bank_reg;
- reg [ OP_ADDR_W -1:0] wr_wide_xy_addr_reg;
- reg [ WORD_EXT_W -1:0] wr_wide_x_din_reg;
- reg [ WORD_EXT_W -1:0] wr_wide_y_din_reg;
-
- reg wr_narrow_xy_ena_reg = 1'b0;
- reg [BANK_ADDR_W -1:0] wr_narrow_xy_bank_reg;
- reg [ OP_ADDR_W -1:0] wr_narrow_xy_addr_reg;
- reg [ WORD_EXT_W -1:0] wr_narrow_x_din_reg;
- reg [ WORD_EXT_W -1:0] wr_narrow_y_din_reg;
-
- task _update_wide;
- input xy_ena;
- input [BANK_ADDR_W -1:0] xy_bank;
- input [ OP_ADDR_W -1:0] xy_addr;
- input [ WORD_EXT_W -1:0] x_din;
- input [ WORD_EXT_W -1:0] y_din;
- begin
- wr_wide_xy_ena_reg <= xy_ena;
- wr_wide_xy_bank_reg <= xy_bank;
- wr_wide_xy_addr_reg <= xy_addr;
- wr_wide_x_din_reg <= x_din;
- wr_wide_y_din_reg <= y_din;
- end
- endtask
-
- task _update_narrow;
- input xy_ena;
- input [BANK_ADDR_W -1:0] xy_bank;
- input [ OP_ADDR_W -1:0] xy_addr;
- input [ WORD_EXT_W -1:0] x_din;
- input [ WORD_EXT_W -1:0] y_din;
- begin
- wr_narrow_xy_ena_reg <= xy_ena;
- wr_narrow_xy_bank_reg <= xy_bank;
- wr_narrow_xy_addr_reg <= xy_addr;
- wr_narrow_x_din_reg <= x_din;
- wr_narrow_y_din_reg <= y_din;
- end
- endtask
-
- task enable_wide;
- input [BANK_ADDR_W -1:0] xy_bank;
- input [ OP_ADDR_W -1:0] xy_addr;
- input [ WORD_EXT_W -1:0] x_din;
- input [ WORD_EXT_W -1:0] y_din;
- begin
- _update_wide(1'b1, xy_bank, xy_addr, x_din, y_din);
- end
- endtask
-
- task enable_narrow;
- input [BANK_ADDR_W -1:0] xy_bank;
- input [ OP_ADDR_W -1:0] xy_addr;
- input [ WORD_EXT_W -1:0] x_din;
- input [ WORD_EXT_W -1:0] y_din;
- begin
- _update_narrow(1'b1, xy_bank, xy_addr, x_din, y_din);
- end
- endtask
-
- task disable_wide;
- begin
- _update_wide(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
- end
- endtask
-
- task disable_narrow;
- begin
- _update_narrow(1'b0, BANK_DONT_CARE, OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE);
- end
- endtask
-
- always @(posedge clk)
- //
- if (rst) disable_wide;
- else begin
- //
- if (ext_wide_xy_ena) enable_wide(ext_wide_xy_bank, ext_wide_xy_addr, ext_wide_x_din, ext_wide_y_din);
- else if (rcmb_wide_xy_ena) enable_wide(rcmb_wide_xy_bank, rcmb_wide_xy_addr, rcmb_wide_x_din, rcmb_wide_y_din);
- else disable_wide;
- //
- end
-
- always @(posedge clk)
- //
- if (rst) disable_narrow;
- else begin
- //
- if (ext_narrow_xy_ena) enable_narrow(ext_narrow_xy_bank, ext_narrow_xy_addr, ext_narrow_x_din, ext_narrow_y_din);
- else if (rcmb_narrow_xy_ena) enable_narrow(rcmb_narrow_xy_bank, rcmb_narrow_xy_addr, rcmb_narrow_x_din, rcmb_narrow_y_din);
- else disable_narrow;
- //
- end
-
- assign wr_wide_xy_ena = wr_wide_xy_ena_reg;
- assign wr_wide_xy_bank = wr_wide_xy_bank_reg;
- assign wr_wide_xy_addr = wr_wide_xy_addr_reg;
- assign wr_wide_x_din = wr_wide_x_din_reg;
- assign wr_wide_y_din = wr_wide_y_din_reg;
-
- assign wr_narrow_xy_ena = wr_narrow_xy_ena_reg;
- assign wr_narrow_xy_bank = wr_narrow_xy_bank_reg;
- assign wr_narrow_xy_addr = wr_narrow_xy_addr_reg;
- assign wr_narrow_x_din = wr_narrow_x_din_reg;
- assign wr_narrow_y_din = wr_narrow_y_din_reg;
-
-endmodule
diff --git a/rtl/modexpng_core_top.v b/rtl/modexpng_core_top.v
new file mode 100644
index 0000000..e834d37
--- /dev/null
+++ b/rtl/modexpng_core_top.v
@@ -0,0 +1,41 @@
+module modexpng_core_top
+(
+ clk, clk_bus,
+ rst,
+ next, valid,
+ word_index_last,
+ bus_cs,
+ bus_we,
+ bus_addr,
+ bus_data_wr,
+ bus_data_rd
+);
+
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+
+
+ //
+ // Ports (top-level shell: only the port list is defined so far, no logic yet)
+ //
+ input clk; // core clock
+ input clk_bus; // bus clock
+
+ input rst;
+
+ input next;
+ output valid;
+
+ input [ OP_ADDR_W -1:0] word_index_last; // was listed in the port list but never declared
+
+ input bus_cs;
+ input bus_we;
+ input [4 * (BANK_ADDR_W + BUS_OP_ADDR_W) -1:0] bus_addr; // NOTE(review): storage block uses 2 + BANK_ADDR_W + BUS_OP_ADDR_W -- confirm "4 *" is intended
+ input [ BUS_DATA_W -1:0] bus_data_wr;
+ output [ BUS_DATA_W -1:0] bus_data_rd;
+
+
+endmodule
diff --git a/rtl/modexpng_mmm_dual.v b/rtl/modexpng_mmm_dual.v
index df0f823..babd565 100644
--- a/rtl/modexpng_mmm_dual.v
+++ b/rtl/modexpng_mmm_dual.v
@@ -45,7 +45,7 @@ module modexpng_mmm_dual
rcmb_y_dout,
rcmb_xy_valid,
- rdct_ena
+ rdct_ena, rdct_rdy
);
@@ -110,6 +110,7 @@ module modexpng_mmm_dual
output rcmb_xy_valid;
output rdct_ena;
+ input rdct_rdy;
//
@@ -928,7 +929,11 @@ module modexpng_mmm_dual
FSM_STATE_MULT_RECTANGLE_COL_N_TRIG: fsm_state_next = FSM_STATE_MULT_RECTANGLE_COL_N_BUSY ;
FSM_STATE_MULT_RECTANGLE_COL_N_BUSY: fsm_state_next = rectangle_done ? fsm_state_after_mult_rectangle : FSM_STATE_MULT_RECTANGLE_COL_N_BUSY;
- FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_STOP : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+ FSM_STATE_MULT_RECTANGLE_HOLDOFF: fsm_state_next = rcmb_rdy ? FSM_STATE_WAIT_REDUCTOR : FSM_STATE_MULT_RECTANGLE_HOLDOFF;
+
+ FSM_STATE_WAIT_REDUCTOR: fsm_state_next = rdct_rdy ? FSM_STATE_STOP : FSM_STATE_WAIT_REDUCTOR;
+
+ FSM_STATE_STOP: fsm_state_next = FSM_STATE_IDLE ;
default: fsm_state_next = FSM_STATE_IDLE ;
@@ -944,13 +949,28 @@ module modexpng_mmm_dual
assign rdct_ena = rdct_ena_reg;
- always @(posedge clk) // add reset!!!
+ always @(posedge clk)
//
- case (fsm_state)
+ if (rst) rdct_ena_reg <= 1'b0;
+ else case (fsm_state)
FSM_STATE_MULT_RECTANGLE_COL_0_INIT: rdct_ena_reg <= 1'b1;
default: rdct_ena_reg <= 1'b0;
endcase
+ //
+ // Ready Logic: rdy drops when an operation is accepted and returns once the FSM reaches STOP
+ //
+ reg rdy_reg = 1'b1; // ready out of configuration
+
+ assign rdy = rdy_reg;
+
+ always @(posedge clk)
+ // synchronous reset returns the block to the ready state
+ if (rst) rdy_reg <= 1'b1;
+ else begin
+ if (rdy && ena) rdy_reg <= 1'b0; // ena seen while ready: go busy
+ if (!rdy && (fsm_state == FSM_STATE_STOP)) rdy_reg <= 1'b1; // busy and FSM in STOP: ready again
+ end
endmodule
diff --git a/rtl/modexpng_parameters.vh b/rtl/modexpng_parameters.vh
index 514fc21..6d63735 100644
--- a/rtl/modexpng_parameters.vh
+++ b/rtl/modexpng_parameters.vh
@@ -17,6 +17,9 @@ endfunction
localparam WORD_W = 16;
localparam WORD_EXT_W = 18;
localparam MAC_W = 47;
+localparam BUS_DATA_W = 32;
+
localparam MAX_OP_W = 4096;
+localparam BUS_OP_ADDR_W = cryptech_clog2(MAX_OP_W / BUS_DATA_W); // derived from MAX_OP_W, so it must be declared after it
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v
index de60d1f..fe4ffb9 100644
--- a/rtl/modexpng_recombinator_block.v
+++ b/rtl/modexpng_recombinator_block.v
@@ -1218,8 +1218,5 @@ module modexpng_recombinator_block
endcase
//
end
-
-
-
- // add ready for mode=3
+
endmodule
diff --git a/rtl/modexpng_sdp_36k_wrapper.v b/rtl/modexpng_sdp_36k_wrapper.v
index 0295697..433ee55 100644
--- a/rtl/modexpng_sdp_36k_wrapper.v
+++ b/rtl/modexpng_sdp_36k_wrapper.v
@@ -13,7 +13,7 @@ module modexpng_sdp_36k_wrapper
//
// Headers
//
- `include "../rtl/modexpng_parameters.vh"
+ `include "modexpng_parameters.vh"
//
diff --git a/rtl/modexpng_sdp_36k_x16_x32_wrapper.v b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
new file mode 100644
index 0000000..69c5383
--- /dev/null
+++ b/rtl/modexpng_sdp_36k_x16_x32_wrapper.v
@@ -0,0 +1,75 @@
+module modexpng_sdp_36k_x16_x32_wrapper // NOTE(review): modexpng_tdp_36k_x16_x32_wrapper.v declares a module with this same name
+(
+ clk, clk_bus,
+
+ ena, wea,
+ addra, dina,
+
+ enb, regceb,
+ addrb, doutb
+);
+
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+
+
+ //
+ // Ports: port A is the write side (clocked by clk_bus), port B is the read side (clocked by clk)
+ //
+ input clk; // read clock
+ input clk_bus; // write clock
+
+ input ena; // write port enable
+ input wea; // write strobe, replicated onto every WE bit below
+ input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
+ input [ BUS_DATA_W -1:0] dina; // was BUD_DATA_W (undeclared identifier)
+
+ input enb; // read port enable
+ input regceb; // forwarded to REGCE
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb;
+ output [ WORD_W -1:0] doutb;
+
+
+ //
+ // BRAM_SDP_MACRO: BUS_DATA_W-wide writes, WORD_W-wide reads
+ //
+ BRAM_SDP_MACRO #
+ (
+ .DEVICE ("7SERIES"),
+
+ .BRAM_SIZE ("36Kb"),
+
+ .WRITE_WIDTH (BUS_DATA_W),
+ .READ_WIDTH (WORD_W),
+
+ .DO_REG (0), // NOTE(review): regceb is wired to REGCE but the output register is disabled here -- confirm intended latency
+ .WRITE_MODE ("READ_FIRST"),
+
+ .SRVAL (72'h000000000000000000),
+ .INIT (72'h000000000000000000),
+
+ .INIT_FILE ("NONE"),
+ .SIM_COLLISION_CHECK ("NONE")
+ )
+ BRAM_SDP_MACRO_inst
+ (
+ .RST (1'b0),
+
+ .WRCLK (clk_bus),
+ .WREN (ena),
+ .WE ({4{wea}}),
+ .WRADDR (addra),
+ .DI (dina),
+
+ .RDCLK (clk),
+ .RDEN (enb),
+ .REGCE (regceb),
+ .RDADDR (addrb),
+ .DO (doutb)
+ );
+
+
+endmodule
diff --git a/rtl/modexpng_sdp_36k_x32_x16_wrapper.v b/rtl/modexpng_sdp_36k_x32_x16_wrapper.v
new file mode 100644
index 0000000..ff86802
--- /dev/null
+++ b/rtl/modexpng_sdp_36k_x32_x16_wrapper.v
@@ -0,0 +1,73 @@
+module modexpng_sdp_36k_x32_x16_wrapper // simple dual-port BRAM: WORD_W-wide writes on clk, BUS_DATA_W-wide reads on clk_bus
+(
+ clk, clk_bus,
+
+ ena, wea,
+ addra, dina,
+
+ enb,
+ addrb, doutb
+);
+
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+
+
+ //
+ // Ports: port A is the write side (clocked by clk), port B is the read side (clocked by clk_bus)
+ //
+ input clk; // write clock
+ input clk_bus; // read clock
+
+ input ena; // write port enable
+ input wea; // write strobe, replicated onto both WE bits below
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addra; // write address
+ input [ WORD_W -1:0] dina; // write data
+
+ input enb; // read port enable
+ input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addrb; // read address
+ output [ BUS_DATA_W -1:0] doutb; // read data
+
+
+ //
+ // BRAM_SDP_MACRO: no output register is used (DO_REG = 0, REGCE tied low)
+ //
+ BRAM_SDP_MACRO #
+ (
+ .DEVICE ("7SERIES"),
+
+ .BRAM_SIZE ("36Kb"),
+
+ .WRITE_WIDTH (WORD_W),
+ .READ_WIDTH (BUS_DATA_W),
+
+ .DO_REG (0),
+ .WRITE_MODE ("READ_FIRST"),
+
+ .SRVAL (72'h000000000000000000),
+ .INIT (72'h000000000000000000),
+
+ .INIT_FILE ("NONE"),
+ .SIM_COLLISION_CHECK ("NONE")
+ )
+ BRAM_SDP_MACRO_inst
+ (
+ .RST (1'b0), // macro reset is never asserted
+
+ .WRCLK (clk),
+ .WREN (ena),
+ .WE ({2{wea}}),
+ .WRADDR (addra),
+ .DI (dina),
+
+ .RDCLK (clk_bus),
+ .RDEN (enb),
+ .REGCE (1'b0), // tied off, consistent with DO_REG = 0
+ .RDADDR (addrb),
+ .DO (doutb)
+ );
+
+endmodule
diff --git a/rtl/modexpng_storage_block.v b/rtl/modexpng_storage_block.v
index be04c7c..d5b9b24 100644
--- a/rtl/modexpng_storage_block.v
+++ b/rtl/modexpng_storage_block.v
@@ -1,6 +1,6 @@
module modexpng_storage_block
(
- clk, rst,
+ clk, clk_bus, rst,
wr_wide_xy_ena,
wr_wide_xy_bank,
@@ -29,7 +29,26 @@ module modexpng_storage_block
rd_narrow_xy_bank,
rd_narrow_xy_addr,
rd_narrow_x_dout,
- rd_narrow_y_dout
+ rd_narrow_y_dout,
+
+ bus_cs,
+ bus_we,
+ bus_addr,
+ bus_data_wr,
+ bus_data_rd,
+
+ in_1_en,
+ in_1_addr,
+ in_1_dout,
+
+ in_2_en,
+ in_2_addr,
+ in_2_dout,
+
+ out_en,
+ out_we,
+ out_addr,
+ out_din
);
//
@@ -37,47 +56,71 @@ module modexpng_storage_block
//
`include "modexpng_parameters.vh"
+
//
// Ports
//
- input clk;
- input rst;
-
- input wr_wide_xy_ena;
- input [ BANK_ADDR_W -1:0] wr_wide_xy_bank;
- input [ OP_ADDR_W -1:0] wr_wide_xy_addr;
- input [ WORD_EXT_W -1:0] wr_wide_x_din;
- input [ WORD_EXT_W -1:0] wr_wide_y_din;
-
- input wr_narrow_xy_ena;
- input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
- input [ WORD_EXT_W -1:0] wr_narrow_x_din;
- input [ WORD_EXT_W -1:0] wr_narrow_y_din;
-
- input rd_wide_xy_ena;
- input rd_wide_xy_ena_aux;
- input [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
- input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
- input [NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
- input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
- output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
- output [NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
- output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
- output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
-
- input rd_narrow_xy_ena;
- input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
- input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
- output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
- output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+ input clk;
+ input clk_bus;
+ input rst;
+ input wr_wide_xy_ena;
+ input [ BANK_ADDR_W -1:0] wr_wide_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_wide_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_wide_x_din;
+ input [ WORD_EXT_W -1:0] wr_wide_y_din;
+
+ input wr_narrow_xy_ena;
+ input [ BANK_ADDR_W -1:0] wr_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] wr_narrow_xy_addr;
+ input [ WORD_EXT_W -1:0] wr_narrow_x_din;
+ input [ WORD_EXT_W -1:0] wr_narrow_y_din;
+
+ input rd_wide_xy_ena;
+ input rd_wide_xy_ena_aux;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank;
+ input [ BANK_ADDR_W -1:0] rd_wide_xy_bank_aux;
+ input [ NUM_MULTS_HALF * OP_ADDR_W -1:0] rd_wide_xy_addr;
+ input [ OP_ADDR_W -1:0] rd_wide_xy_addr_aux;
+ output [ NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_x_dout;
+ output [ NUM_MULTS_HALF * WORD_EXT_W -1:0] rd_wide_y_dout;
+ output [ WORD_EXT_W -1:0] rd_wide_x_dout_aux;
+ output [ WORD_EXT_W -1:0] rd_wide_y_dout_aux;
+
+ input rd_narrow_xy_ena;
+ input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank;
+ input [ OP_ADDR_W -1:0] rd_narrow_xy_addr;
+ output [ WORD_EXT_W -1:0] rd_narrow_x_dout;
+ output [ WORD_EXT_W -1:0] rd_narrow_y_dout;
+
+ input bus_cs;
+ input bus_we;
+ input [2 + BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr;
+ input [ BUS_DATA_W -1:0] bus_data_wr;
+ output [ BUS_DATA_W -1:0] bus_data_rd;
+
+ input in_1_en;
+ input [ BANK_ADDR_W + OP_ADDR_W -1:0] in_1_addr;
+ output [ WORD_W -1:0] in_1_dout;
+
+ input in_2_en;
+ input [ BANK_ADDR_W + OP_ADDR_W -1:0] in_2_addr;
+ output [ WORD_W -1:0] in_2_dout;
+
+ input out_en;
+ input out_we;
+ input [ BANK_ADDR_W + OP_ADDR_W -1:0] out_addr;
+ input [ WORD_W -1:0] out_din;
+
+
//
// Internal Registers
//
reg rd_wide_xy_reg_ena = 1'b0;
reg rd_wide_xy_reg_ena_aux = 1'b0;
reg rd_narrow_xy_reg_ena = 1'b0;
+ reg in_1_reg_en = 1'b0;
+ reg in_2_reg_en = 1'b0;
always @(posedge clk)
//
@@ -85,10 +128,14 @@ module modexpng_storage_block
rd_wide_xy_reg_ena <= 1'b0;
rd_wide_xy_reg_ena_aux <= 1'b0;
rd_narrow_xy_reg_ena <= 1'b0;
+ in_1_reg_en <= 1'b0;
+ in_2_reg_en <= 1'b0;
end else begin
rd_wide_xy_reg_ena <= rd_wide_xy_ena;
rd_wide_xy_reg_ena_aux <= rd_wide_xy_ena_aux;
rd_narrow_xy_reg_ena <= rd_narrow_xy_ena;
+ in_1_reg_en <= in_1_en;
+ in_2_reg_en <= in_2_en;
end
//
@@ -213,4 +260,87 @@ module modexpng_storage_block
.doutb (rd_narrow_y_dout)
);
+ //
+ // INPUT, OUTPUT Storage Buffers
+ //
+ wire [ 2 -1:0] bus_addr_msb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W +: 2]; // top two address bits select the buffer
+ wire [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] bus_addr_lsb = bus_addr[BANK_ADDR_W + BUS_OP_ADDR_W -1:0]; // offset inside the selected buffer
+ reg [ 2 -1:0] bus_addr_msb_dly;
+ wire [ BUS_DATA_W -1:0] bus_data_rd_input_1;
+ wire [ BUS_DATA_W -1:0] bus_data_rd_output;
+
+ wire bus_data_wr_input_1 = bus_we && (bus_addr_msb == 2'd0); // write strobes are gated by bus_we; the previous code
+ wire bus_data_wr_input_2 = bus_we && (bus_addr_msb == 2'd1); // tested the data bus itself, so bus_we was ignored
+
+ /* INPUT_1 -- NOTE(review): .douta exists only in the module of this name declared in modexpng_tdp_36k_x16_x32_wrapper.v; modexpng_sdp_36k_x16_x32_wrapper.v declares the same name without it */
+ modexpng_sdp_36k_x16_x32_wrapper bram_input_1
+ (
+ .clk (clk), // core clock
+ .clk_bus (clk_bus), // bus clock
+
+ .ena (bus_cs), // bus side read-write
+ .wea (bus_data_wr_input_1), // write strobe, qualified by bus_addr_msb == 2'd0
+ .addra (bus_addr_lsb), // bus address without the two select bits
+ .dina (bus_data_wr), // bus write data
+ .douta (bus_data_rd_input_1), // read-back data, selected by the bus read mux for 2'd0
+
+ .enb (in_1_en), // core side read-only
+ .regceb (in_1_reg_en), // in_1_en delayed by one clk cycle
+ .addrb (in_1_addr), // core read address
+ .doutb (in_1_dout) // core read data
+ );
+
+
+ /* INPUT_2 -- same wrapper as INPUT_1 but without a bus read-back connection */
+ modexpng_sdp_36k_x16_x32_wrapper bram_input_2
+ (
+ .clk (clk), // core clock
+ .clk_bus (clk_bus), // bus clock
+
+ .ena (bus_cs), // bus side write-only
+ .wea (bus_data_wr_input_2), // write strobe, qualified by bus_addr_msb == 2'd1
+ .addra (bus_addr_lsb), // bus address without the two select bits
+ .dina (bus_data_wr), // bus write data
+
+ .enb (in_2_en), // core side read-only
+ .regceb (in_2_reg_en), // in_2_en delayed by one clk cycle
+ .addrb (in_2_addr), // core read address
+ .doutb (in_2_dout) // core read data
+ );
+
+
+ /* OUTPUT -- written by the core, read back over the bus */
+ modexpng_sdp_36k_x32_x16_wrapper bram_output
+ (
+ .clk (clk), // core clock
+ .clk_bus (clk_bus), // bus clock
+
+ .ena (out_en), // core side write-only
+ .wea (out_we), // core write strobe
+ .addra (out_addr), // core write address
+ .dina (out_din), // core write data
+
+ .enb (bus_cs), // bus side read-only
+ .addrb (bus_addr_lsb), // bus address without the two select bits
+ .doutb (bus_data_rd_output) // selected by the bus read mux for 2'd2
+ );
+
+ reg [BUS_DATA_W -1:0] bus_data_rd_mux; // sized like the bus_data_rd port it drives instead of a literal [31:0]
+ assign bus_data_rd = bus_data_rd_mux;
+
+ always @(posedge clk_bus) // buffer select delayed by one clk_bus cycle (presumably to line up with BRAM read data -- confirm)
+ bus_addr_msb_dly <= bus_addr_msb;
+
+ always @(*)
+ // read-back mux: only buffers 0 (INPUT_1) and 2 (OUTPUT) are readable
+ case (bus_addr_msb_dly)
+ //
+ 2'd0: bus_data_rd_mux = bus_data_rd_input_1;
+ 2'd1: bus_data_rd_mux = 32'hDEADC0DE; // INPUT_2 has no bus read port: return a marker value
+ 2'd2: bus_data_rd_mux = bus_data_rd_output;
+ 2'd3: bus_data_rd_mux = 32'hDEADC0DE; // no buffer at this select value: return a marker value
+ //
+ endcase
+
endmodule
+
diff --git a/rtl/modexpng_tdp_36k_x16_x32_wrapper.v b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
new file mode 100644
index 0000000..37a5cbc
--- /dev/null
+++ b/rtl/modexpng_tdp_36k_x16_x32_wrapper.v
@@ -0,0 +1,88 @@
+module modexpng_sdp_36k_x16_x32_wrapper // NOTE(review): file is named modexpng_tdp_*, and modexpng_sdp_36k_x16_x32_wrapper.v declares this same module name
+(
+ clk, clk_bus,
+
+ ena, wea,
+ addra, dina, douta,
+
+ enb, regceb,
+ addrb, doutb
+);
+
+
+ //
+ // Headers
+ //
+ `include "modexpng_parameters.vh"
+
+
+ //
+ // Ports: port A is read-write (clocked by clk_bus), port B is read-only (clocked by clk)
+ //
+ input clk; // port B clock
+ input clk_bus; // port A clock
+
+ input ena; // port A enable
+ input wea; // port A write strobe, replicated onto every WEA bit below
+ input [BANK_ADDR_W + BUS_OP_ADDR_W -1:0] addra;
+ input [ BUS_DATA_W -1:0] dina; // was BUD_DATA_W (undeclared identifier)
+ output [ BUS_DATA_W -1:0] douta; // was BUD_DATA_W (undeclared identifier)
+
+ input enb; // port B enable
+ input regceb; // port B output register clock enable
+ input [BANK_ADDR_W + OP_ADDR_W -1:0] addrb;
+ output [ WORD_W -1:0] doutb;
+
+
+ //
+ // BRAM_TDP_MACRO: BUS_DATA_W-wide port A, WORD_W-wide port B
+ //
+ BRAM_TDP_MACRO #
+ (
+ .DEVICE ("7SERIES"),
+ .BRAM_SIZE ("36Kb"),
+
+ .WRITE_WIDTH_A (BUS_DATA_W),
+ .READ_WIDTH_A (BUS_DATA_W),
+
+ .WRITE_WIDTH_B (WORD_W),
+ .READ_WIDTH_B (WORD_W),
+
+ .DOA_REG (0),
+ .DOB_REG (1),
+
+ .WRITE_MODE_A ("READ_FIRST"),
+ .WRITE_MODE_B ("READ_FIRST"),
+
+ .SRVAL_A (36'h000000000),
+ .SRVAL_B (36'h000000000),
+
+ .INIT_A (36'h000000000),
+ .INIT_B (36'h000000000),
+
+ .INIT_FILE ("NONE"),
+ .SIM_COLLISION_CHECK ("NONE")
+ )
+ BRAM_TDP_MACRO_inst
+ (
+ .RSTA (1'b0),
+ .RSTB (1'b0),
+
+ .CLKA (clk_bus),
+ .ENA (ena),
+ .REGCEA (1'b0),
+ .WEA ({4{wea}}),
+ .ADDRA (addra), // port A address, data-in and data-out were left unconnected
+ .DIA (dina),
+ .DOA (douta),
+
+ .CLKB (clk),
+ .ENB (enb),
+ .REGCEB (regceb),
+ .WEB ({2{1'b0}}), // port B never writes
+ .ADDRB (addrb),
+ .DIB ({WORD_W{1'b0}}),
+ .DOB (doutb)
+ );
+
+endmodule