diff options
Diffstat (limited to 'rtl')
-rw-r--r-- | rtl/modexpng_recombinator_block.v | 1244 | ||||
-rw-r--r-- | rtl/modexpng_recombinator_cell.v | 94 |
2 files changed, 676 insertions, 662 deletions
diff --git a/rtl/modexpng_recombinator_block.v b/rtl/modexpng_recombinator_block.v index 077ae47..f6e23e5 100644 --- a/rtl/modexpng_recombinator_block.v +++ b/rtl/modexpng_recombinator_block.v @@ -53,58 +53,58 @@ module modexpng_recombinator_block `include "modexpng_mmm_dual_fsm.vh" - input clk; - input rst_n; - input ena; - output rdy; - input [MMM_FSM_STATE_W-1:0] fsm_state_next; - input [7:0] word_index_last; - input dsp_xy_ce_p; - input [9*47-1:0] dsp_x_p; - input [9*47-1:0] dsp_y_p; - input [ 4:0] col_index; - input [ 4:0] col_index_last; + input clk; + input rst_n; + input ena; + output rdy; + input [MMM_FSM_STATE_W -1:0] fsm_state_next; + input [ OP_ADDR_W -1:0] word_index_last; + input dsp_xy_ce_p; + input [ MAC_W * NUM_MULTS_AUX -1:0] dsp_x_p; + input [ MAC_W * NUM_MULTS_AUX -1:0] dsp_y_p; + input [ COL_INDEX_W -1:0] col_index; + input [ COL_INDEX_W -1:0] col_index_last; - input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; - input [ 7:0] rd_narrow_xy_addr; + input [ BANK_ADDR_W -1:0] rd_narrow_xy_bank; + input [ OP_ADDR_W -1:0] rd_narrow_xy_addr; - output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; - output [ 7:0] rcmb_wide_xy_addr; - output [ 17:0] rcmb_wide_x_dout; - output [ 17:0] rcmb_wide_y_dout; - output rcmb_wide_xy_valid; + output [ BANK_ADDR_W -1:0] rcmb_wide_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_wide_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_wide_x_dout; + output [ WORD_EXT_W -1:0] rcmb_wide_y_dout; + output rcmb_wide_xy_valid; - output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; - output [ 7:0] rcmb_narrow_xy_addr; - output [ 17:0] rcmb_narrow_x_dout; - output [ 17:0] rcmb_narrow_y_dout; - output rcmb_narrow_xy_valid; + output [ BANK_ADDR_W -1:0] rcmb_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rcmb_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rcmb_narrow_x_dout; + output [ WORD_EXT_W -1:0] rcmb_narrow_y_dout; + output rcmb_narrow_xy_valid; - output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; - output [ 7:0] rdct_narrow_xy_addr; - output [ 17:0] rdct_narrow_x_dout; - output [ 17:0] rdct_narrow_y_dout; - output rdct_narrow_xy_valid; + output [ BANK_ADDR_W -1:0] rdct_narrow_xy_bank; + output [ OP_ADDR_W -1:0] rdct_narrow_xy_addr; + output [ WORD_EXT_W -1:0] rdct_narrow_x_dout; + output [ WORD_EXT_W -1:0] rdct_narrow_y_dout; + output rdct_narrow_xy_valid; // // Latches // - reg [1*47-1:0] dsp_x_p_latch[0:8]; - reg [1*47-1:0] dsp_y_p_latch[0:8]; + reg [MAC_W-1:0] dsp_x_p_latch[0:NUM_MULTS_AUX-1]; + reg [MAC_W-1:0] dsp_y_p_latch[0:NUM_MULTS_AUX-1]; // // Mapping // - wire [46:0] dsp_x_p_split[0:8]; - wire [46:0] dsp_y_p_split[0:8]; + wire [MAC_W-1:0] dsp_x_p_split[0:NUM_MULTS_AUX-1]; + wire [MAC_W-1:0] dsp_y_p_split[0:NUM_MULTS_AUX-1]; genvar z; - generate for (z=0; z<(NUM_MULTS+1); z=z+1) + generate for (z=0; z<NUM_MULTS_AUX; z=z+1) begin : gen_dsp_xy_p_split - assign dsp_x_p_split[z] = dsp_x_p[47*z+:47]; - assign dsp_y_p_split[z] = dsp_y_p[47*z+:47]; + assign dsp_x_p_split[z] = dsp_x_p[z*MAC_W +: MAC_W]; + assign dsp_y_p_split[z] = dsp_y_p[z*MAC_W +: MAC_W]; end endgenerate @@ -125,211 +125,200 @@ module modexpng_recombinator_block // // valid - reg xy_valid_lsb = 1'b0; - reg xy_aux_lsb = 1'b0; - reg xy_valid_msb = 1'b0; + reg xy_valid_lsb = 1'b0; + reg xy_aux_lsb = 1'b0; + reg xy_valid_msb = 1'b0; // bitmap - reg [7:0] xy_bitmap_lsb = {8{1'b0}}; - reg [7:0] xy_bitmap_msb = {8{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_lsb = {NUM_MULTS{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_msb = {NUM_MULTS{1'b0}}; // index - reg [2:0] xy_index_lsb = 3'dX; + reg [MAC_INDEX_W-1:0] xy_index_lsb; // purge - reg xy_purge_lsb = 1'b0; - reg xy_purge_msb = 1'b0; + reg xy_purge_lsb = 1'b0; + reg xy_purge_msb = 1'b0; // valid - latch - reg xy_valid_latch_lsb = 1'b0; + reg xy_valid_latch_lsb = 1'b0; // aux - latch - reg xy_aux_latch_lsb = 1'b0; + reg xy_aux_latch_lsb = 1'b0; // bitmap - latch - reg [7:0] xy_bitmap_latch_lsb = {8{1'b0}}; - reg [7:0] xy_bitmap_latch_msb = {8{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_latch_lsb = {NUM_MULTS{1'b0}}; + reg [NUM_MULTS-1:0] xy_bitmap_latch_msb = {NUM_MULTS{1'b0}}; // index - latch - reg [2:0] xy_index_latch_lsb = 3'dX; + reg [MAC_INDEX_W-1:0] xy_index_latch_lsb; // purge - index reg xy_purge_latch_lsb = 1'b0; reg xy_purge_latch_msb = 1'b0; - // - reg xy_valid_lsb_adv[1:6]; - reg xy_valid_msb_adv[1:6]; - reg xy_aux_lsb_adv[1:6]; - reg [7:0] xy_bitmap_lsb_adv[1:6]; - reg [7:0] xy_bitmap_msb_adv[1:6]; - reg [2:0] xy_index_lsb_adv[1:6]; - reg [2:0] xy_index_msb_adv[1:6]; - reg xy_purge_lsb_adv[1:6]; - reg xy_purge_msb_adv[1:6]; + + // + // Anticipatory Values + // + reg xy_valid_lsb_adv [1:6]; + reg xy_valid_msb_adv [1:6]; + reg xy_aux_lsb_adv [1:6]; + reg [NUM_MULTS -1:0] xy_bitmap_lsb_adv[1:6]; + reg [NUM_MULTS -1:0] xy_bitmap_msb_adv[1:6]; + reg [MAC_INDEX_W -1:0] xy_index_lsb_adv [1:6]; + reg [MAC_INDEX_W -1:0] xy_index_msb_adv [1:6]; + reg xy_purge_lsb_adv [1:6]; + reg xy_purge_msb_adv [1:6]; - reg [1:0] rcmb_mode; - + + // + // Recombination Mode + // + localparam [1:0] RCMB_MODE_UNUSED = 2'd0; + localparam [1:0] RCMB_MODE_SQUARE = 2'd1; + localparam [1:0] RCMB_MODE_TRIANGLE = 2'd2; + localparam [1:0] RCMB_MODE_RECTANGLE = 2'd3; + + reg [1:0] rcmb_mode = RCMB_MODE_UNUSED; + always @(posedge clk) // if (ena) // case (fsm_state_next) - MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= 2'd1; - MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= 2'd2; - MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= 2'd3; - default: rcmb_mode <= 2'd0; + MMM_FSM_STATE_MULT_SQUARE_COL_0_BUSY: rcmb_mode <= RCMB_MODE_SQUARE; + MMM_FSM_STATE_MULT_TRIANGLE_COL_0_BUSY: rcmb_mode <= RCMB_MODE_TRIANGLE; + MMM_FSM_STATE_MULT_RECTANGLE_COL_0_BUSY: rcmb_mode <= RCMB_MODE_RECTANGLE; + default: rcmb_mode <= RCMB_MODE_UNUSED; endcase - + + // + // Initialization + // integer i; - initial for (i=1; i<6; i=i+1) begin - xy_valid_lsb_adv[i] = 1'b0; - xy_valid_msb_adv[i] = 1'b0; - xy_aux_lsb_adv[i] = 1'b0; - xy_bitmap_lsb_adv[i] = {8{1'b0}}; - xy_bitmap_msb_adv[i] = {8{1'b0}}; - xy_index_lsb_adv[i] = 3'dX; - xy_index_msb_adv[i] = 3'dX; - xy_purge_lsb_adv[i] = 1'b0; - xy_purge_msb_adv[i] = 1'b0; + initial for (i=1; i<=6; i=i+1) begin + xy_valid_lsb_adv [i] = 1'b0; + xy_valid_msb_adv [i] = 1'b0; + xy_aux_lsb_adv [i] = 1'b0; + xy_bitmap_lsb_adv[i] = {NUM_MULTS{1'b0}}; + xy_bitmap_msb_adv[i] = {NUM_MULTS{1'b0}}; + xy_index_lsb_adv [i] = {MAC_INDEX_W{1'bX}}; + xy_index_msb_adv [i] = {MAC_INDEX_W{1'bX}}; + xy_purge_lsb_adv [i] = 1'b0; + xy_purge_msb_adv [i] = 1'b0; end - function calc_square_triangle_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_triangle_valid_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_addr_value[7:3] == col_index_value) - calc_square_triangle_valid_lsb = 1'b1; - else - calc_square_triangle_valid_lsb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_square_triangle_valid_lsb = 1'b1; + else calc_square_triangle_valid_lsb = 1'b0; + // endfunction - function calc_square_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_valid_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_square_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function calc_triangle_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_triangle_valid_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_triangle_valid_lsb = calc_square_triangle_valid_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function calc_rectangle_valid_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_rectangle_valid_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_addr_value[7:3] == col_index_value) - calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT; - else - calc_rectangle_valid_lsb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_rectangle_valid_lsb = narrow_xy_bank_value != BANK_NARROW_EXT; + else calc_rectangle_valid_lsb = 1'b0; + // endfunction - function calc_triangle_aux_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_triangle_aux_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_bank_value == BANK_NARROW_EXT) - calc_triangle_aux_lsb = 1'b1; - else - calc_triangle_aux_lsb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_bank_value == BANK_NARROW_EXT) calc_triangle_aux_lsb = 1'b1; + else calc_triangle_aux_lsb = 1'b0; + // endfunction - function [7:0] calc_square_triangle_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + // + // TODO: This will need some generic replacement defined in modexpng_parameters.vh + // in case anything different from NUM_MULTS = 8 is used. + // + function [ NUM_MULTS -1:0] calc_square_triangle_bitmap_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_addr_value[7:3] == col_index_value) - // - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001; - 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010; - 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100; - 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000; - 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000; - 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000; - 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000; - 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000; - endcase - // - else - calc_square_triangle_bitmap_lsb = {8{1'b0}}; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) + case (narrow_xy_addr_value[MAC_INDEX_W-1:0]) + 3'b000: calc_square_triangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_square_triangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_square_triangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_square_triangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_square_triangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_square_triangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_square_triangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_square_triangle_bitmap_lsb = 8'b10000000; + endcase + else calc_square_triangle_bitmap_lsb = 8'b00000000; + // endfunction - function [7:0] calc_square_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [ NUM_MULTS -1:0] calc_square_bitmap_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_square_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function [7:0] calc_triangle_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [ NUM_MULTS -1:0] calc_triangle_bitmap_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_triangle_bitmap_lsb = calc_square_triangle_bitmap_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function [7:0] calc_rectangle_bitmap_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; - input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if ((narrow_xy_addr_value[7:3] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT)) - // - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; - 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; - 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; - 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; - 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; - 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; - 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; - 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; - endcase - // - else - calc_rectangle_bitmap_lsb = {8{1'b0}}; - // - end + function [ NUM_MULTS -1:0] calc_rectangle_bitmap_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; + input [BANK_ADDR_W -1:0] narrow_xy_bank_value; + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if ((narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (narrow_xy_bank_value != BANK_NARROW_EXT)) + case (narrow_xy_addr_value[MAC_INDEX_W-1:0]) + 3'b000: calc_rectangle_bitmap_lsb = 8'b00000001; + 3'b001: calc_rectangle_bitmap_lsb = 8'b00000010; + 3'b010: calc_rectangle_bitmap_lsb = 8'b00000100; + 3'b011: calc_rectangle_bitmap_lsb = 8'b00001000; + 3'b100: calc_rectangle_bitmap_lsb = 8'b00010000; + 3'b101: calc_rectangle_bitmap_lsb = 8'b00100000; + 3'b110: calc_rectangle_bitmap_lsb = 8'b01000000; + 3'b111: calc_rectangle_bitmap_lsb = 8'b10000000; + endcase + else calc_rectangle_bitmap_lsb = 8'b00000000; + // endfunction /* @@ -337,269 +326,243 @@ module modexpng_recombinator_block * rectangle is that the bank is checked or not). A universal function would * accept a parameter that tells it whether it should check the bank or not. * Let's do it later, too early to optimize now, it seems. - * - * */ - function [2:0] calc_square_triangle_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [MAC_INDEX_W -1:0] calc_square_triangle_index_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_addr_value[7:3] == col_index_value) - // - case (narrow_xy_addr_value[2:0]) - 3'b000: calc_square_triangle_index_lsb = 3'd0; - 3'b001: calc_square_triangle_index_lsb = 3'd1; - 3'b010: calc_square_triangle_index_lsb = 3'd2; - 3'b011: calc_square_triangle_index_lsb = 3'd3; - 3'b100: calc_square_triangle_index_lsb = 3'd4; - 3'b101: calc_square_triangle_index_lsb = 3'd5; - 3'b110: calc_square_triangle_index_lsb = 3'd6; - 3'b111: calc_square_triangle_index_lsb = 3'd7; - endcase - // - else - calc_square_triangle_index_lsb = 3'dX; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) + case (narrow_xy_addr_value[MAC_INDEX_W-1:0]) + 3'b000: calc_square_triangle_index_lsb = 3'd0; + 3'b001: calc_square_triangle_index_lsb = 3'd1; + 3'b010: calc_square_triangle_index_lsb = 3'd2; + 3'b011: calc_square_triangle_index_lsb = 3'd3; + 3'b100: calc_square_triangle_index_lsb = 3'd4; + 3'b101: calc_square_triangle_index_lsb = 3'd5; + 3'b110: calc_square_triangle_index_lsb = 3'd6; + 3'b111: calc_square_triangle_index_lsb = 3'd7; + endcase + else calc_square_triangle_index_lsb = 3'dX; + // endfunction - function [2:0] calc_square_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [MAC_INDEX_W -1:0] calc_square_index_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_square_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function [2:0] calc_triangle_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [MAC_INDEX_W -1:0] calc_triangle_index_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_triangle_index_lsb = calc_square_triangle_index_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function [2:0] calc_rectangle_index_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [MAC_INDEX_W -1:0] calc_rectangle_index_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] slim_bram_xy_bank_value; - input [7:0] slim_bram_xy_addr_value; - begin - // - if ((slim_bram_xy_addr_value[7:3] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT)) - // - case (slim_bram_xy_addr_value[2:0]) - 3'b000: calc_rectangle_index_lsb = 3'd0; - 3'b001: calc_rectangle_index_lsb = 3'd1; - 3'b010: calc_rectangle_index_lsb = 3'd2; - 3'b011: calc_rectangle_index_lsb = 3'd3; - 3'b100: calc_rectangle_index_lsb = 3'd4; - 3'b101: calc_rectangle_index_lsb = 3'd5; - 3'b110: calc_rectangle_index_lsb = 3'd6; - 3'b111: calc_rectangle_index_lsb = 3'd7; - endcase - // - else - calc_rectangle_index_lsb = 3'dX; - // - end + input [ OP_ADDR_W -1:0] slim_bram_xy_addr_value; + // + if ((slim_bram_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) && (slim_bram_xy_bank_value != BANK_NARROW_EXT)) + case (slim_bram_xy_addr_value[MAC_INDEX_W-1:0]) + 3'b000: calc_rectangle_index_lsb = 3'd0; + 3'b001: calc_rectangle_index_lsb = 3'd1; + 3'b010: calc_rectangle_index_lsb = 3'd2; + 3'b011: calc_rectangle_index_lsb = 3'd3; + 3'b100: calc_rectangle_index_lsb = 3'd4; + 3'b101: calc_rectangle_index_lsb = 3'd5; + 3'b110: calc_rectangle_index_lsb = 3'd6; + 3'b111: calc_rectangle_index_lsb = 3'd7; + endcase + else calc_rectangle_index_lsb = 3'dX; + // endfunction - function calc_square_rectangle_purge_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_rectangle_purge_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - // - if (narrow_xy_addr_value[7:3] == col_index_value) - calc_square_rectangle_purge_lsb = narrow_xy_addr_value[7:3] == col_index_last_value; - else - calc_square_rectangle_purge_lsb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + // + if (narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_value) calc_square_rectangle_purge_lsb = narrow_xy_addr_value[OP_ADDR_W-1:MAC_INDEX_W] == col_index_last_value; + else calc_square_rectangle_purge_lsb = 1'b0; + // endfunction - function calc_square_purge_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_purge_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_square_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function calc_rectangle_purge_lsb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_rectangle_purge_lsb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - begin - calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + calc_rectangle_purge_lsb = calc_square_rectangle_purge_lsb(col_index_value, col_index_last_value, narrow_xy_bank_value, narrow_xy_addr_value); endfunction - function calc_square_valid_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_valid_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if (narrow_xy_addr_value == index_last_value) - calc_square_valid_msb = 1'b1; - else - calc_square_valid_msb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if (narrow_xy_addr_value == index_last_value) calc_square_valid_msb = 1'b1; + else calc_square_valid_msb = 1'b0; + // endfunction - function calc_rectangle_valid_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_rectangle_valid_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) - calc_rectangle_valid_msb = 1'b1; - else - calc_rectangle_valid_msb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_valid_msb = 1'b1; + else calc_rectangle_valid_msb = 1'b0; + // endfunction - function [7:0] calc_square_bitmap_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [ NUM_MULTS -1:0] calc_square_bitmap_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if (narrow_xy_addr_value == index_last_value) begin - calc_square_bitmap_msb[7] = col_index_value != col_index_last_value; - calc_square_bitmap_msb[6:0] = 7'b1111111; - end else - calc_square_bitmap_msb[7:0] = 8'b00000000; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if (narrow_xy_addr_value == index_last_value) calc_square_bitmap_msb = {col_index_value != col_index_last_value, 7'b1111111}; + else calc_square_bitmap_msb = 8'b00000000; + // endfunction - function [7:0] calc_rectangle_bitmap_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function [ NUM_MULTS -1:0] calc_rectangle_bitmap_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) begin - calc_rectangle_bitmap_msb[7:0] = 8'b11111111; - end else - calc_rectangle_bitmap_msb[7:0] = 8'b00000000; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_bitmap_msb = 8'b11111111; + else calc_rectangle_bitmap_msb = 8'b00000000; + // endfunction - function calc_square_purge_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_square_purge_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if (narrow_xy_addr_value == index_last_value) - calc_square_purge_msb = col_index_value == col_index_last_value; - else - calc_square_purge_msb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if (narrow_xy_addr_value == index_last_value) calc_square_purge_msb = col_index_value == col_index_last_value; + else calc_square_purge_msb = 1'b0; + // endfunction - function calc_rectangle_purge_msb; - input [4:0] col_index_value; - input [4:0] col_index_last_value; + function calc_rectangle_purge_msb; + input [COL_INDEX_W -1:0] col_index_value; + input [COL_INDEX_W -1:0] col_index_last_value; input [BANK_ADDR_W -1:0] narrow_xy_bank_value; - input [7:0] narrow_xy_addr_value; - input [7:0] index_last_value; - begin - // - if ((narrow_xy_addr_value == 8'd1) && (narrow_xy_bank_value == BANK_NARROW_EXT)) - calc_rectangle_purge_msb = col_index_value == col_index_last_value; - else - calc_rectangle_purge_msb = 1'b0; - // - end + input [ OP_ADDR_W -1:0] narrow_xy_addr_value; + input [ OP_ADDR_W -1:0] index_last_value; + // + if ((narrow_xy_addr_value == OP_ADDR_ONE) && (narrow_xy_bank_value == BANK_NARROW_EXT)) calc_rectangle_purge_msb = col_index_value == col_index_last_value; + else calc_rectangle_purge_msb = 1'b0; + // endfunction + + // + // Recombinator Cell Instances + // + reg [WORD_W -1:0] rcmb_x_msb_carry_0; + reg [WORD_W -1:0] rcmb_y_msb_carry_0; + reg [WORD_W -1:0] rcmb_x_msb_carry_1; + reg [WORD_W -1:0] rcmb_y_msb_carry_1; reg rcmb_xy_lsb_ce = 1'b0; - reg rcmb_xy_lsb_ce_aux; + reg rcmb_xy_lsb_ce_aux = 1'b0; reg [ 2:0] rcmb_xy_lsb_ce_purge = 3'b000; wire rcmb_xy_lsb_ce_combined = rcmb_xy_lsb_ce | rcmb_xy_lsb_ce_aux | rcmb_xy_lsb_ce_purge[0]; reg rcmb_xy_lsb_clr; + wire rcmb_xy_lsb_cry = !rcmb_xy_lsb_ce_purge[2] && (rcmb_xy_lsb_ce_purge[1] || rcmb_xy_lsb_ce_purge[0]); - reg [46:0] rcmb_x_lsb_din; - reg [46:0] rcmb_y_lsb_din; - wire [15:0] rcmb_x_lsb_dout; - wire [15:0] rcmb_y_lsb_dout; + reg [ MAC_W -1:0] rcmb_x_lsb_din; + reg [ MAC_W -1:0] rcmb_y_lsb_din; + wire [WORD_W -1:0] rcmb_x_lsb_dout; + wire [WORD_W -1:0] rcmb_y_lsb_dout; + wire [WORD_EXT_W -2:0] rcmb_x_lsb_dout_ext; + wire [WORD_EXT_W -2:0] rcmb_y_lsb_dout_ext; reg rcmb_xy_msb_ce = 1'b0; reg [ 1:0] rcmb_xy_msb_ce_purge = 2'b00; wire rcmb_xy_msb_ce_combined = rcmb_xy_msb_ce | rcmb_xy_msb_ce_purge[0]; reg rcmb_xy_msb_clr; - reg [46:0] rcmb_x_msb_din; - reg [46:0] rcmb_y_msb_din; - wire [15:0] rcmb_x_msb_dout; - wire [15:0] rcmb_y_msb_dout; + reg [ MAC_W -1:0] rcmb_x_msb_din; + reg [ MAC_W -1:0] rcmb_y_msb_din; + wire [WORD_W -1:0] rcmb_x_msb_dout; + wire [WORD_W -1:0] rcmb_y_msb_dout; modexpng_recombinator_cell recomb_x_lsb ( - .clk (clk), - .ce (rcmb_xy_lsb_ce_combined), - .clr (rcmb_xy_lsb_clr), - .din (rcmb_x_lsb_din), - .dout (rcmb_x_lsb_dout) + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .cry (rcmb_xy_lsb_cry), + .cin (rcmb_x_msb_carry_1), + .din (rcmb_x_lsb_din), + .dout (rcmb_x_lsb_dout), + .dout_ext (rcmb_x_lsb_dout_ext) ); modexpng_recombinator_cell recomb_y_lsb ( - .clk (clk), - .ce (rcmb_xy_lsb_ce_combined), - .clr (rcmb_xy_lsb_clr), - .din (rcmb_y_lsb_din), - .dout (rcmb_y_lsb_dout) + .clk (clk), + .ce (rcmb_xy_lsb_ce_combined), + .clr (rcmb_xy_lsb_clr), + .cry (rcmb_xy_lsb_cry), + .cin (rcmb_y_msb_carry_1), + .din (rcmb_y_lsb_din), + .dout (rcmb_y_lsb_dout), + .dout_ext (rcmb_y_lsb_dout_ext) ); modexpng_recombinator_cell recomb_x_msb ( - .clk (clk), - .ce (rcmb_xy_msb_ce_combined), - .clr (rcmb_xy_msb_clr), - .din (rcmb_x_msb_din), - .dout (rcmb_x_msb_dout) + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .cry (1'b0), + .cin (WORD_ZERO), + .din (rcmb_x_msb_din), + .dout (rcmb_x_msb_dout), + .dout_ext () ); modexpng_recombinator_cell recomb_y_msb ( - .clk (clk), - .ce (rcmb_xy_msb_ce_combined), - .clr (rcmb_xy_msb_clr), - .din (rcmb_y_msb_din), - .dout (rcmb_y_msb_dout) + .clk (clk), + .ce (rcmb_xy_msb_ce_combined), + .clr (rcmb_xy_msb_clr), + .cry (1'b0), + .cin (WORD_ZERO), + .din (rcmb_y_msb_din), + .dout (rcmb_y_msb_dout), + .dout_ext () ); always @(posedge clk) begin @@ -608,19 +571,14 @@ module modexpng_recombinator_block rcmb_xy_lsb_ce_aux <= xy_aux_latch_lsb; rcmb_xy_msb_ce <= xy_bitmap_latch_msb[0]; // - if (xy_purge_latch_lsb) - rcmb_xy_lsb_ce_purge <= 3'b111; - else - rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]}; + if (xy_purge_latch_lsb) rcmb_xy_lsb_ce_purge <= 3'b111; + else rcmb_xy_lsb_ce_purge <= {1'b0, rcmb_xy_lsb_ce_purge[2:1]}; // - if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) - rcmb_xy_msb_ce_purge <= 2'b11; - else - rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]}; + if (xy_purge_latch_msb && xy_bitmap_latch_msb[0] && !xy_bitmap_latch_msb[1]) rcmb_xy_msb_ce_purge <= 2'b11; + else rcmb_xy_msb_ce_purge <= {1'b0, rcmb_xy_msb_ce_purge[1]}; // end - always @(posedge clk) // if (ena) begin @@ -637,11 +595,11 @@ module modexpng_recombinator_block rcmb_x_lsb_din <= dsp_x_p_latch[xy_index_latch_lsb]; rcmb_y_lsb_din <= dsp_y_p_latch[xy_index_latch_lsb]; end else if (xy_aux_latch_lsb) begin - rcmb_x_lsb_din <= dsp_x_p_latch[8]; - rcmb_y_lsb_din <= dsp_y_p_latch[8]; + rcmb_x_lsb_din <= dsp_x_p_latch[NUM_MULTS_AUX-1]; + rcmb_y_lsb_din <= dsp_y_p_latch[NUM_MULTS_AUX-1]; end else begin - rcmb_x_lsb_din <= {47{1'b0}}; - rcmb_y_lsb_din <= {47{1'b0}}; + rcmb_x_lsb_din <= {MAC_W{1'b0}}; + rcmb_y_lsb_din <= {MAC_W{1'b0}}; end always @(posedge clk) @@ -650,8 +608,8 @@ module modexpng_recombinator_block rcmb_x_msb_din <= dsp_x_p_latch[0]; rcmb_y_msb_din <= dsp_y_p_latch[0]; end else begin - rcmb_x_msb_din <= {47{1'b0}}; - rcmb_y_msb_din <= {47{1'b0}}; + rcmb_x_msb_din <= {MAC_W{1'b0}}; + rcmb_y_msb_din <= {MAC_W{1'b0}}; end @@ -688,7 +646,7 @@ module modexpng_recombinator_block xy_purge_lsb_adv [6] <= 1'b0; // xy_valid_msb_adv [6] <= 1'b0; - xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}}; xy_purge_msb_adv [6] <= 1'b0; // end @@ -714,12 +672,12 @@ module modexpng_recombinator_block // xy_valid_lsb_adv [6] <= 1'b0; xy_aux_lsb_adv [6] <= 1'b0; - xy_bitmap_lsb_adv[6] <= {8{1'b0}}; - xy_index_lsb_adv [6] <= 3'dX; + xy_bitmap_lsb_adv[6] <= {NUM_MULTS{1'b0}}; + xy_index_lsb_adv [6] <= {MAC_INDEX_W{1'bX}}; xy_purge_lsb_adv [6] <= 1'b0; // xy_valid_msb_adv [6] <= 1'b0; - xy_bitmap_msb_adv[6] <= {8{1'b0}}; + xy_bitmap_msb_adv[6] <= {NUM_MULTS{1'b0}}; xy_purge_msb_adv [6] <= 1'b0; // end @@ -749,7 +707,7 @@ module modexpng_recombinator_block xy_bitmap_latch_msb <= xy_bitmap_msb; xy_purge_latch_msb <= xy_purge_msb; end else begin - xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[7:1]}; + xy_bitmap_latch_msb <= {1'b0, xy_bitmap_latch_msb[NUM_MULTS-1:1]}; end // // @@ -771,19 +729,19 @@ module modexpng_recombinator_block // if (xy_bitmap_latch_msb[1]) // only shift 7 times // - for (i=0; i<8; i=i+1) + for (i=0; i<NUM_MULTS; i=i+1) // - if (i < 7) begin + if (i < (NUM_MULTS-1)) begin dsp_x_p_latch[i] <= dsp_x_p_latch[i+1]; dsp_y_p_latch[i] <= dsp_y_p_latch[i+1]; end else begin - dsp_x_p_latch[i] <= {47{1'bX}}; - dsp_y_p_latch[i] <= {47{1'bX}}; + dsp_x_p_latch[i] <= {MAC_W{1'bX}}; + dsp_y_p_latch[i] <= {MAC_W{1'bX}}; end // else if (dsp_xy_ce_p_dly1) begin // - for (i=0; i<8; i=i+1) + for (i=0; i<NUM_MULTS; i=i+1) // if (xy_bitmap_lsb[i]) begin dsp_x_p_latch[i] <= dsp_x_p_split[i]; @@ -794,12 +752,26 @@ module modexpng_recombinator_block end // if (xy_aux_lsb) begin - dsp_x_p_latch[8] <= dsp_x_p_split[8]; - dsp_y_p_latch[8] <= dsp_y_p_split[8]; + dsp_x_p_latch[NUM_MULTS_AUX-1] <= dsp_x_p_split[NUM_MULTS_AUX-1]; + dsp_y_p_latch[NUM_MULTS_AUX-1] <= dsp_y_p_split[NUM_MULTS_AUX-1]; end // end + + reg rcmb_xy_lsb_ce_combined_dly = 1'b0; + reg rcmb_xy_msb_ce_combined_dly = 1'b0; + + always @(posedge clk or negedge rst_n) + // + if (!rst_n) begin + rcmb_xy_lsb_ce_combined_dly <= 1'b0; + rcmb_xy_msb_ce_combined_dly <= 1'b0; + end else begin + rcmb_xy_lsb_ce_combined_dly <= rcmb_xy_lsb_ce_combined; + rcmb_xy_msb_ce_combined_dly <= rcmb_xy_msb_ce_combined; + end + reg rcmb_xy_lsb_valid = 1'b0; reg rcmb_xy_msb_valid = 1'b0; @@ -809,50 +781,58 @@ module modexpng_recombinator_block rcmb_xy_lsb_valid <= 1'b0; rcmb_xy_msb_valid <= 1'b0; end else begin - rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined; - rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined; + rcmb_xy_lsb_valid <= rcmb_xy_lsb_ce_combined_dly; + rcmb_xy_msb_valid <= rcmb_xy_msb_ce_combined_dly; end - reg [ BANK_ADDR_W -1:0] wide_xy_bank; - reg [ 7:0] wide_xy_addr; - reg [17:0] wide_x_dout; - reg [17:0] wide_y_dout; - reg wide_xy_valid = 1'b0; + // + // Output Registers + // + reg [BANK_ADDR_W -1:0] wide_xy_bank; + reg [ OP_ADDR_W -1:0] wide_xy_addr; + reg [ WORD_EXT_W -1:0] wide_x_dout; + reg [ WORD_EXT_W -1:0] wide_y_dout; + reg wide_xy_valid = 1'b0; + + reg [BANK_ADDR_W -1:0] narrow_xy_bank; + reg [ OP_ADDR_W -1:0] narrow_xy_addr; + reg [ WORD_EXT_W -1:0] narrow_x_dout; + reg [ WORD_EXT_W -1:0] narrow_y_dout; + reg narrow_xy_valid = 1'b0; - reg [ BANK_ADDR_W -1:0] narrow_xy_bank; - reg [ 7:0] narrow_xy_addr; - reg [17:0] narrow_x_dout; - reg [17:0] narrow_y_dout; - reg narrow_xy_valid = 1'b0; + reg [BANK_ADDR_W -1:0] rdct_xy_bank; + reg [ OP_ADDR_W -1:0] rdct_xy_addr; + reg [ WORD_EXT_W -1:0] rdct_x_dout; + reg [ WORD_EXT_W -1:0] rdct_y_dout; + reg rdct_xy_valid = 1'b0; - reg [ BANK_ADDR_W -1:0] rdct_xy_bank; - reg [ 7:0] rdct_xy_addr; - reg [17:0] rdct_x_dout; - reg [17:0] rdct_y_dout; - reg rdct_xy_valid = 1'b0; - reg [ 7:0] cnt_lsb; - reg [ 7:0] cnt_msb; + // + // Internal Counters + // + reg [OP_ADDR_W -1:0] cnt_lsb; + reg [OP_ADDR_W -1:0] cnt_msb; - reg cnt_lsb_wrapped; - reg cnt_msb_wrapped; - - reg [31:0] rcmb_xy_msb_carry_0; - reg [31:0] rcmb_xy_msb_carry_1; + reg cnt_lsb_wrapped; + reg cnt_msb_wrapped; reg [31:0] rcmb_xy_msb_delay_0; reg [31:0] rcmb_xy_msb_delay_1; reg [31:0] rcmb_xy_msb_delay_2; - reg [ 7:0] rcmb_msb_cnt_delay_0 = 8'd0; - reg [ 7:0] rcmb_msb_cnt_delay_1 = 8'd0; - reg [ 7:0] rcmb_msb_cnt_delay_2 = 8'd0; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_0 = OP_ADDR_ZERO; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_1 = OP_ADDR_ZERO; + reg [OP_ADDR_W -1:0] rcmb_msb_cnt_delay_2 = OP_ADDR_ZERO; - reg rcmb_msb_flag_delay_0; - reg rcmb_msb_flag_delay_1; - reg rcmb_msb_flag_delay_2; + reg rcmb_msb_flag_delay_0 = 1'b0; + reg rcmb_msb_flag_delay_1 = 1'b0; + reg rcmb_msb_flag_delay_2 = 1'b0; + + // + // Output Port Mapping + // assign rcmb_wide_xy_bank = wide_xy_bank; assign rcmb_wide_xy_addr = wide_xy_addr; assign rcmb_wide_x_dout = wide_x_dout; @@ -870,23 +850,30 @@ module modexpng_recombinator_block assign rdct_narrow_x_dout = rdct_x_dout; assign rdct_narrow_y_dout = rdct_y_dout; assign rdct_narrow_xy_valid = rdct_xy_valid; - + + + // + // Ready Logic + // reg rdy_reg = 1'b1; reg rdy_adv = 1'b1; - + assign rdy = rdy_reg; - - + always @(posedge clk) // if (ena) rdy_reg <= 1'b0; else rdy_reg <= rdy_adv; - + + + // + // Helper Tasks + // task advance_rcmb_msb_delay; - input [15:0] dout_x; - input [15:0] dout_y; - input [ 7:0] cnt; - input flag; + input [ WORD_W -1:0] dout_x; + input [ WORD_W -1:0] dout_y; + input [OP_ADDR_W -1:0] cnt; + input flag; begin // rcmb_xy_msb_delay_0 <= {dout_y, dout_x}; @@ -905,134 +892,119 @@ module modexpng_recombinator_block endtask task shift_rcmb_msb_carry; - input [15:0] dout_x; - input [15:0] dout_y; + input [WORD_W -1:0] dout_x; + input [WORD_W -1:0] dout_y; begin - rcmb_xy_msb_carry_0 <= {dout_y, dout_x}; - rcmb_xy_msb_carry_1 <= rcmb_xy_msb_carry_0; + rcmb_x_msb_carry_0 <= dout_x; + rcmb_y_msb_carry_0 <= dout_y; + rcmb_x_msb_carry_1 <= rcmb_x_msb_carry_0; + rcmb_y_msb_carry_1 <= rcmb_y_msb_carry_0; end endtask task _update_wide; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - input valid; + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + input valid; begin - wide_xy_bank <= bank; - wide_xy_addr <= addr; - wide_x_dout <= dout_x; - wide_y_dout <= dout_y; + wide_xy_bank <= bank; + wide_xy_addr <= addr; + wide_x_dout <= dout_x; + wide_y_dout <= dout_y; wide_xy_valid <= valid; end endtask task _update_narrow; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - input valid; + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + input valid; begin - narrow_xy_bank <= bank; - narrow_xy_addr <= addr; - narrow_x_dout <= dout_x; - narrow_y_dout <= dout_y; + narrow_xy_bank <= bank; + narrow_xy_addr <= addr; + narrow_x_dout <= dout_x; + narrow_y_dout <= dout_y; narrow_xy_valid <= valid; end endtask task _update_rdct; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - input valid; + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + input valid; begin - rdct_xy_bank <= bank; - rdct_xy_addr <= addr; - rdct_x_dout <= dout_x; - rdct_y_dout <= dout_y; + rdct_xy_bank <= bank; + rdct_xy_addr <= addr; + rdct_x_dout <= dout_x; + rdct_y_dout <= dout_y; rdct_xy_valid <= valid; end endtask task set_wide; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - begin - _update_wide(bank, addr, dout_x, dout_y, 1'b1); - end + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + _update_wide(bank, addr, dout_x, dout_y, 1'b1); endtask task set_narrow; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - begin - _update_narrow(bank, addr, dout_x, dout_y, 1'b1); - end + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + _update_narrow(bank, addr, dout_x, dout_y, 1'b1); endtask task set_rdct; - input [ BANK_ADDR_W -1:0] bank; - input [ 7:0] addr; - input [17:0] dout_x; - input [17:0] dout_y; - begin - _update_rdct(bank, addr, dout_x, dout_y, 1'b1); - end + input [BANK_ADDR_W -1:0] bank; + input [ OP_ADDR_W -1:0] addr; + input [ WORD_EXT_W -1:0] dout_x; + input [ WORD_EXT_W -1:0] dout_y; + _update_rdct(bank, addr, dout_x, dout_y, 1'b1); endtask task clear_wide; - begin - _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); - end + _update_wide(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); endtask task clear_narrow; - begin - _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); - end + _update_narrow(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); endtask task clear_rdct; - begin - _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); - end + _update_rdct(BANK_DNC, OP_ADDR_DNC, WORD_EXT_DNC, WORD_EXT_DNC, 1'b0); endtask task _set_cnt_lsb; - input [7:0] cnt; - input wrapped; - begin - cnt_lsb <= cnt; - cnt_lsb_wrapped <= wrapped; - end + input [OP_ADDR_W-1:0] cnt; + input wrapped; + {cnt_lsb, cnt_lsb_wrapped} <= {cnt, wrapped}; endtask task _set_cnt_msb; - input [7:0] cnt; - input wrapped; - begin - cnt_msb <= cnt; - cnt_msb_wrapped <= wrapped; - end + input [OP_ADDR_W-1:0] cnt; + input wrapped; + {cnt_msb, cnt_msb_wrapped} <= {cnt, wrapped}; endtask task inc_cnt_lsb; - begin - if (cnt_lsb == word_index_last) - _set_cnt_lsb(8'd0, 1'b1); - else - _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); - end + if (cnt_lsb == word_index_last) _set_cnt_lsb(OP_ADDR_ZERO, 1'b1); + else _set_cnt_lsb(cnt_lsb + 1'b1, cnt_lsb_wrapped); endtask + task inc_cnt_msb; + if (cnt_msb == word_index_last) _set_cnt_msb(OP_ADDR_ZERO, 1'b1); + else _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); + endtask + task inc_cnt_both; begin inc_cnt_lsb; @@ -1040,31 +1012,18 @@ module modexpng_recombinator_block end endtask - task inc_cnt_msb; - begin - if (cnt_msb == word_index_last) - _set_cnt_msb(8'd0, 1'b1); - else - _set_cnt_msb(cnt_msb + 1'b1, cnt_msb_wrapped); - end - endtask - task clr_cnt_lsb; - begin - _set_cnt_lsb(8'd0, 1'b0); - end + _set_cnt_lsb(OP_ADDR_ZERO, 1'b0); endtask task clr_cnt_msb; - begin - _set_cnt_msb(8'd0, 1'b0); - end + _set_cnt_msb(OP_ADDR_ZERO, 1'b0); endtask + - - - wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; - + // + // Main Process + // always @(posedge clk) // if (ena) begin @@ -1073,58 +1032,75 @@ module modexpng_recombinator_block end else if (!rdy) // case (rcmb_mode) - 2'd1: recombine_square(); - 2'd2: recombine_triangle(); - 2'd3: recombine_rectangle(); + RCMB_MODE_SQUARE: recombine_square(); + RCMB_MODE_TRIANGLE: recombine_triangle(); + RCMB_MODE_RECTANGLE: recombine_rectangle(); endcase - wire [17:0] rcmb_x_lsb_dout_pad = {2'b00, rcmb_x_lsb_dout}; - wire [17:0] rcmb_y_lsb_dout_pad = {2'b00, rcmb_y_lsb_dout}; + + // + // Padding + // + wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_pad = {CARRY_ZERO, rcmb_x_lsb_dout}; + wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_pad = {CARRY_ZERO, rcmb_y_lsb_dout}; + + wire [WORD_EXT_W-1:0] rcmb_x_lsb_dout_ext_pad = {1'b0, rcmb_x_lsb_dout_ext}; + wire [WORD_EXT_W-1:0] rcmb_y_lsb_dout_ext_pad = {1'b0, rcmb_y_lsb_dout_ext}; + + wire [WORD_EXT_W-1:0] rcmb_x_msb_dout_pad = {CARRY_ZERO, rcmb_x_msb_dout}; + wire [WORD_EXT_W-1:0] rcmb_y_msb_dout_pad = {CARRY_ZERO, rcmb_y_msb_dout}; - wire [17:0] rcmb_x_msb_dout_pad = {2'b00, rcmb_x_msb_dout}; - wire [17:0] rcmb_y_msb_dout_pad = {2'b00, rcmb_y_msb_dout}; + wire [WORD_EXT_W-1:0] rcmb_x_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[15: 0]}; + wire [WORD_EXT_W-1:0] rcmb_y_msb_delay_2_pad = {CARRY_ZERO, rcmb_xy_msb_delay_2[31:16]}; - wire [17:0] rcmb_x_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[15: 0]}; - wire [17:0] rcmb_y_msb_delay_2_pad = {2'b00, rcmb_xy_msb_delay_2[31:16]}; + + // + // Handy Signal + // + wire [1:0] rcmb_xy_valid = {rcmb_xy_msb_valid, rcmb_xy_lsb_valid}; - wire [17:0] rcmb_x_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_x_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[15: 0]}}; - wire [17:0] rcmb_y_lsb_msb_carry_1_pad = {1'b0, {1'b0, rcmb_y_lsb_dout} + {1'b0, rcmb_xy_msb_carry_1[31:16]}}; - - + + // + // Recombination Task - Square + // task recombine_square; // begin // case (rcmb_xy_valid) - // 2'b01: inc_cnt_lsb; 2'b10: inc_cnt_msb; 2'b11: inc_cnt_both; - // endcase // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); - else clear_wide; + 2'b00: if (rcmb_msb_flag_delay_2) set_wide(BANK_WIDE_H, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); + else clear_wide; // - 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b01: set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); // - 2'b10: if (cnt_msb < 8'd2) clear_wide; - else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + 2'b10: if (cnt_msb < OP_ADDR_TWO) clear_wide; + else set_wide(BANK_WIDE_H, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); // - 2'b11: if (cnt_lsb_wrapped) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); - else set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + 2'b11: if (!cnt_lsb_wrapped) set_wide(BANK_WIDE_L, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else begin + if (cnt_lsb == OP_ADDR_ZERO) set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_wide(BANK_WIDE_H, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad); + end // endcase // case (rcmb_xy_valid) // - 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); - 2'b10: if (cnt_msb < 8'd2) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); + // + 2'b01: if (rcmb_xy_lsb_cry) shift_rcmb_msb_carry(WORD_DNC, WORD_DNC); + // + 2'b10: if (cnt_msb < OP_ADDR_TWO) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); // 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); - if (cnt_lsb_wrapped) shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); + if (rcmb_xy_lsb_cry) shift_rcmb_msb_carry(WORD_DNC, WORD_DNC); end // endcase @@ -1134,23 +1110,27 @@ module modexpng_recombinator_block endtask + // + // Recombination Task - Triangle + // task recombine_triangle; // begin // case (rcmb_xy_valid) - // 2'b01: inc_cnt_lsb(); - // endcase // case (rcmb_xy_valid) // - 2'b00: clear_narrow; - 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); - else set_narrow(BANK_NARROW_EXT, 8'd1, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); - 2'b10: clear_narrow; - 2'b11: clear_narrow; + 2'b00: clear_narrow; + // + 2'b01: if (!cnt_lsb_wrapped) set_narrow(BANK_NARROW_Q, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_narrow(BANK_NARROW_EXT, OP_ADDR_EXT_Q, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + // + 2'b10: clear_narrow; + // + 2'b11: clear_narrow; // endcase // @@ -1159,44 +1139,49 @@ module modexpng_recombinator_block endtask + // + // Recombination Task - Rectangle + // task recombine_rectangle; // begin // case (rcmb_xy_valid) - // - 2'b01: inc_cnt_lsb; - 2'b10: inc_cnt_msb; - 2'b11: inc_cnt_both; - // + 2'b01: inc_cnt_lsb; + 2'b10: inc_cnt_msb; + 2'b11: inc_cnt_both; endcase -// // + // case (rcmb_xy_valid) -// // + // 2'b00: if (rcmb_msb_flag_delay_2) set_rdct(BANK_RCMB_MH, rcmb_msb_cnt_delay_2, rcmb_x_msb_delay_2_pad, rcmb_y_msb_delay_2_pad); else clear_rdct; + // 2'b01: set_rdct(BANK_RCMB_ML, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); - 2'b10: if (!cnt_msb_wrapped) begin - if (cnt_msb < 8'd2) clear_rdct; - else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); - end else set_rdct(BANK_RCMB_EXT, 8'd0, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); - - 2'b11: set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_msb_carry_1_pad, rcmb_y_lsb_msb_carry_1_pad); -// // + // + 2'b10: if (!cnt_msb_wrapped) begin + if (cnt_msb < OP_ADDR_TWO) clear_rdct; + else set_rdct(BANK_RCMB_MH, cnt_msb, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + end else set_rdct(BANK_RCMB_EXT, OP_ADDR_ZERO, rcmb_x_msb_dout_pad, rcmb_y_msb_dout_pad); + // + 2'b11: if (cnt_lsb == OP_ADDR_ZERO) set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_pad, rcmb_y_lsb_dout_pad); + else set_rdct(BANK_RCMB_MH, cnt_lsb, rcmb_x_lsb_dout_ext_pad, rcmb_y_lsb_dout_ext_pad); + // endcase -// // + // case (rcmb_xy_valid) -// // - 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); - 2'b10: begin - if ((cnt_msb < 8'd2) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); - if (cnt_msb_wrapped) advance_rcmb_msb_delay(16'hXXXX, 16'hXXXX, 8'd0, 1'b0); - end -// // - 2'b11: begin advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); - shift_rcmb_msb_carry({16{1'bX}}, {16{1'bX}}); - end -// // + // + 2'b00: if (rcmb_msb_flag_delay_2) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); + // + 2'b01: if (rcmb_xy_lsb_cry) shift_rcmb_msb_carry(WORD_DNC, WORD_DNC); + // + 2'b10: begin + if ((cnt_msb < OP_ADDR_TWO) && !cnt_msb_wrapped) shift_rcmb_msb_carry(rcmb_x_msb_dout, rcmb_y_msb_dout); + if (cnt_msb_wrapped) advance_rcmb_msb_delay(WORD_DNC, WORD_DNC, OP_ADDR_ZERO, 1'b0); + end + // + 2'b11: advance_rcmb_msb_delay(rcmb_x_msb_dout, rcmb_y_msb_dout, cnt_msb, 1'b1); + // endcase // end @@ -1204,50 +1189,19 @@ module modexpng_recombinator_block endtask + // + // Completion Logic + // always @(posedge clk) // - if (ena) begin - rdy_adv <= 1'b0; - end else if (!rdy_reg) begin + if (ena) rdy_adv <= 1'b0; + else if (!rdy_reg) // case (rcmb_mode) - // - 2'd1: case (rcmb_xy_valid) - // - 2'b00: begin - // - if (rcmb_msb_flag_delay_2) begin - // - rdy_adv <= ~rcmb_msb_flag_delay_1; - // - end - // - end - // - endcase - // - 2'd2: case (rcmb_xy_valid) - // - 2'b01: rdy_adv <= cnt_lsb_wrapped; // - // - endcase - // - 2'd3: case (rcmb_xy_valid) - // - 2'b00: begin - // - if (rcmb_msb_flag_delay_2) begin - // - rdy_adv <= ~rcmb_msb_flag_delay_1; - // - end - // - end - // - endcase - // + RCMB_MODE_SQUARE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase + RCMB_MODE_TRIANGLE: case (rcmb_xy_valid) 2'b01: rdy_adv <= cnt_lsb_wrapped; endcase + RCMB_MODE_RECTANGLE: case (rcmb_xy_valid) 2'b00: if (rcmb_msb_flag_delay_2) rdy_adv <= ~rcmb_msb_flag_delay_1; endcase endcase - // - end - + + endmodule diff --git a/rtl/modexpng_recombinator_cell.v b/rtl/modexpng_recombinator_cell.v index ef0ca2d..9761d9c 100644 --- a/rtl/modexpng_recombinator_cell.v +++ b/rtl/modexpng_recombinator_cell.v @@ -33,14 +33,19 @@ module modexpng_recombinator_cell ( clk, - ce, clr, - din, dout + ce, clr, cry, + cin, + din, dout, dout_ext ); + // // Headers // - `include "../rtl/modexpng_parameters.vh" + `include "modexpng_parameters.vh" + `include "modexpng_dsp48e1.vh" + `include "modexpng_dsp_slice_primitives.vh" + // // Ports @@ -48,25 +53,80 @@ module modexpng_recombinator_cell input clk; input ce; input clr; + input cry; + input [WORD_W -1:0] cin; input [ MAC_W -1:0] din; output [WORD_W -1:0] dout; + output [WORD_W :0] dout_ext; + + + // + // din <=> {z[13:0], y[15:0], x[15:0]} + // + wire [WORD_W -3:0] din_z = din[3 * WORD_W -3 : 2 * WORD_W]; // [45:32] + wire [WORD_W -1:0] din_y = din[2 * WORD_W -1 : WORD_W]; // [31:16] + wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0] + + + // + // Delayed Clock Enable + // + reg ce_dly = 1'b0; + always @(posedge clk) ce_dly <= ce; + + + // + // DSP Slice Buses + // + wire [DSP48E1_A_W-1:0] a_int; + wire [DSP48E1_B_W-1:0] b_int; + wire [DSP48E1_C_W-1:0] c_int; + wire [DSP48E1_P_W-1:0] p_int; - reg [WORD_W -2:0] z; - reg [WORD_W :0] y; - reg [WORD_W +1:0] x; - - assign dout = x[WORD_W-1:0]; + assign {a_int, b_int} = {{(DSP48E1_C_W-WORD_W){1'b0}}, cin}; + assign {c_int} = {din_z, 1'b0, din_y, 1'b1, din_x}; + - wire [WORD_W -2:0] din_z = din[3*WORD_W -2 :2*WORD_W]; // [46:32] - wire [WORD_W -1:0] din_y = din[2*WORD_W -1 : WORD_W]; // [31:16] - wire [WORD_W -1:0] din_x = din[ WORD_W -1 : 0]; // [15: 0] + // + // Combinational OPMODE Switch + // + reg [DSP48E1_OPMODE_W-1:0] opmode; - always @(posedge clk) + always @(clr, cry) // - if (ce) begin - z <= din_z; - y <= clr ? {1'b0, din_y} : {1'b0, din_y} + {2'b00, z}; - x <= clr ? {2'b00, din_x} : {2'b00, din_x} + {1'b0, y} + {WORD_ZERO, x[WORD_EXT_W-1:WORD_W]}; - end + casez ({clr, cry}) // clr has priority over cry! + 2'b1?: opmode = DSP48E1_OPMODE_Z0_YC_X0; + 2'b00: opmode = DSP48E1_OPMODE_ZP17_YC_X0; + 2'b01: opmode = DSP48E1_OPMODE_ZP17_YC_XAB; + endcase + + + // + // DSP Slice Instance + // + `MODEXPNG_DSP_SLICE_ADDSUB dsp_inst + ( + .clk (clk), + .ce_abc (ce), + .ce_p (ce_dly), + .ce_ctrl (ce), + .x ({a_int, b_int}), + .y (c_int), + .p (p_int), + .op_mode (opmode), + .alu_mode (DSP48E1_ALUMODE_Z_PLUS_X_AND_Y_AND_CIN), + .carry_in_sel (DSP48E1_CARRYINSEL_CARRYIN), + .casc_p_in (), + .casc_p_out (), + .carry_out () + ); + + + // + // Output Mapping + // + assign dout = {p_int[WORD_W-1:0]}; + assign dout_ext = {p_int[WORD_W+1], dout}; + endmodule |