//
// modexpng_reductor.v  (rtl/_modexpng_reductor.v)
//
// Provenance (recovered from a whitespace-collapsed cgit patch view):
//   commit  : 71f70252dfc7e41103dde420a721be8aa48486d5
//   author  : Pavel V. Shatov (Meister)
//   date    : Tue, 1 Oct 2019 16:18:33 +0300
//   subject : Redesigned core architecture, unified bank structure.
//             All storage blocks now have eight 4kbit entries and occupy
//             one 36K BRAM tile.
//
// Purpose
//   Consumes the stream of words produced by the recombinator
//   (rcmb_final_*), delays it by three clock cycles, adds the matching word
//   of an auxiliary wide operand (rd_wide_*_dout_aux) and emits the result
//   stream (rdct_final_*).  The X and Y channels are processed in parallel
//   with identical logic.
//
//   Per incoming word, selected by its bank tag after the delay:
//     BANK_RCMB_ML            : only the running carry is updated, nothing
//                               is emitted.
//     BANK_RCMB_MH, addr == 0 : same as above (carry update only).
//     BANK_RCMB_MH, addr == 1 : emits (word + aux + carry) at address 0.
//     BANK_RCMB_MH, addr  > 1 : emits (word + aux) at address addr - 1.
//     BANK_RCMB_EXT           : emits the word unchanged at address
//                               word_index_last and finishes the operation.
//
// Handshake
//   rdy is high while idle.  A cycle with rdy && ena starts an operation:
//   rdy drops and stays low until the operation is finished, then rises
//   again.
//
// NOTE(review): all widths and bank/address constants come from
//   modexpng_parameters.vh, which is not visible here.  The code relies on
//   RDCT_CARRY_W + WORD_W and on {WORD_NULL, carry} fitting the sums they
//   are assigned from / added to -- confirm against the header.
//

module modexpng_reductor
(
    clk, rst,
    ena, rdy,
    word_index_last,
    rd_wide_xy_addr_aux, rd_wide_xy_bank_aux, rd_wide_x_dout_aux, rd_wide_y_dout_aux,
    rcmb_final_xy_bank,  rcmb_final_xy_addr,  rcmb_final_x_dout,  rcmb_final_y_dout,  rcmb_final_xy_valid,
    rdct_final_xy_addr,  rdct_final_x_dout,   rdct_final_y_dout,  rdct_final_xy_valid
);

    //
    // Headers
    //
    `include "../rtl/modexpng_parameters.vh"


    //
    // Ports
    //
    input                       clk;                    // all registers are clocked on the rising edge
    input                       rst;                    // synchronous, active-high reset
    //
    input                       ena;                    // start request, honoured only while rdy is high
    output                      rdy;                    // high when idle
    //
    input  [  OP_ADDR_W -1:0]   word_index_last;        // output address used for the BANK_RCMB_EXT word
    //
    input  [BANK_ADDR_W -1:0]   rd_wide_xy_bank_aux;    // not used inside this module
    input  [  OP_ADDR_W -1:0]   rd_wide_xy_addr_aux;    // not used inside this module
    input  [ WORD_EXT_W -1:0]   rd_wide_x_dout_aux;     // auxiliary operand word, X channel
    input  [ WORD_EXT_W -1:0]   rd_wide_y_dout_aux;     // auxiliary operand word, Y channel
    //
    input  [BANK_ADDR_W -1:0]   rcmb_final_xy_bank;     // bank tag of the incoming word
    input  [  OP_ADDR_W -1:0]   rcmb_final_xy_addr;     // address of the incoming word
    input  [ WORD_EXT_W -1:0]   rcmb_final_x_dout;      // incoming word, X channel
    input  [ WORD_EXT_W -1:0]   rcmb_final_y_dout;      // incoming word, Y channel
    input                       rcmb_final_xy_valid;    // qualifies the four signals above
    //
    output [  OP_ADDR_W -1:0]   rdct_final_xy_addr;     // address of the emitted word
    output [ WORD_EXT_W -1:0]   rdct_final_x_dout;      // emitted word, X channel
    output [ WORD_EXT_W -1:0]   rdct_final_y_dout;      // emitted word, Y channel
    output                      rdct_final_xy_valid;    // qualifies the three signals above


    //
    // Ready
    //
    reg rdy_reg  = 1'b1;
    reg busy_now = 1'b0;

    assign rdy = rdy_reg;

    always @(posedge clk)
        //
        if (rst) rdy_reg <= 1'b1;
        else begin
            if (rdy && ena)        rdy_reg <= 1'b0;     // operation accepted
            if (!rdy && !busy_now) rdy_reg <= 1'b1;     // operation finished (see "Busy" below)
        end


    //
    // Pipeline (Delay Match)
    //
    // Three-stage delay line for the recombinator output.
    // NOTE(review): presumably this matches the read latency of the
    // rd_wide_*_dout_aux path -- confirm against the instantiating module.
    //
    reg                      rcmb_xy_valid_dly1 = 1'b0;
    reg                      rcmb_xy_valid_dly2 = 1'b0;
    reg                      rcmb_xy_valid_dly3 = 1'b0;

    reg [BANK_ADDR_W -1:0]   rcmb_xy_bank_dly1;
    reg [BANK_ADDR_W -1:0]   rcmb_xy_bank_dly2;
    reg [BANK_ADDR_W -1:0]   rcmb_xy_bank_dly3;

    reg [  OP_ADDR_W -1:0]   rcmb_xy_addr_dly1;
    reg [  OP_ADDR_W -1:0]   rcmb_xy_addr_dly2;
    reg [  OP_ADDR_W -1:0]   rcmb_xy_addr_dly3;

    reg [ WORD_EXT_W -1:0]   rcmb_x_dout_dly1;
    reg [ WORD_EXT_W -1:0]   rcmb_x_dout_dly2;
    reg [ WORD_EXT_W -1:0]   rcmb_x_dout_dly3;

    reg [ WORD_EXT_W -1:0]   rcmb_y_dout_dly1;
    reg [ WORD_EXT_W -1:0]   rcmb_y_dout_dly2;
    reg [ WORD_EXT_W -1:0]   rcmb_y_dout_dly3;

    // valid flags: resettable, shifted every cycle
    always @(posedge clk)
        //
        if (rst) begin
            rcmb_xy_valid_dly1 <= 1'b0;
            rcmb_xy_valid_dly2 <= 1'b0;
            rcmb_xy_valid_dly3 <= 1'b0;
        end else begin
            rcmb_xy_valid_dly1 <= rcmb_final_xy_valid;
            rcmb_xy_valid_dly2 <= rcmb_xy_valid_dly1;
            rcmb_xy_valid_dly3 <= rcmb_xy_valid_dly2;
        end

    // payload: no reset, each stage only loads when its input is valid
    always @(posedge clk) begin
        //
        if (rcmb_final_xy_valid) begin
            rcmb_xy_bank_dly1 <= rcmb_final_xy_bank;
            rcmb_xy_addr_dly1 <= rcmb_final_xy_addr;
            rcmb_x_dout_dly1  <= rcmb_final_x_dout;
            rcmb_y_dout_dly1  <= rcmb_final_y_dout;
        end
        //
        if (rcmb_xy_valid_dly1) begin
            rcmb_xy_bank_dly2 <= rcmb_xy_bank_dly1;
            rcmb_xy_addr_dly2 <= rcmb_xy_addr_dly1;
            rcmb_x_dout_dly2  <= rcmb_x_dout_dly1;
            rcmb_y_dout_dly2  <= rcmb_y_dout_dly1;
        end
        //
        if (rcmb_xy_valid_dly2) begin
            rcmb_xy_bank_dly3 <= rcmb_xy_bank_dly2;
            rcmb_xy_addr_dly3 <= rcmb_xy_addr_dly2;
            rcmb_x_dout_dly3  <= rcmb_x_dout_dly2;
            rcmb_y_dout_dly3  <= rcmb_y_dout_dly2;
        end
        //
    end


    //
    // Carry Logic
    //
    // *_carry : running carry out of the words summed so far
    // *_dummy : low WORD_W bits of those sums; written but never read
    // *_dout  : declared but currently unused
    //
    reg [RDCT_CARRY_W -1:0] rcmb_x_lsb_carry;
    reg [      WORD_W -1:0] rcmb_x_lsb_dummy;
    reg [  WORD_EXT_W -1:0] rcmb_x_lsb_dout;

    reg [RDCT_CARRY_W -1:0] rcmb_y_lsb_carry;
    reg [      WORD_W -1:0] rcmb_y_lsb_dummy;
    reg [  WORD_EXT_W -1:0] rcmb_y_lsb_dout;


    //
    // Carry Computation
    //
    // The carry is zeroed whenever ena is high (note: not qualified by rdy)
    // and is then propagated through every BANK_RCMB_ML word and through
    // word 0 of BANK_RCMB_MH.  Other words leave it untouched.
    //
    always @(posedge clk)
        //
        if (ena) begin
            rcmb_x_lsb_carry <= RDCT_CARRY_ZEROES;
            rcmb_y_lsb_carry <= RDCT_CARRY_ZEROES;
        end else if (rcmb_xy_valid_dly3)
            //
            case (rcmb_xy_bank_dly3)

                BANK_RCMB_ML: begin
                    {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
                    {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
                end

                BANK_RCMB_MH:
                    if (rcmb_xy_addr_dly3 == OP_ADDR_ZERO) begin
                        {rcmb_x_lsb_carry, rcmb_x_lsb_dummy} <= rcmb_x_dout_dly3 + rd_wide_x_dout_aux + rcmb_x_lsb_carry;
                        {rcmb_y_lsb_carry, rcmb_y_lsb_dummy} <= rcmb_y_dout_dly3 + rd_wide_y_dout_aux + rcmb_y_lsb_carry;
                    end

            endcase


    //
    // Reduction
    //
    reg [ OP_ADDR_W -1:0] rdct_xy_addr;
    reg [WORD_EXT_W -1:0] rdct_x_dout;
    reg [WORD_EXT_W -1:0] rdct_y_dout;
    reg                   rdct_xy_valid = 1'b0;

    assign rdct_final_xy_addr  = rdct_xy_addr;
    assign rdct_final_x_dout   = rdct_x_dout;
    assign rdct_final_y_dout   = rdct_y_dout;
    assign rdct_final_xy_valid = rdct_xy_valid;

    // Schedules a non-blocking update of all four output registers at once.
    task _update_rdct;
        input [ OP_ADDR_W -1:0] addr;
        input [WORD_EXT_W -1:0] dout_x;
        input [WORD_EXT_W -1:0] dout_y;
        input                   valid;
        begin
            rdct_xy_addr  <= addr;
            rdct_x_dout   <= dout_x;
            rdct_y_dout   <= dout_y;
            rdct_xy_valid <= valid;
        end
    endtask

    // Emits one valid output word.
    task set_rdct;
        input [ OP_ADDR_W -1:0] addr;
        input [WORD_EXT_W -1:0] dout_x;
        input [WORD_EXT_W -1:0] dout_y;
        begin
            _update_rdct(addr, dout_x, dout_y, 1'b1);
        end
    endtask

    // Drives the output idle: valid low, address/data set to the
    // *_DONT_CARE constants from the parameter header.
    task clear_rdct;
        begin
            _update_rdct(OP_ADDR_DONT_CARE, WORD_EXT_DONT_CARE, WORD_EXT_DONT_CARE, 1'b0);
        end
    endtask


    //
    // Helper Wires
    //
    // word + aux, and the same with the accumulated carry added in
    //
    wire [WORD_EXT_W -1:0] sum_rdct_x = rcmb_x_dout_dly3 + rd_wide_x_dout_aux;
    wire [WORD_EXT_W -1:0] sum_rdct_y = rcmb_y_dout_dly3 + rd_wide_y_dout_aux;

    wire [WORD_EXT_W -1:0] sum_rdct_x_carry = sum_rdct_x + {WORD_NULL, rcmb_x_lsb_carry};
    wire [WORD_EXT_W -1:0] sum_rdct_y_carry = sum_rdct_y + {WORD_NULL, rcmb_y_lsb_carry};


    //
    // Output Stage
    //
    // clear_rdct is scheduled first on every cycle; a set_rdct issued later
    // in the same cycle overrides it, because the last non-blocking
    // assignment to a register within one always block wins.  The output is
    // therefore valid for exactly the cycles in which a word is emitted.
    //
    always @(posedge clk)
        //
        if (rst) clear_rdct;
        else begin
            //
            clear_rdct;
            //
            if (busy_now && rcmb_xy_valid_dly3)
                //
                case (rcmb_xy_bank_dly3)

                    // word 0 of MH only feeds the carry and emits nothing;
                    // all later MH words are emitted one address lower
                    BANK_RCMB_MH:
                        if (rcmb_xy_addr_dly3 == OP_ADDR_ONE)
                            set_rdct(OP_ADDR_ZERO, sum_rdct_x_carry, sum_rdct_y_carry);
                        else if (rcmb_xy_addr_dly3 > OP_ADDR_ONE)
                            set_rdct(rcmb_xy_addr_dly3 - 1'b1, sum_rdct_x, sum_rdct_y);

                    // EXT word is passed through unmodified to the top address
                    BANK_RCMB_EXT:
                        set_rdct(word_index_last, rcmb_x_dout_dly3, rcmb_y_dout_dly3);

                endcase
            //
        end


    //
    // Busy
    //
    // Set when an operation is accepted, cleared once the BANK_RCMB_EXT word
    // has reached the output stage.  Without the clearing branch busy_now
    // would stay high until reset, rdy would never return high and the
    // module could only be used once per reset.
    //
    // busy_now is still high in the cycle the EXT word is processed (the
    // non-blocking clear takes effect afterwards), so that word is still
    // emitted by the output stage above.
    //
    // NOTE(review): this assumes the BANK_RCMB_EXT word is the last word of
    // an operation, which matches it being written to word_index_last --
    // confirm against the recombinator's output order.
    //
    always @(posedge clk)
        //
        if (rst) busy_now <= 1'b0;
        else begin
            if (rdy && ena)
                busy_now <= 1'b1;
            if (busy_now && rcmb_xy_valid_dly3 && (rcmb_xy_bank_dly3 == BANK_RCMB_EXT))
                busy_now <= 1'b0;
        end


endmodule