diff options
Diffstat (limited to 'src/rtl')
-rw-r--r-- | src/rtl/adder.v (renamed from src/rtl/adder32.v) | 42 | ||||
-rw-r--r-- | src/rtl/blockmem1r1w.v | 22 | ||||
-rw-r--r-- | src/rtl/blockmem2r1w.v | 25 | ||||
-rw-r--r-- | src/rtl/blockmem2r1wptr.v | 44 | ||||
-rw-r--r-- | src/rtl/blockmem2rptr1w.v | 41 | ||||
-rw-r--r-- | src/rtl/modexp.v | 78 | ||||
-rw-r--r-- | src/rtl/modexp_core.v | 256 | ||||
-rw-r--r-- | src/rtl/montprod.v | 777 | ||||
-rw-r--r-- | src/rtl/residue.v | 402 | ||||
-rw-r--r-- | src/rtl/shl.v (renamed from src/rtl/shl32.v) | 31 | ||||
-rw-r--r-- | src/rtl/shr.v (renamed from src/rtl/shr32.v) | 25 |
11 files changed, 914 insertions, 829 deletions
diff --git a/src/rtl/adder32.v b/src/rtl/adder.v index d9cac45..fa8ed8c 100644 --- a/src/rtl/adder32.v +++ b/src/rtl/adder.v @@ -1,11 +1,12 @@ //====================================================================== // -// adder32.v -// --------- -// 32bit adder with carry in / carry out +// adder.v +// ------- +// Adder with separate carry in and carry out. Used in the montprod +// and residue modules of the modexp core. // // -// Author: Peter Magnusson +// Author: Peter Magnusson, Joachim Strömbergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,19 +37,28 @@ // //====================================================================== +module adder #(parameter OPW = 32) + ( + input [(OPW - 1) : 0] a, + input [(OPW - 1) : 0] b, + input carry_in, -module adder32( - input [31 : 0] a, - input [31 : 0] b, - input carry_in, - output wire [31 : 0] sum, - output wire carry_out); + output wire [(OPW - 1) : 0] sum, + output wire carry_out + ); - reg [32 : 0] adder_result; + reg [(OPW) : 0] adder_result; - assign sum = adder_result[31:0]; - assign carry_out = adder_result[32]; + assign sum = adder_result[(OPW - 1) : 0]; + assign carry_out = adder_result[(OPW)]; - always @(a, b, carry_in) - adder_result = {1'b0, a} + {1'b0, b} + {32'b0, carry_in}; -endmodule + always @* + begin + adder_result = {1'b0, a} + {1'b0, b} + {{OPW{1'b0}}, carry_in}; + end + +endmodule // adder + +//====================================================================== +// EOF adder.v +//====================================================================== diff --git a/src/rtl/blockmem1r1w.v b/src/rtl/blockmem1r1w.v index 1d84369..6856e0a 100644 --- a/src/rtl/blockmem1r1w.v +++ b/src/rtl/blockmem1r1w.v @@ -7,6 +7,9 @@ // // The memory is used in the modexp core. // +// parameter OPW is operand word width in bits. +// parameter ADW is address width in bits. 
+// // // Author: Joachim Strombergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. @@ -39,19 +42,20 @@ // //====================================================================== -module blockmem1r1w( - input wire clk, +module blockmem1r1w #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, - input wire [07 : 0] read_addr, - output wire [31 : 0] read_data, + input wire [(ADW - 1) : 0] read_addr, + output wire [(OPW - 1) : 0] read_data, - input wire wr, - input wire [07 : 0] write_addr, - input wire [31 : 0] write_data + input wire wr, + input wire [(ADW - 1) : 0] write_addr, + input wire [(OPW - 1) : 0] write_data ); - reg [31 : 0] mem [0 : 255]; - reg [31 : 0] tmp_read_data; + reg [(OPW - 1) : 0] mem [0 : ((2**ADW) - 1)]; + reg [(OPW - 1) : 0] tmp_read_data; assign read_data = tmp_read_data; diff --git a/src/rtl/blockmem2r1w.v b/src/rtl/blockmem2r1w.v index 252764f..aa44101 100644 --- a/src/rtl/blockmem2r1w.v +++ b/src/rtl/blockmem2r1w.v @@ -39,23 +39,24 @@ // //====================================================================== -module blockmem2r1w( - input wire clk, +module blockmem2r1w #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, - input wire [07 : 0] read_addr0, - output wire [31 : 0] read_data0, + input wire [(ADW - 1) : 0] read_addr0, + output wire [(OPW - 1) : 0] read_data0, - input wire [07 : 0] read_addr1, - output wire [31 : 0] read_data1, + input wire [(ADW - 1) : 0] read_addr1, + output wire [(OPW - 1) : 0] read_data1, - input wire wr, - input wire [07 : 0] write_addr, - input wire [31 : 0] write_data + input wire wr, + input wire [(ADW - 1) : 0] write_addr, + input wire [(OPW - 1) : 0] write_data ); - reg [31 : 0] mem [0 : 255]; - reg [31 : 0] tmp_read_data0; - reg [31 : 0] tmp_read_data1; + reg [(OPW - 1) : 0] mem [0 : ((2**ADW) - 1)]; + reg [(OPW - 1) : 0] tmp_read_data0; + reg [(OPW - 1) : 0] tmp_read_data1; assign read_data0 = tmp_read_data0; assign read_data1 = tmp_read_data1; diff --git 
a/src/rtl/blockmem2r1wptr.v b/src/rtl/blockmem2r1wptr.v index 41efc85..2435cfd 100644 --- a/src/rtl/blockmem2r1wptr.v +++ b/src/rtl/blockmem2r1wptr.v @@ -8,6 +8,12 @@ // when the cs signal is set. The pointer is reset to zero when // the rst signal is asserted. // +// +// NOTE: This memory needs to be rebuilt if interface 0 is changed +// to use bigger operand widths and fewer words than interface 1. +// This adaption is NOT automatic. +// +// // The memory is used in the modexp core. // // @@ -42,32 +48,32 @@ // //====================================================================== -module blockmem2r1wptr( - input wire clk, - input wire reset_n, - - input wire [07 : 0] read_addr0, - output wire [31 : 0] read_data0, +module blockmem2r1wptr #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, + input wire reset_n, - output wire [31 : 0] read_data1, + input wire [(ADW - 1) : 0] read_addr0, + output wire [(OPW - 1) : 0] read_data0, - input wire rst, - input wire cs, - input wire wr, - input wire [31 : 0] write_data + output wire [31 : 0] read_data1, + input wire rst, + input wire cs, + input wire wr, + input wire [31 : 0] write_data ); //---------------------------------------------------------------- // Memories and regs including update variables and write enable. 
//---------------------------------------------------------------- - reg [31 : 0] mem [0 : 255]; - reg [31 : 0] tmp_read_data0; - reg [31 : 0] tmp_read_data1; + reg [(OPW - 1) : 0] mem [0 : ((2**ADW) - 1)]; + reg [(OPW - 1) : 0] tmp_read_data0; + reg [31 : 0] tmp_read_data1; - reg [7 : 0] ptr_reg; - reg [7 : 0] ptr_new; - reg ptr_we; + reg [7 : 0] ptr_reg; + reg [7 : 0] ptr_new; + reg ptr_we; //---------------------------------------------------------------- @@ -94,10 +100,10 @@ module blockmem2r1wptr( //---------------------------------------------------------------- - // reg_update + // ptr_update //---------------------------------------------------------------- always @ (posedge clk or negedge reset_n) - begin : reg_mem_update + begin : ptr_update if (!reset_n) ptr_reg <= 8'h00; diff --git a/src/rtl/blockmem2rptr1w.v b/src/rtl/blockmem2rptr1w.v index 4eb529e..a1d7448 100644 --- a/src/rtl/blockmem2rptr1w.v +++ b/src/rtl/blockmem2rptr1w.v @@ -9,6 +9,11 @@ // The memory is used in the modexp core. // // +// NOTE: This memory needs to be rebuilt if interface 0 is changed +// to use bigger operand widths and fewer words than interface 1. +// This adaption is NOT automatic. +// +// // Author: Joachim Strombergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. 
// @@ -40,33 +45,33 @@ // //====================================================================== -module blockmem2rptr1w( - input wire clk, - input wire reset_n, - - input wire [07 : 0] read_addr0, - output wire [31 : 0] read_data0, +module blockmem2rptr1w #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, + input wire reset_n, - output wire [31 : 0] read_data1, + input wire [(ADW - 1) : 0] read_addr0, + output wire [(OPW - 1) : 0] read_data0, - input wire rst, - input wire cs, - input wire wr, - input wire [07 : 0] write_addr, - input wire [31 : 0] write_data + output wire [31 : 0] read_data1, + input wire rst, + input wire cs, + input wire wr, + input wire [07 : 0] write_addr, + input wire [31 : 0] write_data ); //---------------------------------------------------------------- // Memories and regs including update variables and write enable. //---------------------------------------------------------------- - reg [31 : 0] mem [0 : 255]; - reg [31 : 0] tmp_read_data0; - reg [31 : 0] tmp_read_data1; + reg [(OPW - 1) : 0] mem [0 : ((2**ADW) - 1)]; + reg [(OPW - 1) : 0] tmp_read_data0; + reg [31 : 0] tmp_read_data1; - reg [7 : 0] ptr_reg; - reg [7 : 0] ptr_new; - reg ptr_we; + reg [7 : 0] ptr_reg; + reg [7 : 0] ptr_new; + reg ptr_we; //---------------------------------------------------------------- diff --git a/src/rtl/modexp.v b/src/rtl/modexp.v index 2af987f..ebda365 100644 --- a/src/rtl/modexp.v +++ b/src/rtl/modexp.v @@ -72,6 +72,15 @@ module modexp( //---------------------------------------------------------------- // Internal constant and parameter definitions. //---------------------------------------------------------------- + + // The operand width is the internal operand width in bits. + // The address width is the size of the address space used. This + // value must be balanced with OPERAND_WIDTH to allow a total + // of 8192 bits of data. OPERAND_WIDTH * (2 ** ADDRESS_WIDTH) + // is the formula. 
Note that the API data width is always 32 bits. + localparam OPERAND_WIDTH = 32; + localparam ADDRESS_WIDTH = 8; + localparam ADDR_NAME0 = 8'h00; localparam ADDR_NAME1 = 8'h01; localparam ADDR_VERSION = 8'h02; @@ -161,40 +170,41 @@ module modexp( //---------------------------------------------------------------- // core instantiations. //---------------------------------------------------------------- - modexp_core core_inst( - .clk(clk), - .reset_n(reset_n), - - .start(start_reg), - .ready(ready), - - .exponent_length(exponent_length_reg), - .modulus_length(modulus_length_reg), - - .cycles(cycles), - - .exponent_mem_api_cs(exponent_mem_api_cs), - .exponent_mem_api_wr(exponent_mem_api_wr), - .exponent_mem_api_rst(exponent_mem_api_rst), - .exponent_mem_api_write_data(write_data), - .exponent_mem_api_read_data(exponent_mem_api_read_data), - - .modulus_mem_api_cs(modulus_mem_api_cs), - .modulus_mem_api_wr(modulus_mem_api_wr), - .modulus_mem_api_rst(modulus_mem_api_rst), - .modulus_mem_api_write_data(write_data), - .modulus_mem_api_read_data(modulus_mem_api_read_data), - - .message_mem_api_cs(message_mem_api_cs), - .message_mem_api_wr(message_mem_api_wr), - .message_mem_api_rst(message_mem_api_rst), - .message_mem_api_write_data(write_data), - .message_mem_api_read_data(message_mem_api_read_data), - - .result_mem_api_cs(result_mem_api_cs), - .result_mem_api_rst(result_mem_api_rst), - .result_mem_api_read_data(result_mem_api_read_data) - ); + modexp_core #(.OPW(OPERAND_WIDTH), .ADW(ADDRESS_WIDTH)) + core_inst( + .clk(clk), + .reset_n(reset_n), + + .start(start_reg), + .ready(ready), + + .exponent_length(exponent_length_reg), + .modulus_length(modulus_length_reg), + + .cycles(cycles), + + .exponent_mem_api_cs(exponent_mem_api_cs), + .exponent_mem_api_wr(exponent_mem_api_wr), + .exponent_mem_api_rst(exponent_mem_api_rst), + .exponent_mem_api_write_data(write_data), + .exponent_mem_api_read_data(exponent_mem_api_read_data), + + .modulus_mem_api_cs(modulus_mem_api_cs), + 
.modulus_mem_api_wr(modulus_mem_api_wr), + .modulus_mem_api_rst(modulus_mem_api_rst), + .modulus_mem_api_write_data(write_data), + .modulus_mem_api_read_data(modulus_mem_api_read_data), + + .message_mem_api_cs(message_mem_api_cs), + .message_mem_api_wr(message_mem_api_wr), + .message_mem_api_rst(message_mem_api_rst), + .message_mem_api_write_data(write_data), + .message_mem_api_read_data(message_mem_api_read_data), + + .result_mem_api_cs(result_mem_api_cs), + .result_mem_api_rst(result_mem_api_rst), + .result_mem_api_read_data(result_mem_api_read_data) + ); //---------------------------------------------------------------- diff --git a/src/rtl/modexp_core.v b/src/rtl/modexp_core.v index b8c4403..c1a88d1 100644 --- a/src/rtl/modexp_core.v +++ b/src/rtl/modexp_core.v @@ -54,7 +54,8 @@ // //====================================================================== -module modexp_core( +module modexp_core #(parameter OPW = 32, parameter ADW = 8) + ( input wire clk, input wire reset_n, @@ -235,7 +236,8 @@ module modexp_core( reg residue_valid_new; reg residue_valid_int_validated; - wire [7 : 0] length_m1; + wire [7 : 0] modulus_length_m1; + wire [7 : 0] exponent_length_m1; //---------------------------------------------------------------- @@ -244,127 +246,137 @@ module modexp_core( assign ready = ready_reg; assign cycles = {cycle_ctr_high_reg, cycle_ctr_low_reg}; - assign length_m1 = modulus_length - 8'h1; + assign modulus_length_m1 = modulus_length - 8'h1; + assign exponent_length_m1 = exponent_length - 8'h1; //---------------------------------------------------------------- // core instantiations. 
//---------------------------------------------------------------- - montprod montprod_inst( - .clk(clk), - .reset_n(reset_n), - - .calculate(montprod_calc), - .ready(montprod_ready), - - .length(montprod_length), - - .opa_addr(montprod_opa_addr), - .opa_data(montprod_opa_data), - - .opb_addr(montprod_opb_addr), - .opb_data(montprod_opb_data), - - .opm_addr(montprod_opm_addr), - .opm_data(montprod_opm_data), - - .result_addr(montprod_result_addr), - .result_data(montprod_result_data), - .result_we(montprod_result_we) - ); - - - residue residue_inst( - .clk(clk), - .reset_n(reset_n), - .calculate(residue_calculate), - .ready(residue_ready), - .nn(residue_nn), - .length(residue_length), - .opa_rd_addr(residue_opa_rd_addr), - .opa_rd_data(residue_opa_rd_data), - .opa_wr_addr(residue_opa_wr_addr), - .opa_wr_data(residue_opa_wr_data), - .opa_wr_we(residue_opa_wr_we), - .opm_addr(residue_opm_addr), - .opm_data(residue_opm_data) - ); - - blockmem2r1w residue_mem( - .clk(clk), - .read_addr0(residue_opa_rd_addr), - .read_data0(residue_opa_rd_data), - .read_addr1(residue_mem_montprod_read_addr), - .read_data1(residue_mem_montprod_read_data), - .wr(residue_opa_wr_we), - .write_addr(residue_opa_wr_addr), - .write_data(residue_opa_wr_data) - ); - - - blockmem2r1w p_mem( - .clk(clk), - .read_addr0(p_mem_rd0_addr), - .read_data0(p_mem_rd0_data), - .read_addr1(p_mem_rd1_addr), - .read_data1(p_mem_rd1_data), - .wr(p_mem_we), - .write_addr(p_mem_wr_addr), - .write_data(p_mem_wr_data) - ); - - - blockmem2r1wptr exponent_mem( - .clk(clk), - .reset_n(reset_n), - .read_addr0(exponent_mem_int_rd_addr), - .read_data0(exponent_mem_int_rd_data), - .read_data1(exponent_mem_api_read_data), - .rst(exponent_mem_api_rst), - .cs(exponent_mem_api_cs), - .wr(exponent_mem_api_wr), - .write_data(exponent_mem_api_write_data) - ); - - - blockmem2r1wptr modulus_mem( - .clk(clk), - .reset_n(reset_n), - .read_addr0(modulus_mem_int_rd_addr), - .read_data0(modulus_mem_int_rd_data), - 
.read_data1(modulus_mem_api_read_data), - .rst(modulus_mem_api_rst), - .cs(modulus_mem_api_cs), - .wr(modulus_mem_api_wr), - .write_data(modulus_mem_api_write_data) - ); - - - blockmem2r1wptr message_mem( - .clk(clk), - .reset_n(reset_n), - .read_addr0(message_mem_int_rd_addr), - .read_data0(message_mem_int_rd_data), - .read_data1(message_mem_api_read_data), - .rst(message_mem_api_rst), - .cs(message_mem_api_cs), - .wr(message_mem_api_wr), - .write_data(message_mem_api_write_data) - ); - - - blockmem2rptr1w result_mem( - .clk(clk), - .reset_n(reset_n), - .read_addr0(result_mem_int_rd_addr[7 : 0]), - .read_data0(result_mem_int_rd_data), - .read_data1(result_mem_api_read_data), - .rst(result_mem_api_rst), - .cs(result_mem_api_cs), - .wr(result_mem_int_we), - .write_addr(result_mem_int_wr_addr), - .write_data(result_mem_int_wr_data) - ); + montprod #(.OPW(OPW), .ADW(ADW)) + montprod_inst( + .clk(clk), + .reset_n(reset_n), + + .calculate(montprod_calc), + .ready(montprod_ready), + + .length(montprod_length), + + .opa_addr(montprod_opa_addr), + .opa_data(montprod_opa_data), + + .opb_addr(montprod_opb_addr), + .opb_data(montprod_opb_data), + + .opm_addr(montprod_opm_addr), + .opm_data(montprod_opm_data), + + .result_addr(montprod_result_addr), + .result_data(montprod_result_data), + .result_we(montprod_result_we) + ); + + + residue #(.OPW(OPW), .ADW(ADW)) + residue_inst( + .clk(clk), + .reset_n(reset_n), + .calculate(residue_calculate), + .ready(residue_ready), + .nn(residue_nn), + .length(residue_length), + .opa_rd_addr(residue_opa_rd_addr), + .opa_rd_data(residue_opa_rd_data), + .opa_wr_addr(residue_opa_wr_addr), + .opa_wr_data(residue_opa_wr_data), + .opa_wr_we(residue_opa_wr_we), + .opm_addr(residue_opm_addr), + .opm_data(residue_opm_data) + ); + + + blockmem2r1w #(.OPW(OPW), .ADW(ADW)) + residue_mem( + .clk(clk), + .read_addr0(residue_opa_rd_addr), + .read_data0(residue_opa_rd_data), + .read_addr1(residue_mem_montprod_read_addr), + 
.read_data1(residue_mem_montprod_read_data), + .wr(residue_opa_wr_we), + .write_addr(residue_opa_wr_addr), + .write_data(residue_opa_wr_data) + ); + + + blockmem2r1w #(.OPW(OPW), .ADW(ADW)) + p_mem( + .clk(clk), + .read_addr0(p_mem_rd0_addr), + .read_data0(p_mem_rd0_data), + .read_addr1(p_mem_rd1_addr), + .read_data1(p_mem_rd1_data), + .wr(p_mem_we), + .write_addr(p_mem_wr_addr), + .write_data(p_mem_wr_data) + ); + + + blockmem2r1wptr #(.OPW(OPW), .ADW(ADW)) + exponent_mem( + .clk(clk), + .reset_n(reset_n), + .read_addr0(exponent_mem_int_rd_addr), + .read_data0(exponent_mem_int_rd_data), + .read_data1(exponent_mem_api_read_data), + .rst(exponent_mem_api_rst), + .cs(exponent_mem_api_cs), + .wr(exponent_mem_api_wr), + .write_data(exponent_mem_api_write_data) + ); + + + blockmem2r1wptr #(.OPW(OPW), .ADW(ADW)) + modulus_mem( + .clk(clk), + .reset_n(reset_n), + .read_addr0(modulus_mem_int_rd_addr), + .read_data0(modulus_mem_int_rd_data), + .read_data1(modulus_mem_api_read_data), + .rst(modulus_mem_api_rst), + .cs(modulus_mem_api_cs), + .wr(modulus_mem_api_wr), + .write_data(modulus_mem_api_write_data) + ); + + + blockmem2r1wptr #(.OPW(OPW), .ADW(ADW)) + message_mem( + .clk(clk), + .reset_n(reset_n), + .read_addr0(message_mem_int_rd_addr), + .read_data0(message_mem_int_rd_data), + .read_data1(message_mem_api_read_data), + .rst(message_mem_api_rst), + .cs(message_mem_api_cs), + .wr(message_mem_api_wr), + .write_data(message_mem_api_write_data) + ); + + + blockmem2rptr1w #(.OPW(OPW), .ADW(ADW)) + result_mem( + .clk(clk), + .reset_n(reset_n), + .read_addr0(result_mem_int_rd_addr[7 : 0]), + .read_data0(result_mem_int_rd_data), + .read_data1(result_mem_api_read_data), + .rst(result_mem_api_rst), + .cs(result_mem_api_cs), + .wr(result_mem_int_we), + .write_addr(result_mem_int_wr_addr), + .write_data(result_mem_int_wr_data) + ); //---------------------------------------------------------------- @@ -485,10 +497,10 @@ module modexp_core( one_new = 32'h00000000; b_one_new = 
32'h00000000; - if (montprod_opa_addr == length_m1) + if (montprod_opa_addr == modulus_length_m1) one_new = 32'h00000001; - if (montprod_opb_addr == length_m1) + if (montprod_opb_addr == modulus_length_m1) b_one_new = 32'h00000001; end @@ -634,7 +646,7 @@ module modexp_core( loop_counter_new = 13'b0; loop_counter_we = 1'b0; - if (loop_counter_reg == {length_m1, 5'b11111}) + if (loop_counter_reg == {exponent_length_m1, 5'b11111}) last_iteration = 1'b1; else last_iteration = 1'b0; @@ -668,7 +680,7 @@ module modexp_core( begin : exponent_process // Accessing new instead of reg - pick up update at // CTRL_ITERATE_NEW to remove a pipeline stall. - E_word_index = length_m1 - loop_counter_new[ 12 : 5 ]; + E_word_index = exponent_length_m1 - loop_counter_new[ 12 : 5 ]; E_bit_index = loop_counter_reg[ 04 : 0 ]; diff --git a/src/rtl/montprod.v b/src/rtl/montprod.v index 6b525f7..3a310d7 100644 --- a/src/rtl/montprod.v +++ b/src/rtl/montprod.v @@ -4,6 +4,9 @@ // --------- // Montgomery product calculator for the modular exponentiation core. // +// parameter OPW is operand word width in bits. +// parameter ADW is address width in bits. +// // // Author: Peter Magnusson, Joachim Strombergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. 
@@ -36,135 +39,158 @@ // //====================================================================== -module montprod( - input wire clk, - input wire reset_n, +module montprod #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, + input wire reset_n, - input wire calculate, - output wire ready, + input wire calculate, + output wire ready, - input [7 : 0] length, + input wire [(ADW - 1) : 0] length, - output wire [7 : 0] opa_addr, - input wire [31 : 0] opa_data, + output wire [(ADW - 1) : 0] opa_addr, + input wire [(OPW - 1) : 0] opa_data, - output wire [7 : 0] opb_addr, - input wire [31 : 0] opb_data, + output wire [(ADW - 1) : 0] opb_addr, + input wire [(OPW - 1) : 0] opb_data, - output wire [7 : 0] opm_addr, - input wire [31 : 0] opm_data, + output wire [(ADW - 1) : 0] opm_addr, + input wire [(OPW - 1) : 0] opm_data, - output wire [7 : 0] result_addr, - output wire [31 : 0] result_data, - output wire result_we + output wire [(ADW - 1) : 0] result_addr, + output wire [(OPW - 1) : 0] result_data, + output wire result_we ); //---------------------------------------------------------------- // Internal constant and parameter definitions. 
//---------------------------------------------------------------- - localparam DEBUG = 0; - - localparam CTRL_IDLE = 4'h0; - localparam CTRL_INIT_S = 4'h1; - localparam CTRL_LOOP_INIT = 4'h2; - localparam CTRL_LOOP_ITER = 4'h3; - localparam CTRL_LOOP_BQ = 4'h4; - localparam CTRL_L_CALC_SM = 4'h5; - localparam CTRL_L_STALLPIPE_SM = 4'h6; - localparam CTRL_L_CALC_SA = 4'h7; - localparam CTRL_L_STALLPIPE_SA = 4'h8; - localparam CTRL_L_CALC_SDIV2 = 4'h9; - localparam CTRL_L_STALLPIPE_D2 = 4'hA; - localparam CTRL_L_STALLPIPE_ES = 4'hB; - localparam CTRL_EMIT_S = 4'hC; - localparam CTRL_DONE = 4'hD; - - localparam SMUX_0 = 2'h0; - localparam SMUX_ADD_SM = 2'h1; - localparam SMUX_ADD_SA = 2'h2; - localparam SMUX_SHR = 2'h3; + localparam CTRL_IDLE = 4'h0; + localparam CTRL_LOOP_ITER = 4'h1; + localparam CTRL_LOOP_BQ = 4'h2; + localparam CTRL_CALC_ADD = 4'h3; + localparam CTRL_STALLPIPE_ADD = 4'h4; + localparam CTRL_CALC_SDIV2 = 4'h5; + localparam CTRL_STALLPIPE_SDIV2 = 4'h6; + localparam CTRL_L_STALLPIPE_ES = 4'h7; + localparam CTRL_EMIT_S = 4'h8; + + localparam SMUX_ZERO = 2'h0; + localparam SMUX_ADD = 2'h1; + localparam SMUX_SHR = 2'h2; + //---------------------------------------------------------------- // Registers including update variables and write enable. 
//---------------------------------------------------------------- + reg ready_reg; + reg ready_new; + reg ready_we; - reg [07 : 0] opa_addr_reg; - reg [07 : 0] opb_addr_reg; - reg [07 : 0] opm_addr_reg; + reg [3 : 0] montprod_ctrl_reg; + reg [3 : 0] montprod_ctrl_new; + reg montprod_ctrl_we; - reg [07 : 0] result_addr_reg; - reg [31 : 0] result_data_reg; + reg [1 : 0] s_mux_new; + reg [1 : 0] s_mux_reg; - reg ready_reg; - reg ready_new; - reg ready_we; + reg s_mem_we_reg; + reg s_mem_we_new; - reg [3 : 0] montprod_ctrl_reg; - reg [3 : 0] montprod_ctrl_new; - reg montprod_ctrl_we; + reg [(ADW - 1) : 0] s_mem_read_addr_reg; - reg [1 : 0] s_mux_new; - reg [1 : 0] s_mux_reg; + reg q_new; + reg q_reg; + reg b_new; + reg b_reg; + reg bq_we; - reg [31 : 0] s_mem_new; - reg s_mem_we; - reg s_mem_we_new; - reg [07 : 0] s_mem_addr; - reg [07 : 0] s_mem_wr_addr; - wire [31 : 0] s_mem_read_data; + reg [12 : 0] loop_ctr_reg; + reg [12 : 0] loop_ctr_new; + reg loop_ctr_we; + reg loop_ctr_set; + reg loop_ctr_dec; - reg q; //q = (s - b * A) & 1 - reg q_reg; - reg b; //b: bit of B - reg b_reg; + reg [(13 - ADW - 1) : 0] b_bit_index_reg; + reg [(13 - ADW - 1) : 0] b_bit_index_new; + reg b_bit_index_we; - reg [12 : 0] loop_counter; - reg [12 : 0] loop_counter_new; - reg [12 : 0] loop_counter_dec; - reg [07 : 0] B_word_index; //loop counter as a word index - reg [04 : 0] B_bit_index; //loop counter as a bit index - reg [04 : 0] B_bit_index_reg; //loop counter as a bit index + reg [(ADW - 1) : 0] word_index_reg; + reg [(ADW - 1) : 0] word_index_new; + reg word_index_we; + reg [(ADW - 1) : 0] word_index_prev_reg; + reg reset_word_index_lsw; + reg reset_word_index_msw; + reg inc_word_index; + reg dec_word_index; - reg [07 : 0] word_index; //register of what word is being read - reg [07 : 0] word_index_new; //calculation of what word to be read - reg [07 : 0] word_index_prev; //register of what word was read previously (result address to emit) - reg [07 : 0] length_m1; + reg 
add_carry_in_sa_reg; + reg add_carry_in_sa_new; + reg add_carry_in_sm_reg; + reg add_carry_in_sm_new; - reg add_carry_in_sa; - reg add_carry_new_sa; - reg add_carry_in_sm; - reg add_carry_new_sm; + reg shr_carry_in_reg; + reg shr_carry_in_new; - reg shr_carry_in; - reg shr_carry_new; + reg first_iteration_reg; + reg first_iteration_new; + reg first_iteration_we; - reg reset_word_index_LSW; - reg reset_word_index_MSW; + reg test_reg; + reg test_new; + + reg [(OPW - 2) : 0] shr_data_out_reg; + reg shr_carry_out_reg; + reg shr_carry_out_new; //---------------------------------------------------------------- // Wires. //---------------------------------------------------------------- - reg tmp_result_we; - wire [31 : 0] add_result_sa; - wire add_carry_out_sa; - wire [31 : 0] add_result_sm; - wire add_carry_out_sm; + wire [(OPW - 1) : 0] add_result_sa; + wire add_carry_out_sa; + wire [(OPW - 1) : 0] add_result_sm; + wire add_carry_out_sm; + + reg [(ADW - 1) : 0] b_word_index; //loop counter as a word index + + reg [(OPW - 1) : 0] shr_data_in; + wire shr_carry_out; + wire [(OPW - 1) : 0] shr_data_out; + + reg [(ADW - 1) : 0] tmp_opa_addr; + reg tmp_result_we; + + reg [(ADW - 1) : 0] s_mem_read_addr; + wire [(OPW - 1) : 0] s_mem_read_data; + reg [(ADW - 1) : 0] s_mem_write_addr; + reg [(OPW - 1) : 0] s_mem_write_data; + reg [(OPW - 1) : 0] tmp_s_mem_write_data; + + reg [(OPW - 1) : 0] sa_adder_data_in; + reg [(OPW - 1) : 0] muxed_s_mem_read_data; + reg [(OPW - 1) : 0] shifted_s_mem_write_data; + + wire [(ADW - 1) : 0] length_m1; - wire shr_carry_out; - wire [31 : 0] shr_adiv2; + // Temporary debug wires. + reg [1 : 0] state_trace; + reg [1 : 0] mux_trace; //---------------------------------------------------------------- // Concurrent connectivity for ports etc. 
//---------------------------------------------------------------- - assign opa_addr = opa_addr_reg; - assign opb_addr = opb_addr_reg; - assign opm_addr = opm_addr_reg; + assign length_m1 = length - 1'b1; - assign result_addr = result_addr_reg; - assign result_data = result_data_reg; + assign opa_addr = tmp_opa_addr; + assign opb_addr = b_word_index; + assign opm_addr = word_index_reg; + + assign result_addr = word_index_prev_reg; + assign result_data = s_mem_read_data; assign result_we = tmp_result_we; assign ready = ready_reg; @@ -173,55 +199,38 @@ module montprod( //---------------------------------------------------------------- // Instantions //---------------------------------------------------------------- - - blockmem1r1w s_mem( - .clk(clk), - .read_addr(s_mem_addr), - .read_data(s_mem_read_data), - .wr(s_mem_we), - .write_addr(s_mem_wr_addr), - .write_data(s_mem_new) - ); - - - adder32 s_adder_sa( - .a(s_mem_read_data), - .b(opa_data), - .carry_in(add_carry_in_sa), - .sum(add_result_sa), - .carry_out(add_carry_out_sa) - ); - - adder32 s_adder_sm( - .a(s_mem_read_data), - .b(opm_data), - .carry_in(add_carry_in_sm), - .sum(add_result_sm), - .carry_out(add_carry_out_sm) - ); - - shr32 shifter( - .a( s_mem_read_data ), - .carry_in( shr_carry_in ), - .adiv2( shr_adiv2 ), - .carry_out( shr_carry_out ) - ); - - always @* - begin : s_mux - case (s_mux_reg) - SMUX_0: - s_mem_new = 32'b0; - SMUX_ADD_SA: - s_mem_new = add_result_sa; - SMUX_ADD_SM: - s_mem_new = add_result_sm; - SMUX_SHR: - s_mem_new = shr_adiv2; - endcase - if (DEBUG) - $display("SMUX%x: %x", s_mux_reg, s_mem_new); - end + blockmem1r1w #(.OPW(OPW), .ADW(ADW)) s_mem( + .clk(clk), + .read_addr(s_mem_read_addr), + .read_data(s_mem_read_data), + .wr(s_mem_we_reg), + .write_addr(s_mem_write_addr), + .write_data(s_mem_write_data) + ); + + adder #(.OPW(OPW)) s_adder_sm( + .a(muxed_s_mem_read_data), + .b(opm_data), + .carry_in(add_carry_in_sm_reg), + .sum(add_result_sm), + .carry_out(add_carry_out_sm) + ); + 
+ + adder #(.OPW(OPW)) s_adder_sa( + .a(sa_adder_data_in), + .b(opa_data), + .carry_in(add_carry_in_sa_reg), + .sum(add_result_sa), + .carry_out(add_carry_out_sa) + ); + + shr #(.OPW(OPW)) shifter( + .a(shr_data_in), + .carry_in(shr_carry_in_reg), + .adiv2(shr_data_out), + .carry_out(shr_carry_out) + ); //---------------------------------------------------------------- @@ -235,205 +244,246 @@ module montprod( begin : reg_update if (!reset_n) begin - ready_reg <= 1'b0; - loop_counter <= 13'h0; - word_index <= 8'h0; - word_index_prev <= 8'h0; - add_carry_in_sa <= 1'b0; - add_carry_in_sm <= 1'b0; - shr_carry_in <= 1'b0; - montprod_ctrl_reg <= CTRL_IDLE; - b_reg <= 1'b0; - q_reg <= 1'b0; - s_mux_reg <= SMUX_0; - s_mem_we <= 1'b0; - s_mem_wr_addr <= 8'h0; - B_bit_index_reg <= 5'h0; + test_reg <= 1'b1; + ready_reg <= 1'b1; + loop_ctr_reg <= 13'h0; + word_index_reg <= {ADW{1'b0}}; + word_index_prev_reg <= {ADW{1'b0}}; + add_carry_in_sa_reg <= 1'b0; + add_carry_in_sm_reg <= 1'b0; + shr_data_out_reg <= {(OPW - 1){1'b0}}; + shr_carry_in_reg <= 1'b0; + b_reg <= 1'b0; + q_reg <= 1'b0; + s_mux_reg <= SMUX_ZERO; + s_mem_we_reg <= 1'b0; + s_mem_read_addr_reg <= {ADW{1'b0}}; + b_bit_index_reg <= {(13 - ADW){1'b0}}; + first_iteration_reg <= 1'b0; + montprod_ctrl_reg <= CTRL_IDLE; end else begin - if (ready_we) - ready_reg <= ready_new; + test_reg <= test_new; - if (montprod_ctrl_we) - begin - montprod_ctrl_reg <= montprod_ctrl_new; - end + s_mem_read_addr_reg <= s_mem_read_addr; + s_mem_we_reg <= s_mem_we_new; + s_mux_reg <= s_mux_new; - s_mem_wr_addr <= s_mem_addr; + word_index_prev_reg <= word_index_reg; - s_mem_we <= s_mem_we_new; + shr_carry_in_reg <= shr_carry_in_new; + add_carry_in_sa_reg <= add_carry_in_sa_new; + add_carry_in_sm_reg <= add_carry_in_sm_new; + shr_data_out_reg <= shr_data_out[(OPW - 2) : 0]; - word_index <= word_index_new; - word_index_prev <= word_index; + if (word_index_we) + word_index_reg <= word_index_new; - loop_counter <= loop_counter_new; - 
shr_carry_in <= shr_carry_new; - add_carry_in_sa <= add_carry_new_sa; - add_carry_in_sm <= add_carry_new_sm; + if (first_iteration_we) + first_iteration_reg <= first_iteration_new; - B_bit_index_reg <= B_bit_index; - q_reg <= q; - b_reg <= b; + if (b_bit_index_we) + b_bit_index_reg <= b_bit_index_new; - s_mux_reg <= s_mux_new; - end - end // reg_update + if (bq_we) + begin + b_reg <= b_new; + q_reg <= q_new; + end - always @* - begin : bq_process - b = b_reg; - q = q_reg; - if (montprod_ctrl_reg == CTRL_LOOP_BQ) - begin - b = opb_data[ B_bit_index_reg ]; - //opa_addr will point to length-1 to get A LSB. - //s_read_addr will point to length-1 - q = s_mem_read_data[0] ^ (opa_data[0] & b); - if (DEBUG) - $display("s_mem_read_data: %x opa_data %x b %x q %x B_bit_index_reg %x", s_mem_read_data, opa_data, b, q, B_bit_index_reg); + if (ready_we) + ready_reg <= ready_new; + + if (loop_ctr_we) + loop_ctr_reg <= loop_ctr_new; + + if (montprod_ctrl_we) + begin + montprod_ctrl_reg <= montprod_ctrl_new; + end end - end + end // reg_update //---------------------------------------------------------------- - // Process for iterating the loop counter and setting related B indexes + // s_logic + // + // Logic to calculate S memory updates including address + // and write enable. This is the main montprod datapath. 
//---------------------------------------------------------------- always @* - begin : loop_counter_process - loop_counter_new = loop_counter; - length_m1 = length - 1'b1; - loop_counter_dec = loop_counter - 1'b1; - B_word_index = loop_counter[12:5]; - B_bit_index = B_bit_index_reg; + begin : s_logic + shr_carry_in_new = 1'b0; + muxed_s_mem_read_data = {OPW{1'b0}}; + sa_adder_data_in = {OPW{1'b0}}; + add_carry_in_sa_new = 1'b0; + add_carry_in_sm_new = 1'b0; + s_mem_read_addr = word_index_reg; + s_mem_write_addr = s_mem_read_addr_reg; + s_mem_write_data = {OPW{1'b0}}; + s_mem_we_new = 1'b0; + state_trace = 0; + mux_trace = 0; + tmp_s_mem_write_data = {OPW{1'b0}}; + test_new = 1'b0; case (montprod_ctrl_reg) - CTRL_LOOP_INIT: - loop_counter_new = {length, 5'b00000} - 1'b1; - CTRL_LOOP_ITER: begin - B_word_index = loop_counter[12:5]; - B_bit_index = 5'h1f - loop_counter[4:0]; + s_mem_read_addr = length_m1; end - CTRL_L_STALLPIPE_D2: - loop_counter_new = loop_counter_dec; - - default: - loop_counter_new = loop_counter; - endcase - end - - - //---------------------------------------------------------------- - // prodcalc - //---------------------------------------------------------------- - always @* - begin : prodcalc - - case (montprod_ctrl_reg) - CTRL_LOOP_ITER: - //q = (s[length-1] ^ A[length-1]) & 1; - opa_addr_reg = length_m1; - - default: - opa_addr_reg = word_index; - endcase + CTRL_CALC_ADD: + begin + //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles. + s_mem_we_new = b_reg | q_reg | first_iteration_reg; + state_trace = 1; + test_new = 1'b1; + end - opb_addr_reg = B_word_index; - opm_addr_reg = word_index; + CTRL_CALC_SDIV2: + begin + //s = (s + q*M + b*A) >>> 1; s>>=1. Takes (1..length) cycles. 
+ s_mem_we_new = 1'b1; + end - case (montprod_ctrl_reg) - CTRL_LOOP_ITER: - s_mem_addr = length_m1; default: - s_mem_addr = word_index; + begin + end endcase + case (s_mux_reg) + SMUX_ADD: + begin + mux_trace = 1; + if (first_iteration_reg) + muxed_s_mem_read_data = {OPW{1'b0}}; + else + muxed_s_mem_read_data = s_mem_read_data; - result_addr_reg = word_index_prev; - result_data_reg = s_mem_read_data; - case (montprod_ctrl_reg) - CTRL_EMIT_S: - tmp_result_we = 1'b1; - default: - tmp_result_we = 1'b0; - endcase + if (q_reg) + sa_adder_data_in = add_result_sm; + else + sa_adder_data_in = muxed_s_mem_read_data; - if (reset_word_index_LSW == 1'b1) - word_index_new = length_m1; - else if (reset_word_index_MSW == 1'b1) - word_index_new = 8'h0; - else if (montprod_ctrl_reg == CTRL_L_CALC_SDIV2) - word_index_new = word_index + 1'b1; - else - word_index_new = word_index - 1'b1; - end // prodcalc + if (b_reg) + tmp_s_mem_write_data = add_result_sa; + else if (q_reg) + tmp_s_mem_write_data = add_result_sm; + else if (first_iteration_reg) + tmp_s_mem_write_data = {OPW{1'b0}}; + s_mem_write_data = tmp_s_mem_write_data; + add_carry_in_sa_new = add_carry_out_sa; + add_carry_in_sm_new = add_carry_out_sm; - always @* - begin : s_writer_process - shr_carry_new = 1'b0; - s_mux_new = SMUX_0; - s_mem_we_new = 1'b0; - case (montprod_ctrl_reg) - CTRL_INIT_S: - begin - s_mem_we_new = 1'b1; - s_mux_new = SMUX_0; // write 0 + // Experimental integration of shift in add. + shr_data_in = tmp_s_mem_write_data; + shifted_s_mem_write_data = {shr_carry_out, shr_data_out_reg}; end - CTRL_L_CALC_SM: - begin - //s = (s + q*M + b*A) >>> 1;, if(q==1) S+= M. Takes (1..length) cycles. - s_mem_we_new = q_reg; - s_mux_new = SMUX_ADD_SM; - end - - CTRL_L_CALC_SA: - begin - //s = (s + q*M + b*A) >>> 1;, if(b==1) S+= A. Takes (1..length) cycles. - s_mem_we_new = b_reg; - s_mux_new = SMUX_ADD_SA; - end - CTRL_L_CALC_SDIV2: + SMUX_SHR: begin - //s = (s + q*M + b*A) >>> 1; s>>=1. Takes (1..length) cycles. 
- s_mux_new = SMUX_SHR; - s_mem_we_new = 1'b1; + shr_data_in = s_mem_read_data; + s_mem_write_data = shr_data_out; + shr_carry_in_new = shr_carry_out; end default: begin end endcase + end // s_logic - add_carry_new_sa = 1'b0; - add_carry_new_sm = 1'b0; - case (s_mux_reg) - SMUX_ADD_SM: - add_carry_new_sm = add_carry_out_sm; + //---------------------------------------------------------------- + // bq + // + // Extract b and q bits. + // b: current bit of B used. + // q = (s - b * A) & 1 + // update the b bit and word indices based on loop counter. + //---------------------------------------------------------------- + always @* + begin : bq + b_new = opb_data[b_bit_index_reg]; - SMUX_ADD_SA: - add_carry_new_sa = add_carry_out_sa; + if (first_iteration_reg) + q_new = 1'b0 ^ (opa_data[0] & b_new); + else + q_new = s_mem_read_data[0] ^ (opa_data[0] & b_new); - SMUX_SHR: - shr_carry_new = shr_carry_out; + // B_bit_index = 5'h1f - loop_counter[4:0]; + b_bit_index_new = ((2**(13 - ADW)) - 1'b1) - loop_ctr_reg[(13 - ADW - 1) : 0]; + b_word_index = loop_ctr_reg[12 : (13 - ADW)]; + end // bq - default: - begin - end - endcase - end // prodcalc + //---------------------------------------------------------------- + // word_index + // + // Logic that implements the word index used to drive + // addresses for operands. 
+ //---------------------------------------------------------------- + always @* + begin : word_index + word_index_new = {ADW{1'b0}}; + word_index_we = 1'b0; + + if (reset_word_index_lsw) + begin + word_index_new = length_m1; + word_index_we = 1'b1; + end + + if (reset_word_index_msw) + begin + word_index_new = {ADW{1'b0}}; + word_index_we = 1'b1; + end + + if (inc_word_index) + begin + word_index_new = word_index_reg + 1'b1; + word_index_we = 1'b1; + end + + if (dec_word_index) + begin + word_index_new = word_index_reg - 1'b1; + word_index_we = 1'b1; + end + end // word_index + + + //---------------------------------------------------------------- + // loop_ctr + // Logic for updating the loop counter. + //---------------------------------------------------------------- + always @* + begin : loop_ctr + loop_ctr_new = 13'h0; + loop_ctr_we = 1'b0; + + if (loop_ctr_set) + begin + loop_ctr_new = {length, {(13 - ADW){1'b0}}} - 1'b1; + loop_ctr_we = 1'b1; + end + + if (loop_ctr_dec) + begin + loop_ctr_new = loop_ctr_reg - 1'b1; + loop_ctr_we = 1'b1; + end + end //---------------------------------------------------------------- @@ -443,146 +493,131 @@ module montprod( //---------------------------------------------------------------- always @* begin : montprod_ctrl - ready_new = 1'b0; - ready_we = 1'b0; - montprod_ctrl_new = CTRL_IDLE; - montprod_ctrl_we = 1'b0; + ready_new = 1'b0; + ready_we = 1'b0; + loop_ctr_set = 1'b0; + loop_ctr_dec = 1'b0; + b_bit_index_we = 1'b0; + bq_we = 1'b0; + s_mux_new = SMUX_ZERO; + dec_word_index = 1'b0; + inc_word_index = 1'b0; + reset_word_index_lsw = 1'b0; + reset_word_index_msw = 1'b0; + first_iteration_new = 1'b0; + first_iteration_we = 1'b0; + tmp_opa_addr = word_index_reg; + tmp_result_we = 1'b0; + montprod_ctrl_new = CTRL_IDLE; + montprod_ctrl_we = 1'b0; - reset_word_index_LSW = 1'b0; - reset_word_index_MSW = 1'b0; case (montprod_ctrl_reg) CTRL_IDLE: begin if (calculate) begin - ready_new = 1'b0; - ready_we = 1'b1; - 
montprod_ctrl_new = CTRL_INIT_S; - montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; - end - else - begin - ready_new = 1'b1; - ready_we = 1'b1; + first_iteration_new = 1'b1; + first_iteration_we = 1'b1; + ready_new = 1'b0; + ready_we = 1'b1; + reset_word_index_lsw = 1'b1; + loop_ctr_set = 1'b1; + montprod_ctrl_new = CTRL_LOOP_ITER; + montprod_ctrl_we = 1'b1; end end - CTRL_INIT_S: - begin - if (word_index == 8'h0) - begin - montprod_ctrl_new = CTRL_LOOP_INIT; - montprod_ctrl_we = 1'b1; - end - end - - - CTRL_LOOP_INIT: - begin - montprod_ctrl_new = CTRL_LOOP_ITER; - montprod_ctrl_we = 1'b1; - end - //calculate q = (s - b * A) & 1;. // Also abort loop if done. CTRL_LOOP_ITER: begin - reset_word_index_LSW = 1'b1; - montprod_ctrl_new = CTRL_LOOP_BQ; - montprod_ctrl_we = 1'b1; + tmp_opa_addr = length_m1; + b_bit_index_we = 1'b1; + montprod_ctrl_new = CTRL_LOOP_BQ; + montprod_ctrl_we = 1'b1; end CTRL_LOOP_BQ: begin - reset_word_index_LSW = 1'b1; - montprod_ctrl_new = CTRL_L_CALC_SM; - montprod_ctrl_we = 1'b1; + reset_word_index_lsw = 1'b1; + bq_we = 1'b1; + montprod_ctrl_new = CTRL_CALC_ADD; + montprod_ctrl_we = 1'b1; end - CTRL_L_CALC_SM: + CTRL_CALC_ADD: begin - if (word_index == 8'h0) + s_mux_new = SMUX_ADD; + + if (word_index_reg == 0) begin - reset_word_index_LSW = 1'b1; - montprod_ctrl_we = 1'b1; - montprod_ctrl_new = CTRL_L_STALLPIPE_SM; + reset_word_index_lsw = 1'b1; + montprod_ctrl_new = CTRL_STALLPIPE_ADD; + montprod_ctrl_we = 1'b1; end - end - - CTRL_L_STALLPIPE_SM: - begin - montprod_ctrl_new = CTRL_L_CALC_SA; - montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; - end - - CTRL_L_CALC_SA: - begin - if (word_index == 8'h0) + else begin - reset_word_index_LSW = 1'b1; - montprod_ctrl_new = CTRL_L_STALLPIPE_SA; - montprod_ctrl_we = 1'b1; + dec_word_index = 1'b1; end end - CTRL_L_STALLPIPE_SA: + CTRL_STALLPIPE_ADD: begin - montprod_ctrl_new = CTRL_L_CALC_SDIV2; - montprod_ctrl_we = 1'b1; - reset_word_index_MSW = 1'b1; + first_iteration_new = 1'b0; + 
first_iteration_we = 1'b1; + reset_word_index_msw = 1'b1; + montprod_ctrl_new = CTRL_CALC_SDIV2; + montprod_ctrl_we = 1'b1; end - CTRL_L_CALC_SDIV2: + CTRL_CALC_SDIV2: begin - if (word_index == length_m1) + s_mux_new = SMUX_SHR; + + if (word_index_reg == length_m1) begin - montprod_ctrl_new = CTRL_L_STALLPIPE_D2; - montprod_ctrl_we = 1'b1; - //reset_word_index = 1'b1; + montprod_ctrl_new = CTRL_STALLPIPE_SDIV2; + montprod_ctrl_we = 1'b1; end + else + inc_word_index = 1'b1; end - CTRL_L_STALLPIPE_D2: + CTRL_STALLPIPE_SDIV2: begin - montprod_ctrl_new = CTRL_LOOP_ITER; //loop - montprod_ctrl_we = 1'b1; - reset_word_index_LSW = 1'b1; - if (loop_counter == 0) + loop_ctr_dec = 1'b1; + montprod_ctrl_new = CTRL_LOOP_ITER; + montprod_ctrl_we = 1'b1; + reset_word_index_lsw = 1'b1; + + if (loop_ctr_reg == 0) begin montprod_ctrl_new = CTRL_L_STALLPIPE_ES; - montprod_ctrl_we = 1'b1; + montprod_ctrl_we = 1'b1; end end CTRL_L_STALLPIPE_ES: begin montprod_ctrl_new = CTRL_EMIT_S; - montprod_ctrl_we = 1'b1; - //reset_word_index_LSW = 1'b1; + montprod_ctrl_we = 1'b1; end CTRL_EMIT_S: begin - if (DEBUG) - $display("EMIT_S word_index: %d", word_index); - if (word_index_prev == 8'h0) + dec_word_index = 1'b1; + tmp_result_we = 1'b1; + + if (word_index_prev_reg == 0) begin - montprod_ctrl_new = CTRL_DONE; + ready_new = 1'b1; + ready_we = 1'b1; + montprod_ctrl_new = CTRL_IDLE; montprod_ctrl_we = 1'b1; end end - CTRL_DONE: - begin - ready_new = 1'b1; - ready_we = 1'b1; - montprod_ctrl_new = CTRL_IDLE; - montprod_ctrl_we = 1'b1; - end - default: begin end diff --git a/src/rtl/residue.v b/src/rtl/residue.v index 3fa1666..f3d114c 100644 --- a/src/rtl/residue.v +++ b/src/rtl/residue.v @@ -45,119 +45,109 @@ // //====================================================================== -module residue( - input wire clk, - input wire reset_n, +module residue #(parameter OPW = 32, parameter ADW = 8) + ( + input wire clk, + input wire reset_n, - input wire calculate, - output wire ready, + input wire 
calculate, + output wire ready, - input wire [14 : 0] nn, //MAX(2*N)=8192*2 (14 bit) - input wire [07 : 0] length, + input wire [14 : 0] nn, //MAX(2*N)=8192*2 (14 bit) + input wire [(ADW - 1) : 0] length, - output wire [07 : 0] opa_rd_addr, - input wire [31 : 0] opa_rd_data, - output wire [07 : 0] opa_wr_addr, - output wire [31 : 0] opa_wr_data, - output wire opa_wr_we, + output wire [(ADW - 1) : 0] opa_rd_addr, + input wire [(OPW - 1) : 0] opa_rd_data, + output wire [(ADW - 1) : 0] opa_wr_addr, + output wire [(OPW - 1) : 0] opa_wr_data, + output wire opa_wr_we, - output wire [07 : 0] opm_addr, - input wire [31 : 0] opm_data + output wire [(ADW - 1) : 0] opm_addr, + input wire [(OPW - 1) : 0] opm_data + ); -); - -//---------------------------------------------------------------- -// Internal constant and parameter definitions. -//---------------------------------------------------------------- + //---------------------------------------------------------------- + // Internal constant and parameter definitions. 
+ //---------------------------------------------------------------- + localparam CTRL_IDLE = 4'h0; + localparam CTRL_INIT = 4'h1; + localparam CTRL_INIT_STALL = 4'h2; + localparam CTRL_SHL = 4'h3; + localparam CTRL_SHL_STALL = 4'h4; + localparam CTRL_COMPARE = 4'h5; + localparam CTRL_COMPARE_STALL = 4'h6; + localparam CTRL_SUB = 4'h7; + localparam CTRL_SUB_STALL = 4'h8; + localparam CTRL_LOOP = 4'h9; -// m_residue_2_2N_array( N, M, Nr) -// Nr = 00...01 ; Nr = 1 == 2**(2N-2N) -// for (int i = 0; i < 2 * N; i++) -// Nr = Nr shift left 1 -// if (Nr less than M) continue; -// Nr = Nr - M -// return Nr -// -localparam CTRL_IDLE = 4'h0; -localparam CTRL_INIT = 4'h1; // Nr = 00...01 ; Nr = 1 == 2**(2N-2N) -localparam CTRL_INIT_STALL = 4'h2; -localparam CTRL_SHL = 4'h3; // Nr = Nr shift left 1 -localparam CTRL_SHL_STALL = 4'h4; -localparam CTRL_COMPARE = 4'h5; //if (Nr less than M) continue; -localparam CTRL_COMPARE_STALL = 4'h6; -localparam CTRL_SUB = 4'h7; //Nr = Nr - M -localparam CTRL_SUB_STALL = 4'h8; -localparam CTRL_LOOP = 4'h9; //for (int i = 0; i < 2 * N; i++) - -//---------------------------------------------------------------- -// Registers including update variables and write enable. 
-//---------------------------------------------------------------- - -reg [07 : 0] opa_rd_addr_reg; -reg [07 : 0] opa_wr_addr_reg; -reg [31 : 0] opa_wr_data_reg; -reg opa_wr_we_reg; -reg [07 : 0] opm_addr_reg; -reg ready_reg; -reg ready_new; -reg ready_we; -reg [03 : 0] residue_ctrl_reg; -reg [03 : 0] residue_ctrl_new; -reg residue_ctrl_we; -reg reset_word_index; -reg reset_n_counter; -reg [14 : 0] loop_counter_1_to_nn_reg; //for i = 1 to nn (2*N) -reg [14 : 0] loop_counter_1_to_nn_new; -reg loop_counter_1_to_nn_we; -reg [14 : 0] nn_reg; -reg nn_we; -reg [07 : 0] length_m1_reg; -reg [07 : 0] length_m1_new; -reg length_m1_we; -reg [07 : 0] word_index_reg; -reg [07 : 0] word_index_new; -reg word_index_we; - -reg [31 : 0] one_data; -wire [31 : 0] sub_data; -wire [31 : 0] shl_data; -reg sub_carry_in_new; -reg sub_carry_in_reg; -wire sub_carry_out; -reg shl_carry_in_new; -reg shl_carry_in_reg; -wire shl_carry_out; - -//---------------------------------------------------------------- -// Concurrent connectivity for ports etc. -//---------------------------------------------------------------- -assign opa_rd_addr = opa_rd_addr_reg; -assign opa_wr_addr = opa_wr_addr_reg; -assign opa_wr_data = opa_wr_data_reg; -assign opa_wr_we = opa_wr_we_reg; -assign opm_addr = opm_addr_reg; -assign ready = ready_reg; + //---------------------------------------------------------------- + // Registers including update variables and write enable. 
+ //---------------------------------------------------------------- + reg [(ADW - 1) : 0] opa_rd_addr_reg; + reg [(ADW - 1) : 0] opa_wr_addr_reg; + reg [(OPW - 1) : 0] opa_wr_data_reg; + reg opa_wr_we_reg; + reg [(ADW - 1) : 0] opm_addr_reg; + reg ready_reg; + reg ready_new; + reg ready_we; + reg [03 : 0] residue_ctrl_reg; + reg [03 : 0] residue_ctrl_new; + reg residue_ctrl_we; + reg reset_word_index; + reg reset_n_counter; + reg [14 : 0] loop_counter_1_to_nn_reg; //for i = 1 to nn (2*N) + reg [14 : 0] loop_counter_1_to_nn_new; + reg loop_counter_1_to_nn_we; + reg [14 : 0] nn_reg; + reg nn_we; + reg [(ADW - 1) : 0] length_m1_reg; + reg [(ADW - 1) : 0] length_m1_new; + reg length_m1_we; + reg [(ADW - 1) : 0] word_index_reg; + reg [(ADW - 1) : 0] word_index_new; + reg word_index_we; + + reg [(OPW - 1) : 0] one_data; + wire [(OPW - 1) : 0] sub_data; + wire [(OPW - 1) : 0] shl_data; + reg sub_carry_in_new; + reg sub_carry_in_reg; + wire sub_carry_out; + reg shl_carry_in_new; + reg shl_carry_in_reg; + wire shl_carry_out; //---------------------------------------------------------------- - // Instantions + // Concurrent connectivity for ports etc. 
//---------------------------------------------------------------- - adder32 subcmp( - .a(opa_rd_data), - .b( ~ opm_data), - .carry_in(sub_carry_in_reg), - .sum(sub_data), - .carry_out(sub_carry_out) - ); + assign opa_rd_addr = opa_rd_addr_reg; + assign opa_wr_addr = opa_wr_addr_reg; + assign opa_wr_data = opa_wr_data_reg; + assign opa_wr_we = opa_wr_we_reg; + assign opm_addr = opm_addr_reg; + assign ready = ready_reg; - shl32 shl( - .a(opa_rd_data), - .carry_in(shl_carry_in_reg), - .amul2(shl_data), - .carry_out(shl_carry_out) - ); + //---------------------------------------------------------------- + // Instantions + //---------------------------------------------------------------- + adder #(.OPW(OPW)) add_inst( + .a(opa_rd_data), + .b( ~ opm_data), + .carry_in(sub_carry_in_reg), + .sum(sub_data), + .carry_out(sub_carry_out) + ); + + shl #(.OPW(OPW)) shl_inst( + .a(opa_rd_data), + .carry_in(shl_carry_in_reg), + .amul2(shl_data), + .carry_out(shl_carry_out) + ); //---------------------------------------------------------------- @@ -167,14 +157,14 @@ assign ready = ready_reg; begin if (!reset_n) begin - residue_ctrl_reg <= CTRL_IDLE; - word_index_reg <= 8'h0; - length_m1_reg <= 8'h0; - nn_reg <= 15'h0; + residue_ctrl_reg <= CTRL_IDLE; + word_index_reg <= {ADW{1'b1}}; + length_m1_reg <= {ADW{1'b1}}; + nn_reg <= 15'h0; loop_counter_1_to_nn_reg <= 15'h0; - ready_reg <= 1'b1; - sub_carry_in_reg <= 1'b0; - shl_carry_in_reg <= 1'b0; + ready_reg <= 1'b1; + sub_carry_in_reg <= 1'b0; + shl_carry_in_reg <= 1'b0; end else begin @@ -229,22 +219,24 @@ assign ready = ready_reg; loop_counter_1_to_nn_we = 1'b1; end + //---------------------------------------------------------------- // implements looping over words in a multiword operation //---------------------------------------------------------------- always @* begin : word_index_process - word_index_new = word_index_reg - 8'h1; + word_index_new = word_index_reg - 1'b1; word_index_we = 1'b1; if (reset_word_index) 
word_index_new = length_m1_reg; if (residue_ctrl_reg == CTRL_IDLE) - word_index_new = length_m1_new; //reduce a pipeline stage with early read - + //reduce a pipeline stage with early read + word_index_new = length_m1_new; end + //---------------------------------------------------------------- // writer process. implements: // Nr = 00...01 ; Nr = 1 == 2**(2N-2N) @@ -299,6 +291,7 @@ assign ready = ready_reg; opm_addr_reg = word_index_new; end + //---------------------------------------------------------------- // carry process. "Ripple carry awesomeness!" //---------------------------------------------------------------- @@ -321,6 +314,7 @@ assign ready = ready_reg; endcase end + //---------------------------------------------------------------- // Nr = 00...01 ; Nr = 1 == 2**(2N-2N) //---------------------------------------------------------------- @@ -329,128 +323,130 @@ assign ready = ready_reg; one_data = 32'h0; if (residue_ctrl_reg == CTRL_INIT) if (word_index_reg == length_m1_reg) - one_data = 32'h1; + one_data = {{(OPW - 1){1'b0}}, 1'b1}; end -//---------------------------------------------------------------- -// residue_ctrl -// -// Control FSM for residue -//---------------------------------------------------------------- -always @* - begin : residue_ctrl - ready_new = 1'b0; - ready_we = 1'b0; - - residue_ctrl_new = CTRL_IDLE; - residue_ctrl_we = 1'b0; - reset_word_index = 1'b0; - reset_n_counter = 1'b0; + //---------------------------------------------------------------- + // residue_ctrl + // + // Control FSM for residue + //---------------------------------------------------------------- + always @* + begin : residue_ctrl + ready_new = 1'b0; + ready_we = 1'b0; + reset_word_index = 1'b0; + reset_n_counter = 1'b0; + length_m1_new = length - 1'b1; + length_m1_we = 1'b0; + nn_we = 1'b0; + residue_ctrl_new = CTRL_IDLE; + residue_ctrl_we = 1'b0; - length_m1_new = length - 8'h1; - length_m1_we = 1'b0; + case (residue_ctrl_reg) + CTRL_IDLE: + if (calculate) + 
begin + ready_new = 1'b0; + ready_we = 1'b1; + reset_word_index = 1'b1; + length_m1_we = 1'b1; + nn_we = 1'b1; + residue_ctrl_new = CTRL_INIT; + residue_ctrl_we = 1'b1; + end - nn_we = 1'b0; + // Nr = 00...01 ; Nr = 1 == 2**(2N-2N) + CTRL_INIT: + if (word_index_reg == 0) + begin + residue_ctrl_new = CTRL_INIT_STALL; + residue_ctrl_we = 1'b1; + end - case (residue_ctrl_reg) - CTRL_IDLE: - if (calculate) + CTRL_INIT_STALL: begin - ready_new = 1'b0; - ready_we = 1'b1; - residue_ctrl_new = CTRL_INIT; - residue_ctrl_we = 1'b1; reset_word_index = 1'b1; - length_m1_we = 1'b1; - nn_we = 1'b1; + reset_n_counter = 1'b1; + residue_ctrl_new = CTRL_SHL; + residue_ctrl_we = 1'b1; end - CTRL_INIT: - if (word_index_reg == 8'h0) + // Nr = Nr shift left 1 + CTRL_SHL: begin - residue_ctrl_new = CTRL_INIT_STALL; - residue_ctrl_we = 1'b1; + if (word_index_reg == 0) + begin + residue_ctrl_new = CTRL_SHL_STALL; + residue_ctrl_we = 1'b1; + end end - CTRL_INIT_STALL: - begin - reset_word_index = 1'b1; - reset_n_counter = 1'b1; - residue_ctrl_new = CTRL_SHL; - residue_ctrl_we = 1'b1; - end - - CTRL_SHL: - begin - if (word_index_reg == 8'h0) + CTRL_SHL_STALL: begin - residue_ctrl_new = CTRL_SHL_STALL; + reset_word_index = 1'b1; + residue_ctrl_new = CTRL_COMPARE; residue_ctrl_we = 1'b1; end - end - CTRL_SHL_STALL: - begin - reset_word_index = 1'b1; - residue_ctrl_new = CTRL_COMPARE; - residue_ctrl_we = 1'b1; - end + //if (Nr less than M) continue + CTRL_COMPARE: + if (word_index_reg == 0) + begin + residue_ctrl_new = CTRL_COMPARE_STALL; + residue_ctrl_we = 1'b1; + end - CTRL_COMPARE: - if (word_index_reg == 8'h0) + CTRL_COMPARE_STALL: begin - residue_ctrl_new = CTRL_COMPARE_STALL; + reset_word_index = 1'b1; residue_ctrl_we = 1'b1; + if (sub_carry_in_reg == 1'b1) + //TODO: Bug! detect CF to detect less than, but no detect ZF to detect equal to. 
+ residue_ctrl_new = CTRL_SUB; + else + residue_ctrl_new = CTRL_LOOP; end - CTRL_COMPARE_STALL: - begin - reset_word_index = 1'b1; - residue_ctrl_we = 1'b1; - if (sub_carry_in_reg == 1'b1) - //TODO: Bug! detect CF to detect less than, but no detect ZF to detect equal to. - residue_ctrl_new = CTRL_SUB; - else - residue_ctrl_new = CTRL_LOOP; - end + //Nr = Nr - M + CTRL_SUB: + if (word_index_reg == 0) + begin + residue_ctrl_new = CTRL_SUB_STALL; + residue_ctrl_we = 1'b1; + end - CTRL_SUB: - if (word_index_reg == 8'h0) + CTRL_SUB_STALL: begin - residue_ctrl_new = CTRL_SUB_STALL; + residue_ctrl_new = CTRL_LOOP; residue_ctrl_we = 1'b1; end - CTRL_SUB_STALL: - begin - residue_ctrl_new = CTRL_LOOP; - residue_ctrl_we = 1'b1; - end - - CTRL_LOOP: - begin - if (loop_counter_1_to_nn_reg == nn_reg) - begin - ready_new = 1'b1; - ready_we = 1'b1; - residue_ctrl_new = CTRL_IDLE; - residue_ctrl_we = 1'b1; - end - else - begin - reset_word_index = 1'b1; - residue_ctrl_new = CTRL_SHL; - residue_ctrl_we = 1'b1; - end - end + //for (int i = 0; i < 2 * N; i++) + CTRL_LOOP: + begin + if (loop_counter_1_to_nn_reg == nn_reg) + begin + ready_new = 1'b1; + ready_we = 1'b1; + residue_ctrl_new = CTRL_IDLE; + residue_ctrl_we = 1'b1; + end + else + begin + reset_word_index = 1'b1; + residue_ctrl_new = CTRL_SHL; + residue_ctrl_we = 1'b1; + end + end - default: - begin - end + default: + begin + end - endcase - end + endcase + end endmodule // residue diff --git a/src/rtl/shl32.v b/src/rtl/shl.v index 42521fd..bed83e8 100644 --- a/src/rtl/shl32.v +++ b/src/rtl/shl.v @@ -1,11 +1,12 @@ //====================================================================== // -// shl32.v -// ------- -// 32bit left shift with carry in / carry out +// shl.v +// ----- +// One bit left shift of words with carry in and carry out. Used in +// the residue module of the modexp core. // // -// Author: Peter Magnusson +// Author: Peter Magnusson, Joachim Strömbergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -36,18 +37,20 @@ // //====================================================================== -module shl32( - input wire [31 : 0] a, - input wire carry_in, - output wire [31 : 0] amul2, - output wire carry_out - ); +module shl #(parameter OPW = 32) + ( + input wire [(OPW - 1) : 0] a, + input wire carry_in, - assign amul2 = {a[30 : 0], carry_in}; - assign carry_out = a[31]; + output wire [(OPW - 1) : 0] amul2, + output wire carry_out + ); -endmodule // shl32 + assign amul2 = {a[(OPW - 2) : 0], carry_in}; + assign carry_out = a[(OPW - 1)]; + +endmodule // shl //====================================================================== -// EOF shl32.v +// EOF shl.v //====================================================================== diff --git a/src/rtl/shr32.v b/src/rtl/shr.v index 66b15c3..40ef111 100644 --- a/src/rtl/shr32.v +++ b/src/rtl/shr.v @@ -2,10 +2,11 @@ // // shr32.v // ------- -// 32bit right shift with carry in / carry out. +// One bit right shift with carry in and carry out. +// Used in the montprod module of the modexp core. // // -// Author: Peter Magnusson +// Author: Peter Magnusson, Joachim Strömbergson // Copyright (c) 2015, NORDUnet A/S All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -36,18 +37,20 @@ // //====================================================================== -module shr32( - input wire [31 : 0] a, - input wire carry_in, - output wire [31 : 0] adiv2, - output wire carry_out - ); +module shr #(parameter OPW = 32) + ( + input wire [(OPW - 1) : 0] a, + input wire carry_in, - assign adiv2 = {carry_in, a[31 : 1]}; + output wire [(OPW - 1) : 0] adiv2, + output wire carry_out + ); + + assign adiv2 = {carry_in, a[(OPW - 1) : 1]}; assign carry_out = a[0]; -endmodule // shr32 +endmodule // shr //====================================================================== -// EOF shr32.v +// EOF shr.v //====================================================================== |