From eb3702ee1f2be0316ebac1883b479cff699470e4 Mon Sep 17 00:00:00 2001 From: Paul Selkirk Date: Mon, 16 Nov 2015 15:33:59 -0500 Subject: reformat, add copyright --- src/rtl/modexps6_adder64_carry32.v | 166 ++-- src/rtl/modexps6_buffer_core.v | 420 ++++----- src/rtl/modexps6_buffer_user.v | 382 ++++---- src/rtl/modexps6_modinv32.v | 256 +++--- src/rtl/modexps6_montgomery_coeff.v | 835 +++++++++--------- src/rtl/modexps6_montgomery_multiplier.v | 799 ++++++++--------- src/rtl/modexps6_top.v | 1402 +++++++++++++++--------------- src/rtl/modexps6_wrapper.v | 396 +++++---- src/rtl/ram_1rw_1ro_readfirst.v | 149 ++-- 9 files changed, 2483 insertions(+), 2322 deletions(-) diff --git a/src/rtl/modexps6_adder64_carry32.v b/src/rtl/modexps6_adder64_carry32.v index 87869d1..fb71b45 100644 --- a/src/rtl/modexps6_adder64_carry32.v +++ b/src/rtl/modexps6_adder64_carry32.v @@ -1,70 +1,96 @@ -`timescale 1ns / 1ps - -module modexps6_adder64_carry32 - ( - clk, t, x, y, s, c_in, c_out - ); - - - // - // Ports - // - input wire clk; - input wire [31: 0] t; - input wire [31: 0] x; - input wire [31: 0] y; - output wire [31: 0] s; - input wire [31: 0] c_in; - output wire [31: 0] c_out; - - - // - // Multiplier - // - wire [63: 0] multiplier_out; - - multiplier_s6 dsp_multiplier - ( - .clk (clk), - .a (x), - .b (y), - .p (multiplier_out) - ); - - - // - // Carry and T - // - wire [63: 0] t_ext = {{32{1'b0}}, t}; - wire [63: 0] c_ext = {{32{1'b0}}, c_in}; - - - // - // Sum - // - wire [63: 0] sum = multiplier_out + c_in + t; - - - // - // Output - // - assign s = sum[31: 0]; - assign c_out = sum[63:32]; - - /* - reg [31: 0] s_reg; - reg [31: 0] c_out_reg; - - assign s = s_reg; - assign c_out = c_out_reg; - - always @(posedge clk) begin - // - s_reg <= sum[31: 0]; - c_out_reg <= sum[63:32]; - // - end - */ - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_adder64_carry32   // one multiply-accumulate word step: {c_out, s} = x * y + c_in + t
+  (
+   input wire          clk,        // only drives the multiplier_s6 instance; the adder itself is combinational
+   input wire [31: 0]  t,          // 32-bit addend
+   input wire [31: 0]  x,          // multiplicand
+   input wire [31: 0]  y,          // multiplier
+   output wire [31: 0] s,          // low 32 bits of the 64-bit sum
+   input wire [31: 0]  c_in,       // incoming carry word
+   output wire [31: 0] c_out       // high 32 bits of the 64-bit sum (outgoing carry word)
+   );
+
+
+   //
+   // Multiplier (multiplier_s6 is defined outside this file; it takes clk, so its
+   // latency relative to t / c_in is presumably handled by the caller -- TODO confirm)
+   wire [63: 0]        multiplier_out;
+
+   multiplier_s6 dsp_multiplier
+     (
+      .clk    (clk),
+      .a      (x),
+      .b      (y),
+      .p      (multiplier_out)
+      );
+
+
+   //
+   // Carry and T, zero-extended to the 64-bit width of the product
+   //
+   wire [63: 0]        t_ext = {{32{1'b0}}, t};
+   wire [63: 0]        c_ext = {{32{1'b0}}, c_in};
+
+
+   //
+   // Sum of explicitly extended operands. Worst case is
+   // (2^32-1)^2 + 2*(2^32-1) = 2^64-1, so 64 bits can never overflow.
+   wire [63: 0]        sum = multiplier_out + c_ext + t_ext;
+
+
+   //
+   // Output (unregistered; the registered variant is kept disabled below)
+   //
+   assign s     = sum[31: 0];
+   assign c_out = sum[63:32];
+
+   /*
+   reg [31: 0]         s_reg;
+   reg [31: 0]         c_out_reg;
+
+   assign s     = s_reg;
+   assign c_out = c_out_reg;
+
+   always @(posedge clk) begin
+      //
+      s_reg     <= sum[31: 0];
+      c_out_reg <= sum[63:32];
+      //
+   end
+   */
+
+
+endmodule
diff --git a/src/rtl/modexps6_buffer_core.v b/src/rtl/modexps6_buffer_core.v
index 86a6a4d..a468154 100644
--- a/src/rtl/modexps6_buffer_core.v
+++ b/src/rtl/modexps6_buffer_core.v
@@ -1,202 +1,218 @@
-`timescale 1ns / 1ps
-
-module modexps6_buffer_core
-  (
-   clk,
-   rw_coeff_bram_addr, rw_coeff_bram_wr, rw_coeff_bram_in, rw_coeff_bram_out, ro_coeff_bram_addr, ro_coeff_bram_out,
-   rw_mm_bram_addr, rw_mm_bram_wr, rw_mm_bram_in, rw_mm_bram_out, ro_mm_bram_addr, ro_mm_bram_out,
-   rw_nn_bram_addr, rw_nn_bram_wr, rw_nn_bram_in, ro_nn_bram_addr, ro_nn_bram_out,
-   rw_y_bram_addr, rw_y_bram_wr, rw_y_bram_in, rw_y_bram_out,
-   rw_r_bram_addr, rw_r_bram_wr, rw_r_bram_in, rw_r_bram_out, ro_r_bram_addr, ro_r_bram_out,
-   rw_t_bram_addr, rw_t_bram_wr, rw_t_bram_in, rw_t_bram_out, ro_t_bram_addr, ro_t_bram_out
-   );
-
-   //
-   // Parameters
-   //
-   parameter OPERAND_ADDR_WIDTH = 5; // 1024 / 32 = 32 -> 5 bits
-
-
-   //
-   // Ports
-   //
-   input wire clk;
-
-   input wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr;
-   input wire rw_coeff_bram_wr;
-
input wire [ 31:0] rw_coeff_bram_in; - output wire [ 31:0] rw_coeff_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr; - input wire rw_mm_bram_wr; - input wire [ 31:0] rw_mm_bram_in; - output wire [ 31:0] rw_mm_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr; - input wire rw_nn_bram_wr; - input wire [ 31:0] rw_nn_bram_in; - - input wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr; - input wire rw_y_bram_wr; - input wire [ 31:0] rw_y_bram_in; - output wire [ 31:0] rw_y_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr; - input wire rw_r_bram_wr; - input wire [ 31:0] rw_r_bram_in; - output wire [ 31:0] rw_r_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr; - input wire rw_t_bram_wr; - input wire [ 31:0] rw_t_bram_in; - output wire [ 31:0] rw_t_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr; - output wire [ 31:0] ro_coeff_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr; - output wire [ 31:0] ro_mm_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr; - output wire [ 31:0] ro_nn_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr; - output wire [ 31:0] ro_r_bram_out; - - input wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr; - output wire [ 31:0] ro_t_bram_out; - - - // - // Montgomery Coefficient - // - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_coeff - ( - .clk (clk), - - .a_addr (rw_coeff_bram_addr), - .a_wr (rw_coeff_bram_wr), - .a_in (rw_coeff_bram_in), - .a_out (rw_coeff_bram_out), - - .b_addr (ro_coeff_bram_addr), - .b_out (ro_coeff_bram_out) - ); - - - // - // Powers of Message - // - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_mm - ( - .clk (clk), - - .a_addr (rw_mm_bram_addr), - .a_wr (rw_mm_bram_wr), - .a_in (rw_mm_bram_in), - .a_out (rw_mm_bram_out), - - .b_addr (ro_mm_bram_addr), - .b_out (ro_mm_bram_out) - ); - - - // - // Extended Modulus - // - 
ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_nn - ( - .clk (clk), - - .a_addr (rw_nn_bram_addr), - .a_wr (rw_nn_bram_wr), - .a_in (rw_nn_bram_in), - .a_out (), - - .b_addr (ro_nn_bram_addr), - .b_out (ro_nn_bram_out) - ); - - - // - // Output - // - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_y - ( - .clk (clk), - - .a_addr (rw_y_bram_addr), - .a_wr (rw_y_bram_wr), - .a_in (rw_y_bram_in), - .a_out (rw_y_bram_out), - - .b_addr ({(OPERAND_ADDR_WIDTH+1){1'b0}}), - .b_out () - ); - - - // - // Result of Multiplication - // - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_r - ( - .clk (clk), - - .a_addr (rw_r_bram_addr), - .a_wr (rw_r_bram_wr), - .a_in (rw_r_bram_in), - .a_out (rw_r_bram_out), - - .b_addr (ro_r_bram_addr), - .b_out (ro_r_bram_out) - ); - - - // - // Temporary Buffer - // - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH+1) - ) - mem_t - ( - .clk (clk), - - .a_addr (rw_t_bram_addr), - .a_wr (rw_t_bram_wr), - .a_in (rw_t_bram_in), - .a_out (rw_t_bram_out), - - .b_addr (ro_t_bram_addr), - .b_out (ro_t_bram_out) - ); - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_buffer_core   // six 32-bit wide ram_1rw_1ro_readfirst buffers sharing one clock
+  #(parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                        clk,
+
+    // rw_* ports connect to port A (a_addr / a_wr / a_in / a_out) of each memory.
+    input wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr,
+    input wire                        rw_coeff_bram_wr,
+    input wire [31:0]                 rw_coeff_bram_in,
+    output wire [31:0]                rw_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr,
+    input wire                        rw_mm_bram_wr,
+    input wire [31:0]                 rw_mm_bram_in,
+    output wire [31:0]                rw_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr,   // note: there is no rw_nn_bram_out port
+    input wire                        rw_nn_bram_wr,
+    input wire [31:0]                 rw_nn_bram_in,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr,    // note: the y buffer has no ro_* port
+    input wire                        rw_y_bram_wr,
+    input wire [31:0]                 rw_y_bram_in,
+    output wire [31:0]                rw_y_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr,
+    input wire                        rw_r_bram_wr,
+    input wire [31:0]                 rw_r_bram_in,
+    output wire [31:0]                rw_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr,
+    input wire                        rw_t_bram_wr,
+    input wire [31:0]                 rw_t_bram_in,
+    output wire [31:0]                rw_t_bram_out,
+
+    // ro_* ports connect to the read-only port B (b_addr / b_out) of each memory.
+    input wire [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr,
+    output wire [31:0]                ro_coeff_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr,
+    output wire [31:0]                ro_mm_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr,
+    output wire [31:0]                ro_nn_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr,
+    output wire [31:0]                ro_r_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr,
+    output wire [31:0]                ro_t_bram_out
+    );
+
+
+   //
+   // Montgomery Coefficient
+   // (all buffers use OPERAND_ADDR_WIDTH+1 address bits, one more than the operand word index)
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_coeff
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_coeff_bram_addr),
+      .a_wr         (rw_coeff_bram_wr),
+      .a_in         (rw_coeff_bram_in),
+      .a_out        (rw_coeff_bram_out),
+
+      .b_addr       (ro_coeff_bram_addr),
+      .b_out        (ro_coeff_bram_out)
+      );
+
+
+   //
+   // Powers of Message
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_mm
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_mm_bram_addr),
+      .a_wr         (rw_mm_bram_wr),
+      .a_in         (rw_mm_bram_in),
+      .a_out        (rw_mm_bram_out),
+
+      .b_addr       (ro_mm_bram_addr),
+      .b_out        (ro_mm_bram_out)
+      );
+
+
+   //
+   // Extended Modulus
+   // (written through port A, read back only through port B; a_out is left unconnected)
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_nn
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_nn_bram_addr),
+      .a_wr         (rw_nn_bram_wr),
+      .a_in         (rw_nn_bram_in),
+      .a_out        (),
+
+      .b_addr       (ro_nn_bram_addr),
+      .b_out        (ro_nn_bram_out)
+      );
+
+
+   //
+   // Output
+   // (port B is unused: its address is tied to zero and b_out is left unconnected)
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_y
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_y_bram_addr),
+      .a_wr         (rw_y_bram_wr),
+      .a_in         (rw_y_bram_in),
+      .a_out        (rw_y_bram_out),
+
+      .b_addr       ({(OPERAND_ADDR_WIDTH+1){1'b0}}),
+      .b_out        ()
+      );
+
+
+   //
+   // Result of Multiplication
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_r
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_r_bram_addr),
+      .a_wr         (rw_r_bram_wr),
+      .a_in         (rw_r_bram_in),
+      .a_out        (rw_r_bram_out),
+
+      .b_addr       (ro_r_bram_addr),
+      .b_out        (ro_r_bram_out)
+      );
+
+
+   //
+   // Temporary Buffer
+   //
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH+1)
+      )
+   mem_t
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_t_bram_addr),
+      .a_wr         (rw_t_bram_wr),
+      .a_in         (rw_t_bram_in),
+      .a_out        (rw_t_bram_out),
+
+      .b_addr       (ro_t_bram_addr),
+      .b_out        (ro_t_bram_out)
+      );
+
+
+endmodule
diff --git a/src/rtl/modexps6_buffer_user.v b/src/rtl/modexps6_buffer_user.v
index 6072fc9..fde6105 100644
--- a/src/rtl/modexps6_buffer_user.v
+++ b/src/rtl/modexps6_buffer_user.v
@@ -1,185 +1,197 @@
-`timescale 1ns / 1ps
-
-module modexps6_buffer_user
-  (
-   clk,
-
-   bus_cs, bus_we,
-   bus_addr, bus_data_wr, bus_data_rd,
-
-   ro_modulus_bram_addr, ro_modulus_bram_out,
-   ro_message_bram_addr, ro_message_bram_out,
-   ro_exponent_bram_addr,
ro_exponent_bram_out, - rw_result_bram_addr, - rw_result_bram_wr, rw_result_bram_in - ); - - - // - // Parameters - // - parameter OPERAND_ADDR_WIDTH = 5; // 1024 / 32 = 32 -> 5 bits - - - // - // Locals - // - localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2; - - localparam [ 1: 0] BUS_ADDR_BANK_MODULUS = 2'b00; - localparam [ 1: 0] BUS_ADDR_BANK_MESSAGE = 2'b01; - localparam [ 1: 0] BUS_ADDR_BANK_EXPONENT = 2'b10; - localparam [ 1: 0] BUS_ADDR_BANK_RESULT = 2'b11; - - // - // Ports - // - input wire clk; - - input wire bus_cs; - input wire bus_we; - input wire [ ADDR_WIDTH_TOTAL-1:0] bus_addr; - input wire [ 31:0] bus_data_wr; - output wire [ 31:0] bus_data_rd; - - input wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr; - output wire [ 31:0] ro_modulus_bram_out; - - input wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr; - output wire [ 31:0] ro_message_bram_out; - - input wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr; - output wire [ 31:0] ro_exponent_bram_out; - - input wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr; - input wire rw_result_bram_wr; - input wire [ 31:0] rw_result_bram_in; - - - // - // Address Decoder - // - wire [OPERAND_ADDR_WIDTH-1:0] bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0]; - wire [ 1:0] bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2]; - - - // - // Modulus Memory - // - wire [31: 0] bus_data_rd_modulus; - - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) - ) - mem_modulus - ( - .clk (clk), - - .a_addr (bus_addr_operand_word), - .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)), - .a_in (bus_data_wr), - .a_out (bus_data_rd_modulus), - - .b_addr (ro_modulus_bram_addr), - .b_out (ro_modulus_bram_out) - ); - - - // - // Message Memory - // - wire [31: 0] bus_data_rd_message; - - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) - ) - mem_message - ( - .clk (clk), - - .a_addr 
(bus_addr_operand_word), - .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)), - .a_in (bus_data_wr), - .a_out (bus_data_rd_message), - - .b_addr (ro_message_bram_addr), - .b_out (ro_message_bram_out) - ); - - - // - // Exponent Memory - // - wire [31: 0] bus_data_rd_exponent; - - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) - ) - mem_exponent - ( - .clk (clk), - - .a_addr (bus_addr_operand_word), - .a_wr (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)), - .a_in (bus_data_wr), - .a_out (bus_data_rd_exponent), - - .b_addr (ro_exponent_bram_addr), - .b_out (ro_exponent_bram_out) - ); - - - // - // Result Memory - // - wire [31: 0] bus_data_rd_result; - - ram_1rw_1ro_readfirst # - ( - .MEM_WIDTH (32), - .MEM_ADDR_BITS (OPERAND_ADDR_WIDTH) - ) - mem_result - ( - .clk (clk), - - .a_addr (rw_result_bram_addr), - .a_wr (rw_result_bram_wr), - .a_in (rw_result_bram_in), - .a_out (), - - .b_addr (bus_addr_operand_word), - .b_out (bus_data_rd_result) - ); - - - // - // Output Selector - // - reg [ 1: 0] bus_addr_operand_bank_prev; - always @(posedge clk) bus_addr_operand_bank_prev = bus_addr_operand_bank; - - reg [31: 0] bus_data_rd_mux; - assign bus_data_rd = bus_data_rd_mux; - - always @(*) - // - case (bus_addr_operand_bank_prev) - // - BUS_ADDR_BANK_MODULUS: bus_data_rd_mux = bus_data_rd_modulus; - BUS_ADDR_BANK_MESSAGE: bus_data_rd_mux = bus_data_rd_message; - BUS_ADDR_BANK_EXPONENT: bus_data_rd_mux = bus_data_rd_exponent; - BUS_ADDR_BANK_RESULT: bus_data_rd_mux = bus_data_rd_result; - // - default: bus_data_rd_mux = {32{1'bX}}; - // - endcase - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_buffer_user   // bus-facing operand memories (modulus, message, exponent) plus the result memory
+  #(parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits
+   (
+    input wire                          clk,
+
+    input wire                          bus_cs,        // bus chip select, qualifies writes
+    input wire                          bus_we,        // bus write enable
+    input wire [OPERAND_ADDR_WIDTH+1:0] bus_addr,      // {bank[1:0], word}; same width as ADDR_WIDTH_TOTAL, which is declared only after this port list
+    input wire [31:0]                   bus_data_wr,
+    output wire [31:0]                  bus_data_rd,   // read data of the bank addressed on the previous clock
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr,
+    output wire [31:0]                  ro_modulus_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr,
+    output wire [31:0]                  ro_message_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr,
+    output wire [31:0]                  ro_exponent_bram_out,
+
+    input wire [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr,
+    input wire                          rw_result_bram_wr,
+    input wire [31:0]                   rw_result_bram_in
+    );
+
+
+   //
+   // Locals (ADDR_WIDTH_TOTAL = word index bits + 2 bank-select bits)
+   //
+   localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2;
+
+   localparam [1: 0] BUS_ADDR_BANK_MODULUS  = 2'b00;
+   localparam [1: 0] BUS_ADDR_BANK_MESSAGE  = 2'b01;
+   localparam [1: 0] BUS_ADDR_BANK_EXPONENT = 2'b10;
+   localparam [1: 0] BUS_ADDR_BANK_RESULT   = 2'b11;
+
+   //
+   // Address Decoder: low bits select the word, the top two bits select the bank
+   //
+   wire [OPERAND_ADDR_WIDTH-1:0] bus_addr_operand_word = bus_addr[OPERAND_ADDR_WIDTH-1:0];
+   wire [ 1:0]                   bus_addr_operand_bank = bus_addr[ADDR_WIDTH_TOTAL-1:ADDR_WIDTH_TOTAL-2];
+
+
+   //
+   // Modulus Memory (bus on port A, core reads through port B)
+   //
+   wire [31: 0] bus_data_rd_modulus;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH)
+      )
+   mem_modulus
+     (
+      .clk          (clk),
+
+      .a_addr       (bus_addr_operand_word),
+      .a_wr         (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MODULUS)),
+      .a_in         (bus_data_wr),
+      .a_out        (bus_data_rd_modulus),
+
+      .b_addr       (ro_modulus_bram_addr),
+      .b_out        (ro_modulus_bram_out)
+      );
+
+
+   //
+   // Message Memory (bus on port A, core reads through port B)
+   //
+   wire [31: 0] bus_data_rd_message;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH)
+      )
+   mem_message
+     (
+      .clk          (clk),
+
+      .a_addr       (bus_addr_operand_word),
+      .a_wr         (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_MESSAGE)),
+      .a_in         (bus_data_wr),
+      .a_out        (bus_data_rd_message),
+
+      .b_addr       (ro_message_bram_addr),
+      .b_out        (ro_message_bram_out)
+      );
+
+
+   //
+   // Exponent Memory (bus on port A, core reads through port B)
+   //
+   wire [31: 0] bus_data_rd_exponent;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH)
+      )
+   mem_exponent
+     (
+      .clk          (clk),
+
+      .a_addr       (bus_addr_operand_word),
+      .a_wr         (bus_cs & bus_we & (bus_addr_operand_bank == BUS_ADDR_BANK_EXPONENT)),
+      .a_in         (bus_data_wr),
+      .a_out        (bus_data_rd_exponent),
+
+      .b_addr       (ro_exponent_bram_addr),
+      .b_out        (ro_exponent_bram_out)
+      );
+
+
+   //
+   // Result Memory (roles swapped: core writes through port A, bus reads through
+   // port B; no write enable decodes BUS_ADDR_BANK_RESULT, so bus writes to it are ignored)
+   wire [31: 0] bus_data_rd_result;
+
+   ram_1rw_1ro_readfirst #
+     (
+      .MEM_WIDTH            (32),
+      .MEM_ADDR_BITS        (OPERAND_ADDR_WIDTH)
+      )
+   mem_result
+     (
+      .clk          (clk),
+
+      .a_addr       (rw_result_bram_addr),
+      .a_wr         (rw_result_bram_wr),
+      .a_in         (rw_result_bram_in),
+      .a_out        (),
+
+      .b_addr       (bus_addr_operand_word),
+      .b_out        (bus_data_rd_result)
+      );
+
+
+   //
+   // Output Selector: the bank is delayed by one clock before it steers the mux,
+   // presumably to match the read latency of the memories -- TODO confirm against ram_1rw_1ro_readfirst
+   reg [1: 0]   bus_addr_operand_bank_prev;
+   always @(posedge clk) bus_addr_operand_bank_prev <= bus_addr_operand_bank;   // non-blocking: sequential logic must not race with other clocked blocks
+
+   reg [31: 0]  bus_data_rd_mux;
+   assign bus_data_rd = bus_data_rd_mux;
+
+   always @(*)
+     //
+     case (bus_addr_operand_bank_prev)
+       //
+       BUS_ADDR_BANK_MODULUS:  bus_data_rd_mux = bus_data_rd_modulus;
+       BUS_ADDR_BANK_MESSAGE:  bus_data_rd_mux = bus_data_rd_message;
+       BUS_ADDR_BANK_EXPONENT: bus_data_rd_mux = bus_data_rd_exponent;
+       BUS_ADDR_BANK_RESULT:   bus_data_rd_mux = bus_data_rd_result;
+       //
+       default:                bus_data_rd_mux = {32{1'bX}};
+       //
+     endcase
+
+
+endmodule
diff --git a/src/rtl/modexps6_modinv32.v b/src/rtl/modexps6_modinv32.v
index dc08b7b..916a17e 100644
--- a/src/rtl/modexps6_modinv32.v
+++ b/src/rtl/modexps6_modinv32.v
@@ -1,116 +1,140 @@
-`timescale 1ns / 1ps
-
-module modexps6_modinv32
-  (
-   clk,
-   ena, rdy,
-   n0, n0_modinv
-   );
-
-
-   //
-   // Ports
-   //
-   input wire clk;
-
-   input wire ena;
-   output wire
rdy; - - input wire [31: 0] n0; - output wire [31: 0] n0_modinv; - - - // - // Trigger - // - reg ena_dly = 1'b0; - wire ena_trig = ena && !ena_dly; - always @(posedge clk) ena_dly <= ena; - - - // - // Ready Register - // - reg rdy_reg = 1'b0; - assign rdy = rdy_reg; - - - // - // Counter - // - reg [ 7: 0] cnt = 8'd0; - wire [ 7: 0] cnt_zero = 8'd0; - wire [ 7: 0] cnt_last = 8'd132; - wire [ 7: 0] cnt_next = cnt + 1'b1; - wire [ 1: 0] cnt_phase = cnt[1:0]; - wire [ 5: 0] cnt_cycle = cnt[7:2]; - - always @(posedge clk) - // - if (cnt == cnt_zero) cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero; - else cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next; - - - // - // Enable / Ready Logic - // - always @(posedge clk) - // - if (cnt == cnt_last) rdy_reg <= 1'b1; - else if ((cnt == cnt_zero) && (rdy_reg && !ena)) rdy_reg <= 1'b0; - - - // - // Output Register - // - reg [31: 0] n0_modinv_reg; - assign n0_modinv = n0_modinv_reg; - - - // - // Multiplier - // - wire [63: 0] multiplier_out; - wire [31: 0] multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1}; - - multiplier_s6 dsp_multiplier - ( - .clk (clk), - .a (n0), - .b (n0_modinv_reg), - .p (multiplier_out) - ); - - - // - // Mask and Power - // - reg [30: 0] mask_reg; - reg [31: 0] power_reg; - - always @(posedge clk) - // - if (cnt_phase == 2'd1) begin - // - if (cnt_cycle == 6'd0) begin - // - mask_reg <= 31'd0; - power_reg <= 32'd1; - // - n0_modinv_reg <= 32'd0; - // - end else begin - // - mask_reg <= { mask_reg[29:0], 1'b1}; - power_reg <= {power_reg[30:0], 1'b0}; - // - if (multiplier_out_masked != 32'd1) - // - n0_modinv_reg <= n0_modinv_reg + power_reg; - // - end - // - end - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+//
+//======================================================================
+
+`timescale 1ns / 1ps
+
+module modexps6_modinv32   // builds n0_modinv bit by bit so that the low 32 bits of n0 * n0_modinv equal 1
+  (
+   input wire          clk,
+
+   input wire          ena,        // a rising edge starts a run while rdy is low
+   output wire         rdy,        // set when the run finishes, cleared once ena is low again
+
+   input wire [31: 0]  n0,         // value to invert (presumably odd, otherwise no inverse exists -- TODO confirm with caller)
+   output wire [31: 0] n0_modinv   // result, valid while rdy is high
+   );
+
+
+   //
+   // Trigger (one-clock pulse on the rising edge of ena)
+   //
+   reg                 ena_dly = 1'b0;
+   wire                ena_trig = ena && !ena_dly;
+   always @(posedge clk) ena_dly <= ena;
+
+
+   //
+   // Ready Register
+   //
+   reg                 rdy_reg = 1'b0;
+   assign rdy = rdy_reg;
+
+
+   //
+   // Counter: 33 steps of 4 clocks each (cnt_last = 132 = 33 * 4);
+   // cnt_cycle is the step number, cnt_phase the clock within the step
+   reg [7: 0]          cnt = 8'd0;
+   wire [7: 0]         cnt_zero = 8'd0;
+   wire [7: 0]         cnt_last = 8'd132;
+   wire [7: 0]         cnt_next = cnt + 1'b1;
+   wire [1: 0]         cnt_phase = cnt[1:0];
+   wire [5: 0]         cnt_cycle = cnt[7:2];
+
+   always @(posedge clk)
+     //
+     if (cnt == cnt_zero)  cnt <= (!rdy_reg && ena_trig) ? cnt_next : cnt_zero;
+     else                  cnt <= (cnt == cnt_last) ? cnt_zero : cnt_next;
+
+
+   //
+   // Enable / Ready Logic
+   //
+   always @(posedge clk)
+     //
+     if (cnt == cnt_last)                               rdy_reg <= 1'b1;
+     else if ((cnt == cnt_zero) && (rdy_reg && !ena))   rdy_reg <= 1'b0;
+
+
+   //
+   // Output Register and working registers (mask_reg / power_reg are
+   // declared here so that they precede their first use in the multiplier mask)
+   reg [31: 0]         n0_modinv_reg;
+   assign n0_modinv = n0_modinv_reg;
+
+   reg [30: 0]         mask_reg;    // selects the product bits above bit 0 that are compared in the current step
+   reg [31: 0]         power_reg;   // one-hot weight of the bit being decided in the current step
+
+
+   //
+   // Multiplier (multiplier_s6 is defined outside this file; the loop assumes its
+   // result for the current n0_modinv_reg is stable within one 4-clock step -- TODO confirm latency)
+   wire [63: 0]        multiplier_out;
+   wire [31: 0]        multiplier_out_masked = multiplier_out[31: 0] & {mask_reg, 1'b1};
+
+   multiplier_s6 dsp_multiplier
+     (
+      .clk  (clk),
+      .a    (n0),
+      .b    (n0_modinv_reg),
+      .p    (multiplier_out)
+      );
+
+
+   //
+   // Mask and Power: step 0 clears the state; each later step adds power_reg to
+   // the result when the masked product is not yet 1, then widens the mask by one bit
+   always @(posedge clk)
+     //
+     if (cnt_phase == 2'd1) begin
+        //
+        if (cnt_cycle == 6'd0) begin
+           //
+           mask_reg      <= 31'd0;
+           power_reg     <= 32'd1;
+           //
+           n0_modinv_reg <= 32'd0;
+           //
+        end else begin
+           //
+           mask_reg  <= { mask_reg[29:0], 1'b1};
+           power_reg <= {power_reg[30:0], 1'b0};
+           //
+           if (multiplier_out_masked != 32'd1)
+             //
+             n0_modinv_reg <= n0_modinv_reg + power_reg;
+           //
+        end
+        //
+     end
+
+
+endmodule
diff --git a/src/rtl/modexps6_montgomery_coeff.v b/src/rtl/modexps6_montgomery_coeff.v
index c3ceeee..fc0a365 100644
---
a/src/rtl/modexps6_montgomery_coeff.v +++ b/src/rtl/modexps6_montgomery_coeff.v @@ -1,410 +1,425 @@ -`timescale 1ns / 1ps - -module modexps6_montgomery_coeff - ( - clk, - ena, rdy, - modulus_width, - coeff_bram_addr, coeff_bram_wr, coeff_bram_in, coeff_bram_out, - nn_bram_addr, nn_bram_wr, nn_bram_in, - modulus_bram_addr, modulus_bram_out, - modinv_n0, modinv_ena, modinv_rdy - ); - - - // - // Parameters - // - parameter MODULUS_NUM_BITS = 11; // 1024 -> 11 bits - parameter OPERAND_ADDR_WIDTH = 5; // 1024 / 32 = 32 -> 5 bits - - - // - // Locals - // - localparam [ MODULUS_NUM_BITS :0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; - localparam [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; - localparam [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; - - - // - // Ports - // - input wire clk; - - input wire ena; - output wire rdy; - - input wire [ MODULUS_NUM_BITS-1:0] modulus_width; - - output wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr; - output wire coeff_bram_wr; - output wire [ 31:0] coeff_bram_in; - input wire [ 31:0] coeff_bram_out; - - output wire [OPERAND_ADDR_WIDTH :0] nn_bram_addr; - output wire nn_bram_wr; - output wire [ 31:0] nn_bram_in; - - output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr; - input wire [ 31:0] modulus_bram_out; - - output wire [ 31:0] modinv_n0; - output wire modinv_ena; - input wire modinv_rdy; - - - // - // FSM - // - localparam FSM_STATE_IDLE = 6'd0; - - localparam FSM_STATE_INIT = 6'd10; - - localparam FSM_STATE_SHIFT_READ = 6'd21; - localparam FSM_STATE_SHIFT_WRITE = 6'd22; - - localparam FSM_STATE_COMPARE_READ = 6'd31; - localparam FSM_STATE_COMPARE_COMPARE = 6'd32; - - localparam FSM_STATE_SUBTRACT_READ = 6'd41; - localparam FSM_STATE_SUBTRACT_WRITE = 6'd42; - - localparam FSM_STATE_ROUND = 6'd50; - - localparam FSM_STATE_FINAL = 6'd60; - - reg [ 5: 0] fsm_state = FSM_STATE_IDLE; - - - // - // Trigger - // - reg ena_dly = 1'b0; - - wire ena_trig = 
ena && !ena_dly; - - always @(posedge clk) ena_dly <= ena; - - - // - // Ready Register - // - reg rdy_reg = 1'b0; - - assign rdy = rdy_reg; - - - // - // ModInv Control - // - reg modinv_ena_reg = 1'b0; - reg [31: 0] modinv_n0_reg; - - assign modinv_ena = modinv_ena_reg; - assign modinv_n0 = modinv_n0_reg; - - - // - // Enable / Ready Logic - // - always @(posedge clk) - // - if (fsm_state == FSM_STATE_FINAL) begin - // - if (modinv_rdy) rdy_reg <= 1'b1; - // - end else if (fsm_state == FSM_STATE_IDLE) begin - // - if (rdy_reg && !ena) rdy_reg <= 1'b0; - // - end - - - // - // Flags - // - reg reg_shift_carry = 1'b0; - reg reg_subtractor_borrow = 1'b0; - - - // - // Round Counter - // - reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; - wire [MODULUS_NUM_BITS:0] round_count_last = {modulus_width, 1'b0} + 6'd63; - wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; - - - // - // Modulus BRAM Interface - // - reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero; - - assign modulus_bram_addr = modulus_bram_addr_reg; - - - // - // Coeff BRAM Interface - // - reg [OPERAND_ADDR_WIDTH:0] coeff_bram_addr_reg = coeff_bram_addr_zero; - reg coeff_bram_wr_reg = 1'b0; - - assign coeff_bram_addr = coeff_bram_addr_reg; - assign coeff_bram_wr = coeff_bram_wr_reg; - - - // - // NN BRAM Interface - // - reg [OPERAND_ADDR_WIDTH:0] nn_bram_addr_reg = coeff_bram_addr_zero; - reg nn_bram_wr_reg = 1'b0; - - assign nn_bram_addr = nn_bram_addr_reg; - assign nn_bram_wr = nn_bram_wr_reg; - - - // - // Hardware Subtractor - // - wire [31: 0] subtractor_out; - wire subtractor_out_nonzero = |subtractor_out; - wire subtractor_borrow_out; - wire subtractor_borrow_in; - - assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 
1'b0 : reg_subtractor_borrow; - - subtractor_s6 dsp_subtractor - ( - .a (coeff_bram_out), - .b (modulus_bram_out), - .s (subtractor_out), - .c_in (subtractor_borrow_in), - .c_out (subtractor_borrow_out) - ); - - - // - // Handy Wires - // - wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; - - wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_last = {modulus_width_msb, 1'b0}; - wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_zero = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero; - wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_last = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last; - wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_prev_or_zero = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero; - - wire [OPERAND_ADDR_WIDTH :0] modulus_bram_addr_last_ext = coeff_bram_addr_last - 1'b1; - - wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; - wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero; - wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? 
modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero; - - - // - // Coeff BRAM Input Logic - // - reg [31: 0] coeff_bram_in_mux; - - assign coeff_bram_in = coeff_bram_in_mux; - - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT: - // - if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001; - else coeff_bram_in_mux = 32'h00000000; - - FSM_STATE_SHIFT_WRITE: - // - coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry}; - - FSM_STATE_SUBTRACT_WRITE: - // - if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000; - else coeff_bram_in_mux = subtractor_out; - - default: - // - coeff_bram_in_mux = {32{1'bX}}; - - endcase - - - // - // NN BRAM Input Logic - // - reg [31: 0] nn_bram_in_mux; - - assign nn_bram_in = nn_bram_in_mux; - - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT: - // - if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}}; - else nn_bram_in_mux = modulus_bram_out; - - default: - // - nn_bram_in_mux = {32{1'bX}}; - - endcase - - - // - // Comparison Functions - // - reg compare_greater_or_equal; - reg compare_less_than; - - wire compare_done = compare_greater_or_equal | compare_less_than; - - always @(*) - // - if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0]; - // - else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out; - // - else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero; - - always @(*) - // - if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0; - // - else compare_less_than = subtractor_borrow_out; - - - - // - // Main Logic - // - always @(posedge clk) - // - case (fsm_state) - - FSM_STATE_INIT: begin - // - coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; - coeff_bram_addr_reg <= coeff_bram_wr_reg ? 
coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; - // - nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; - nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; - // - if (!coeff_bram_wr_reg) begin - // - modinv_ena_reg <= 1'b1; - modinv_n0_reg <= modulus_bram_out; - // - end - // - if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin - // - if (!coeff_bram_wr_reg) - // - modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; - // - end else begin - // - modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; - // - end - // - end - - FSM_STATE_SHIFT_READ: begin - // - coeff_bram_wr_reg <= 1'b1; - // - if (coeff_bram_addr_reg == coeff_bram_addr_zero) - // - reg_shift_carry <= 1'b0; - // - end - - FSM_STATE_SHIFT_WRITE: begin - // - coeff_bram_wr_reg <= 1'b0; - coeff_bram_addr_reg <= coeff_bram_addr_next_or_last; - // - reg_shift_carry <= coeff_bram_out[31]; - // - end - - FSM_STATE_COMPARE_COMPARE: begin - // - coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero; - // - modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero); - // - end - - FSM_STATE_SUBTRACT_READ: begin - // - coeff_bram_wr_reg <= 1'b1; - // - if (coeff_bram_addr_reg == coeff_bram_addr_zero) - // - reg_subtractor_borrow <= 1'b0; - // - end - - FSM_STATE_SUBTRACT_WRITE: begin - // - coeff_bram_wr_reg <= 1'b0; - coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero; - // - modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? 
modulus_bram_addr_zero : modulus_bram_addr_next_or_zero; - // - reg_subtractor_borrow <= subtractor_borrow_out; - // - end - - FSM_STATE_ROUND: begin - // - round_count <= round_count_next; - // - end - - FSM_STATE_FINAL: begin - // - if (modinv_rdy) modinv_ena_reg <= 1'b0; - // - end - - endcase - - - // - // FSM Transition Logic - // - always @(posedge clk) - // - case (fsm_state) - - FSM_STATE_IDLE: fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; - - FSM_STATE_SHIFT_READ: fsm_state <= FSM_STATE_SHIFT_WRITE; - FSM_STATE_COMPARE_READ: fsm_state <= FSM_STATE_COMPARE_COMPARE; - FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE; - - FSM_STATE_INIT: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT : FSM_STATE_SHIFT_READ; - FSM_STATE_SHIFT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_COMPARE_READ; - FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND; - - FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL; - - FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ; - - FSM_STATE_FINAL: fsm_state <= modinv_rdy ? FSM_STATE_IDLE : FSM_STATE_FINAL; - - default: fsm_state <= FSM_STATE_IDLE; - - endcase - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +//====================================================================== + +`timescale 1ns / 1ps + +module modexps6_montgomery_coeff + #(parameter MODULUS_NUM_BITS = 11, // 1024 -> 11 bits + parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire ena, + output wire rdy, + + input wire [MODULUS_NUM_BITS-1:0] modulus_width, + + output wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr, + output wire coeff_bram_wr, + output wire [31:0] coeff_bram_in, + input wire [31:0] coeff_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] nn_bram_addr, + output wire nn_bram_wr, + output wire [31:0] nn_bram_in, + + output wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr, + input wire [31:0] modulus_bram_out, + + output wire [31:0] modinv_n0, + output wire modinv_ena, + input wire modinv_rdy + ); + + + // + // Locals + // + localparam [ MODULUS_NUM_BITS :0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT = 6'd10; + + localparam FSM_STATE_SHIFT_READ = 6'd21; + localparam FSM_STATE_SHIFT_WRITE = 6'd22; + + localparam FSM_STATE_COMPARE_READ = 6'd31; + localparam FSM_STATE_COMPARE_COMPARE = 6'd32; + + localparam FSM_STATE_SUBTRACT_READ = 6'd41; + localparam FSM_STATE_SUBTRACT_WRITE = 6'd42; + + localparam FSM_STATE_ROUND = 6'd50; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg ena_dly = 1'b0; + + wire ena_trig = ena && !ena_dly; + + always @(posedge clk) ena_dly <= ena; + + + // + // Ready Register + // + reg rdy_reg = 1'b0; + + assign rdy = rdy_reg; + + + // + // ModInv Control + // + reg modinv_ena_reg = 1'b0; + reg [31: 0] modinv_n0_reg; + + assign modinv_ena = modinv_ena_reg; + assign modinv_n0 = 
modinv_n0_reg; + + + // + // Enable / Ready Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + if (modinv_rdy) rdy_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (rdy_reg && !ena) rdy_reg <= 1'b0; + // + end + + + // + // Flags + // + reg reg_shift_carry = 1'b0; + reg reg_subtractor_borrow = 1'b0; + + + // + // Round Counter + // + reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; + wire [MODULUS_NUM_BITS:0] round_count_last = {modulus_width, 1'b0} + 6'd63; + wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Modulus BRAM Interface + // + reg [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_reg = modulus_bram_addr_zero; + + assign modulus_bram_addr = modulus_bram_addr_reg; + + + // + // Coeff BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] coeff_bram_addr_reg = coeff_bram_addr_zero; + reg coeff_bram_wr_reg = 1'b0; + + assign coeff_bram_addr = coeff_bram_addr_reg; + assign coeff_bram_wr = coeff_bram_wr_reg; + + + // + // NN BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] nn_bram_addr_reg = coeff_bram_addr_zero; + reg nn_bram_wr_reg = 1'b0; + + assign nn_bram_addr = nn_bram_addr_reg; + assign nn_bram_wr = nn_bram_wr_reg; + + + // + // Hardware Subtractor + // + wire [31: 0] subtractor_out; + wire subtractor_out_nonzero = |subtractor_out; + wire subtractor_borrow_out; + wire subtractor_borrow_in; + + assign subtractor_borrow_in = (fsm_state == FSM_STATE_COMPARE_COMPARE) ? 
1'b0 : reg_subtractor_borrow; + + subtractor_s6 dsp_subtractor + ( + .a (coeff_bram_out), + .b (modulus_bram_out), + .s (subtractor_out), + .c_in (subtractor_borrow_in), + .c_out (subtractor_borrow_out) + ); + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_last = {modulus_width_msb, 1'b0}; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_zero = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_zero; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_next_or_last = (coeff_bram_addr_reg < coeff_bram_addr_last) ? coeff_bram_addr_reg + 1'b1 : coeff_bram_addr_last; + wire [OPERAND_ADDR_WIDTH :0] coeff_bram_addr_prev_or_zero = (coeff_bram_addr_reg > coeff_bram_addr_zero) ? coeff_bram_addr_reg - 1'b1 : coeff_bram_addr_zero; + + wire [OPERAND_ADDR_WIDTH :0] modulus_bram_addr_last_ext = coeff_bram_addr_last - 1'b1; + + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_last = modulus_bram_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_next_or_zero = (modulus_bram_addr_reg < modulus_bram_addr_last) ? modulus_bram_addr_reg + 1'b1 : modulus_bram_addr_zero; + wire [OPERAND_ADDR_WIDTH-1:0] modulus_bram_addr_prev_or_zero = (modulus_bram_addr_reg > modulus_bram_addr_zero) ? 
modulus_bram_addr_reg - 1'b1 : modulus_bram_addr_zero; + + + // + // Coeff BRAM Input Logic + // + reg [31: 0] coeff_bram_in_mux; + + assign coeff_bram_in = coeff_bram_in_mux; + + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) coeff_bram_in_mux = 32'h00000001; + else coeff_bram_in_mux = 32'h00000000; + + FSM_STATE_SHIFT_WRITE: + // + coeff_bram_in_mux = {coeff_bram_out[30:0], reg_shift_carry}; + + FSM_STATE_SUBTRACT_WRITE: + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) coeff_bram_in_mux = 32'h00000000; + else coeff_bram_in_mux = subtractor_out; + + default: + // + coeff_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // NN BRAM Input Logic + // + reg [31: 0] nn_bram_in_mux; + + assign nn_bram_in = nn_bram_in_mux; + + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) nn_bram_in_mux = {32{1'b0}}; + else nn_bram_in_mux = modulus_bram_out; + + default: + // + nn_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // Comparison Functions + // + reg compare_greater_or_equal; + reg compare_less_than; + + wire compare_done = compare_greater_or_equal | compare_less_than; + + always @(*) + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_greater_or_equal = coeff_bram_out[0]; + // + else if (coeff_bram_addr_reg == coeff_bram_addr_zero) compare_greater_or_equal = !subtractor_borrow_out; + // + else compare_greater_or_equal = !subtractor_borrow_out && subtractor_out_nonzero; + + always @(*) + // + if (coeff_bram_addr_reg == coeff_bram_addr_last) compare_less_than = 1'b0; + // + else compare_less_than = subtractor_borrow_out; + + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT: begin + // + coeff_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; + coeff_bram_addr_reg <= coeff_bram_wr_reg ? 
coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; + // + nn_bram_wr_reg <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? 1'b1 : 1'b0; + nn_bram_addr_reg <= coeff_bram_wr_reg ? coeff_bram_addr_next_or_zero : coeff_bram_addr_zero; + // + if (!coeff_bram_wr_reg) begin + // + modinv_ena_reg <= 1'b1; + modinv_n0_reg <= modulus_bram_out; + // + end + // + if (modulus_bram_addr_reg == modulus_bram_addr_zero) begin + // + if (!coeff_bram_wr_reg) + // + modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; + // + end else begin + // + modulus_bram_addr_reg <= modulus_bram_addr_next_or_zero; + // + end + // + end + + FSM_STATE_SHIFT_READ: begin + // + coeff_bram_wr_reg <= 1'b1; + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) + // + reg_shift_carry <= 1'b0; + // + end + + FSM_STATE_SHIFT_WRITE: begin + // + coeff_bram_wr_reg <= 1'b0; + coeff_bram_addr_reg <= coeff_bram_addr_next_or_last; + // + reg_shift_carry <= coeff_bram_out[31]; + // + end + + FSM_STATE_COMPARE_COMPARE: begin + // + coeff_bram_addr_reg <= compare_done ? coeff_bram_addr_zero : coeff_bram_addr_prev_or_zero; + // + modulus_bram_addr_reg <= compare_done ? modulus_bram_addr_zero : ((coeff_bram_addr_reg == coeff_bram_addr_last) ? modulus_bram_addr_last : modulus_bram_addr_prev_or_zero); + // + end + + FSM_STATE_SUBTRACT_READ: begin + // + coeff_bram_wr_reg <= 1'b1; + // + if (coeff_bram_addr_reg == coeff_bram_addr_zero) + // + reg_subtractor_borrow <= 1'b0; + // + end + + FSM_STATE_SUBTRACT_WRITE: begin + // + coeff_bram_wr_reg <= 1'b0; + coeff_bram_addr_reg <= coeff_bram_addr_next_or_zero; + // + modulus_bram_addr_reg <= (coeff_bram_addr_reg == coeff_bram_addr_last) ? 
modulus_bram_addr_zero : modulus_bram_addr_next_or_zero; + // + reg_subtractor_borrow <= subtractor_borrow_out; + // + end + + FSM_STATE_ROUND: begin + // + round_count <= round_count_next; + // + end + + FSM_STATE_FINAL: begin + // + if (modinv_rdy) modinv_ena_reg <= 1'b0; + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= (!rdy_reg && !modinv_rdy && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; + + FSM_STATE_SHIFT_READ: fsm_state <= FSM_STATE_SHIFT_WRITE; + FSM_STATE_COMPARE_READ: fsm_state <= FSM_STATE_COMPARE_COMPARE; + FSM_STATE_SUBTRACT_READ: fsm_state <= FSM_STATE_SUBTRACT_WRITE; + + FSM_STATE_INIT: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_INIT : FSM_STATE_SHIFT_READ; + FSM_STATE_SHIFT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_COMPARE_READ; + FSM_STATE_SUBTRACT_WRITE: fsm_state <= (coeff_bram_addr_reg < coeff_bram_addr_last) ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND; + + FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_SHIFT_READ : FSM_STATE_FINAL; + + FSM_STATE_COMPARE_COMPARE: fsm_state <= compare_done ? (compare_greater_or_equal ? FSM_STATE_SUBTRACT_READ : FSM_STATE_ROUND) : FSM_STATE_COMPARE_READ; + + FSM_STATE_FINAL: fsm_state <= modinv_rdy ? 
FSM_STATE_IDLE : FSM_STATE_FINAL; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexps6_montgomery_multiplier.v b/src/rtl/modexps6_montgomery_multiplier.v index f22f93d..14f32f8 100644 --- a/src/rtl/modexps6_montgomery_multiplier.v +++ b/src/rtl/modexps6_montgomery_multiplier.v @@ -1,392 +1,407 @@ -`timescale 1ns / 1ps - -module modexps6_montgomery_multiplier - ( - clk, - ena, rdy, - operand_width, - x_bram_addr, x_bram_out, - y_bram_addr, y_bram_out, - n_bram_addr, n_bram_out, - z_bram_addr, z_bram_wr, z_bram_in, z_bram_out, - n0_modinv - ); - - // - // Parameters - // - parameter OPERAND_NUM_BITS = 11; // 1024 -> 11 bits - parameter OPERAND_ADDR_WIDTH = 5; // 1024 / 32 = 32 -> 5 bits - - - // - // Locals - // - localparam [OPERAND_ADDR_WIDTH:0] round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; - localparam [OPERAND_ADDR_WIDTH:0] bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; - - - // - // Ports - // - input wire clk; - - input wire ena; - output wire rdy; - - input wire [ OPERAND_NUM_BITS-1:0] operand_width; - - output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr; - input wire [ 31:0] x_bram_out; - - output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr; - input wire [ 31:0] y_bram_out; - - output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr; - input wire [ 31:0] n_bram_out; - - output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr; - output wire z_bram_wr; - output wire [ 31:0] z_bram_in; - input wire [ 31:0] z_bram_out; - - input wire [ 31:0] n0_modinv; - - - // - // FSM - // - localparam FSM_STATE_IDLE = 6'd0; - - localparam FSM_STATE_INIT = 6'd10; - - localparam FSM_STATE_MUL_XY_CALC = 6'd21; - localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22; - localparam FSM_STATE_MUL_XY_REGISTER = 6'd23; - localparam FSM_STATE_MUL_XY_WRITE = 6'd24; - - localparam FSM_STATE_MAGIC_CALC = 6'd31; - localparam FSM_STATE_MAGIC_PIPELINE = 6'd32; - localparam FSM_STATE_MAGIC_REGISTER = 6'd33; - - localparam FSM_STATE_MUL_MN_CALC = 6'd41; - 
localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42; - localparam FSM_STATE_MUL_MN_REGISTER = 6'd43; - localparam FSM_STATE_MUL_MN_WRITE = 6'd44; - - localparam FSM_STATE_SHIFT = 6'd50; - - localparam FSM_STATE_ROUND = 6'd55; - - localparam FSM_STATE_FINAL = 6'd60; - - reg [ 5: 0] fsm_state = FSM_STATE_IDLE; - - - // - // Trigger - // - reg ena_dly = 1'b0; - always @(posedge clk) ena_dly <= ena; - wire ena_trig = (ena == 1'b1) && (ena_dly == 1'b0); - - - // - // Ready Register - // - reg rdy_reg = 1'b0; - assign rdy = rdy_reg; - - - // - // Enable / Ready Logic - // - always @(posedge clk) - // - if (fsm_state == FSM_STATE_FINAL) begin - // - rdy_reg <= 1'b1; - // - end else if (fsm_state == FSM_STATE_IDLE) begin - // - if (rdy_reg && !ena) rdy_reg <= 1'b0; - // - end - - - // - // X, Y, N BRAM Interface - // - reg [OPERAND_ADDR_WIDTH:0] x_bram_addr_reg = bram_addr_zero; - reg [OPERAND_ADDR_WIDTH:0] y_bram_addr_reg = bram_addr_zero; - reg [OPERAND_ADDR_WIDTH:0] n_bram_addr_reg = bram_addr_zero; - - assign x_bram_addr = x_bram_addr_reg; - assign y_bram_addr = y_bram_addr_reg; - assign n_bram_addr = n_bram_addr_reg; - - - // - // Z BRAM Interface - // - reg [OPERAND_ADDR_WIDTH:0] z_bram_addr_reg = bram_addr_zero; - reg z_bram_wr_reg = 1'b0; - reg [ 31:0] z_bram_in_mux; - - assign z_bram_addr = z_bram_addr_reg; - assign z_bram_wr = z_bram_wr_reg; - assign z_bram_in = z_bram_in_mux; - - - // - // Handy Wires - // - wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH]; - - wire [OPERAND_ADDR_WIDTH :0] bram_addr_last = {operand_width_msb, 1'b1}; // +1 - - - // - // Hardware Multiplier (X * Y) - // - reg [31: 0] multiplier_xy_carry_in; - wire [31: 0] multiplier_xy_out; - wire [31: 0] multiplier_xy_carry_out; - - modexps6_adder64_carry32 dsp_multiplier_xy - ( - .clk (clk), - .t (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/), - .x (/*(z_bram_addr_reg < bram_addr_last) ? 
*/x_bram_out/* : {32{1'b0}}*/), - .y (/*(z_bram_addr_reg < bram_addr_last) ? */y_bram_out/* : {32{1'b0}}*/), - .s (multiplier_xy_out), - .c_in (multiplier_xy_carry_in), - .c_out (multiplier_xy_carry_out) - ); - - - // - // Hardware Multiplier (Magic) - // - wire [63: 0] multiplier_magic_out; - reg [31: 0] magic_value_reg; - - multiplier_s6 dsp_multiplier_magic - ( - .clk (clk), - .a (z_bram_out), - .b (n0_modinv), - .p (multiplier_magic_out) - ); - - - // - // Hardware Multiplier (M * N) - // - reg [31: 0] multiplier_mn_carry_in; - wire [31: 0] multiplier_mn_out; - wire [31: 0] multiplier_mn_carry_out; - - modexps6_adder64_carry32 dsp_multiplier_mn - ( - .clk (clk), - .t (z_bram_out), - .x (magic_value_reg), - .y (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/), - .s (multiplier_mn_out), - .c_in (multiplier_mn_carry_in), - .c_out (multiplier_mn_carry_out) - ); - - - // - // Z BRAM Input Selector - // - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT: - // - z_bram_in_mux = {32{1'b0}}; - - FSM_STATE_MUL_XY_WRITE: - // - if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_xy_out; - else z_bram_in_mux = multiplier_xy_carry_in; - - FSM_STATE_MUL_MN_WRITE: - // - if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_mn_out; - else z_bram_in_mux = multiplier_mn_carry_in + z_bram_out; - - FSM_STATE_SHIFT: - // - z_bram_in_mux = z_bram_out; - - default: - // - z_bram_in_mux = {32{1'bX}}; - - endcase - - - // - // Handy Functions - // - function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_zero; - input [OPERAND_ADDR_WIDTH:0] bram_addr; - begin - bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero; - end - endfunction - - function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_last; - input [OPERAND_ADDR_WIDTH:0] bram_addr; - begin - bram_addr_next_or_last = (bram_addr < bram_addr_last) ? 
bram_addr + 1'b1 : bram_addr_last; - end - endfunction - - function [OPERAND_ADDR_WIDTH:0] bram_addr_prev_or_zero; - input [OPERAND_ADDR_WIDTH:0] bram_addr; - begin - bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero; - end - endfunction - - - // - // Round Counter - // - reg [OPERAND_ADDR_WIDTH:0] round_count = round_count_zero; - wire [OPERAND_ADDR_WIDTH:0] round_count_last = {operand_width_msb, 1'b0}; - wire [OPERAND_ADDR_WIDTH:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; - - - // - // Main Logic - // - always @(posedge clk) - // - case (fsm_state) - - FSM_STATE_INIT: begin - // - z_bram_wr_reg <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0; - z_bram_addr_reg <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero; - // - end - - FSM_STATE_MUL_XY_CALC: begin - // - if (z_bram_addr_reg == bram_addr_zero) begin - // - multiplier_xy_carry_in <= {32{1'b0}}; - // - end - // - end - - FSM_STATE_MUL_XY_REGISTER: begin - // - z_bram_wr_reg <= 1'b1; - // - end - - FSM_STATE_MUL_XY_WRITE: begin - // - z_bram_wr_reg <= 1'b0; - z_bram_addr_reg <= bram_addr_next_or_zero(z_bram_addr_reg); - // - x_bram_addr_reg <= bram_addr_next_or_zero(x_bram_addr_reg); - // - multiplier_xy_carry_in <= multiplier_xy_carry_out; - // - end - - FSM_STATE_MUL_MN_CALC: begin - // - if (z_bram_addr_reg == bram_addr_zero) begin - // - multiplier_mn_carry_in <= {32{1'b0}}; - // - magic_value_reg <= multiplier_magic_out[31:0]; - // - end - // - end - - FSM_STATE_MUL_MN_REGISTER: begin - // - z_bram_wr_reg <= 1'b1; - // - end - - FSM_STATE_MUL_MN_WRITE: begin - // - z_bram_wr_reg <= 1'b0; - z_bram_addr_reg <= bram_addr_next_or_last(z_bram_addr_reg); - // - n_bram_addr_reg <= bram_addr_next_or_zero(n_bram_addr_reg); - // - multiplier_mn_carry_in <= multiplier_mn_carry_out; - // - end - - FSM_STATE_SHIFT: begin - // - if (z_bram_wr_reg == 1'b0) z_bram_wr_reg <= 1'b1; - else if 
(z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0; - - z_bram_addr_reg <= bram_addr_prev_or_zero(z_bram_addr_reg); - // - end - - FSM_STATE_ROUND: begin - // - y_bram_addr_reg <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero; - // - round_count <= round_count_next; - // - end - - endcase - - - // - // FSM Transition Logic - // - always @(posedge clk) - // - case (fsm_state) - // - FSM_STATE_IDLE: fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; - - FSM_STATE_INIT: fsm_state <= (z_bram_addr < bram_addr_last ) ? FSM_STATE_INIT : FSM_STATE_MUL_XY_CALC; - FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL; - - FSM_STATE_MUL_XY_CALC: fsm_state <= FSM_STATE_MUL_XY_PIPELINE; - FSM_STATE_MAGIC_CALC: fsm_state <= FSM_STATE_MAGIC_PIPELINE; - FSM_STATE_MUL_MN_CALC: fsm_state <= FSM_STATE_MUL_MN_PIPELINE; - - FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER; - FSM_STATE_MAGIC_PIPELINE: fsm_state <= FSM_STATE_MAGIC_REGISTER; - FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER; - - FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE; - FSM_STATE_MAGIC_REGISTER: fsm_state <= FSM_STATE_MUL_MN_CALC; - FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE; - - FSM_STATE_MUL_XY_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC; - FSM_STATE_MUL_MN_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT; - FSM_STATE_SHIFT: fsm_state <= (z_bram_addr > bram_addr_zero) ? FSM_STATE_SHIFT : FSM_STATE_ROUND; - - FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; - - default: fsm_state <= FSM_STATE_IDLE; - - endcase - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +//====================================================================== + +`timescale 1ns / 1ps + +module modexps6_montgomery_multiplier + #(parameter OPERAND_NUM_BITS = 11, // 1024 -> 11 bits + parameter OPERAND_ADDR_WIDTH = 5) // 1024 / 32 = 32 -> 5 bits + ( + input wire clk, + + input wire ena, + output wire rdy, + + input wire [OPERAND_NUM_BITS-1:0] operand_width, + + output wire [OPERAND_ADDR_WIDTH :0] x_bram_addr, + input wire [31:0] x_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] y_bram_addr, + input wire [31:0] y_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] n_bram_addr, + input wire [31:0] n_bram_out, + + output wire [OPERAND_ADDR_WIDTH :0] z_bram_addr, + output wire z_bram_wr, + output wire [31:0] z_bram_in, + input wire [31:0] z_bram_out, + + input wire [31:0] n0_modinv + ); + + + // + // Locals + // + localparam [OPERAND_ADDR_WIDTH:0] round_count_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + localparam [OPERAND_ADDR_WIDTH:0] bram_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT = 6'd10; + + localparam FSM_STATE_MUL_XY_CALC = 6'd21; + localparam FSM_STATE_MUL_XY_PIPELINE = 6'd22; + localparam FSM_STATE_MUL_XY_REGISTER = 6'd23; + localparam FSM_STATE_MUL_XY_WRITE = 6'd24; + + localparam FSM_STATE_MAGIC_CALC = 6'd31; + localparam FSM_STATE_MAGIC_PIPELINE = 6'd32; + localparam FSM_STATE_MAGIC_REGISTER = 6'd33; + + localparam FSM_STATE_MUL_MN_CALC = 6'd41; + localparam FSM_STATE_MUL_MN_PIPELINE = 6'd42; + localparam FSM_STATE_MUL_MN_REGISTER = 6'd43; + localparam FSM_STATE_MUL_MN_WRITE = 6'd44; + + localparam FSM_STATE_SHIFT = 6'd50; + + localparam FSM_STATE_ROUND = 6'd55; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg ena_dly = 1'b0; + always @(posedge clk) ena_dly <= ena; + wire ena_trig = (ena == 1'b1) && (ena_dly == 1'b0); + + + // + // Ready Register + // + reg rdy_reg = 1'b0; + assign 
rdy = rdy_reg; + + + // + // Enable / Ready Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + rdy_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (rdy_reg && !ena) rdy_reg <= 1'b0; + // + end + + + // + // X, Y, N BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] x_bram_addr_reg = bram_addr_zero; + reg [OPERAND_ADDR_WIDTH:0] y_bram_addr_reg = bram_addr_zero; + reg [OPERAND_ADDR_WIDTH:0] n_bram_addr_reg = bram_addr_zero; + + assign x_bram_addr = x_bram_addr_reg; + assign y_bram_addr = y_bram_addr_reg; + assign n_bram_addr = n_bram_addr_reg; + + + // + // Z BRAM Interface + // + reg [OPERAND_ADDR_WIDTH:0] z_bram_addr_reg = bram_addr_zero; + reg z_bram_wr_reg = 1'b0; + reg [ 31:0] z_bram_in_mux; + + assign z_bram_addr = z_bram_addr_reg; + assign z_bram_wr = z_bram_wr_reg; + assign z_bram_in = z_bram_in_mux; + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] operand_width_msb = operand_width[OPERAND_NUM_BITS-1:OPERAND_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] bram_addr_last = {operand_width_msb, 1'b1}; // +1 + + + // + // Hardware Multiplier (X * Y) + // + reg [31: 0] multiplier_xy_carry_in; + wire [31: 0] multiplier_xy_out; + wire [31: 0] multiplier_xy_carry_out; + + modexps6_adder64_carry32 dsp_multiplier_xy + ( + .clk (clk), + .t (/*(z_bram_addr_reg < bram_addr_last) ? */z_bram_out/* : {32{1'b0}}*/), + .x (/*(z_bram_addr_reg < bram_addr_last) ? */x_bram_out/* : {32{1'b0}}*/), + .y (/*(z_bram_addr_reg < bram_addr_last) ? 
*/y_bram_out/* : {32{1'b0}}*/), + .s (multiplier_xy_out), + .c_in (multiplier_xy_carry_in), + .c_out (multiplier_xy_carry_out) + ); + + + // + // Hardware Multiplier (Magic) + // + wire [63: 0] multiplier_magic_out; + reg [31: 0] magic_value_reg; + + multiplier_s6 dsp_multiplier_magic + ( + .clk (clk), + .a (z_bram_out), + .b (n0_modinv), + .p (multiplier_magic_out) + ); + + + // + // Hardware Multiplier (M * N) + // + reg [31: 0] multiplier_mn_carry_in; + wire [31: 0] multiplier_mn_out; + wire [31: 0] multiplier_mn_carry_out; + + modexps6_adder64_carry32 dsp_multiplier_mn + ( + .clk (clk), + .t (z_bram_out), + .x (magic_value_reg), + .y (/*(z_bram_addr_reg < bram_addr_last) ? */n_bram_out/* : {32{1'b0}}*/), + .s (multiplier_mn_out), + .c_in (multiplier_mn_carry_in), + .c_out (multiplier_mn_carry_out) + ); + + + // + // Z BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT: + // + z_bram_in_mux = {32{1'b0}}; + + FSM_STATE_MUL_XY_WRITE: + // + if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_xy_out; + else z_bram_in_mux = multiplier_xy_carry_in; + + FSM_STATE_MUL_MN_WRITE: + // + if (z_bram_addr_reg < bram_addr_last) z_bram_in_mux = multiplier_mn_out; + else z_bram_in_mux = multiplier_mn_carry_in + z_bram_out; + + FSM_STATE_SHIFT: + // + z_bram_in_mux = z_bram_out; + + default: + // + z_bram_in_mux = {32{1'bX}}; + + endcase + + + // + // Handy Functions + // + function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_next_or_zero = (bram_addr < bram_addr_last) ? bram_addr + 1'b1 : bram_addr_zero; + end + endfunction + + function [OPERAND_ADDR_WIDTH:0] bram_addr_next_or_last; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_next_or_last = (bram_addr < bram_addr_last) ? 
bram_addr + 1'b1 : bram_addr_last; + end + endfunction + + function [OPERAND_ADDR_WIDTH:0] bram_addr_prev_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_addr; + begin + bram_addr_prev_or_zero = (bram_addr > bram_addr_zero) ? bram_addr - 1'b1 : bram_addr_zero; + end + endfunction + + + // + // Round Counter + // + reg [OPERAND_ADDR_WIDTH:0] round_count = round_count_zero; + wire [OPERAND_ADDR_WIDTH:0] round_count_last = {operand_width_msb, 1'b0}; + wire [OPERAND_ADDR_WIDTH:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT: begin + // + z_bram_wr_reg <= (z_bram_addr_reg < bram_addr_last) ? 1'b1 : 1'b0; + z_bram_addr_reg <= z_bram_wr_reg ? bram_addr_next_or_zero(z_bram_addr_reg) : bram_addr_zero; + // + end + + FSM_STATE_MUL_XY_CALC: begin + // + if (z_bram_addr_reg == bram_addr_zero) begin + // + multiplier_xy_carry_in <= {32{1'b0}}; + // + end + // + end + + FSM_STATE_MUL_XY_REGISTER: begin + // + z_bram_wr_reg <= 1'b1; + // + end + + FSM_STATE_MUL_XY_WRITE: begin + // + z_bram_wr_reg <= 1'b0; + z_bram_addr_reg <= bram_addr_next_or_zero(z_bram_addr_reg); + // + x_bram_addr_reg <= bram_addr_next_or_zero(x_bram_addr_reg); + // + multiplier_xy_carry_in <= multiplier_xy_carry_out; + // + end + + FSM_STATE_MUL_MN_CALC: begin + // + if (z_bram_addr_reg == bram_addr_zero) begin + // + multiplier_mn_carry_in <= {32{1'b0}}; + // + magic_value_reg <= multiplier_magic_out[31:0]; + // + end + // + end + + FSM_STATE_MUL_MN_REGISTER: begin + // + z_bram_wr_reg <= 1'b1; + // + end + + FSM_STATE_MUL_MN_WRITE: begin + // + z_bram_wr_reg <= 1'b0; + z_bram_addr_reg <= bram_addr_next_or_last(z_bram_addr_reg); + // + n_bram_addr_reg <= bram_addr_next_or_zero(n_bram_addr_reg); + // + multiplier_mn_carry_in <= multiplier_mn_carry_out; + // + end + + FSM_STATE_SHIFT: begin + // + if (z_bram_wr_reg == 1'b0) z_bram_wr_reg <= 1'b1; + else if 
(z_bram_addr_reg == bram_addr_zero) z_bram_wr_reg <= 1'b0; + + z_bram_addr_reg <= bram_addr_prev_or_zero(z_bram_addr_reg); + // + end + + FSM_STATE_ROUND: begin + // + y_bram_addr_reg <= (round_count < round_count_last) ? bram_addr_next_or_zero(y_bram_addr_reg) : bram_addr_zero; + // + round_count <= round_count_next; + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + // + FSM_STATE_IDLE: fsm_state <= (!rdy_reg && ena_trig) ? FSM_STATE_INIT : FSM_STATE_IDLE; + + FSM_STATE_INIT: fsm_state <= (z_bram_addr < bram_addr_last ) ? FSM_STATE_INIT : FSM_STATE_MUL_XY_CALC; + FSM_STATE_ROUND: fsm_state <= (round_count < round_count_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_FINAL; + + FSM_STATE_MUL_XY_CALC: fsm_state <= FSM_STATE_MUL_XY_PIPELINE; + FSM_STATE_MAGIC_CALC: fsm_state <= FSM_STATE_MAGIC_PIPELINE; + FSM_STATE_MUL_MN_CALC: fsm_state <= FSM_STATE_MUL_MN_PIPELINE; + + FSM_STATE_MUL_XY_PIPELINE: fsm_state <= FSM_STATE_MUL_XY_REGISTER; + FSM_STATE_MAGIC_PIPELINE: fsm_state <= FSM_STATE_MAGIC_REGISTER; + FSM_STATE_MUL_MN_PIPELINE: fsm_state <= FSM_STATE_MUL_MN_REGISTER; + + FSM_STATE_MUL_XY_REGISTER: fsm_state <= FSM_STATE_MUL_XY_WRITE; + FSM_STATE_MAGIC_REGISTER: fsm_state <= FSM_STATE_MUL_MN_CALC; + FSM_STATE_MUL_MN_REGISTER: fsm_state <= FSM_STATE_MUL_MN_WRITE; + + FSM_STATE_MUL_XY_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_XY_CALC : FSM_STATE_MAGIC_CALC; + FSM_STATE_MUL_MN_WRITE: fsm_state <= (z_bram_addr < bram_addr_last) ? FSM_STATE_MUL_MN_CALC : FSM_STATE_SHIFT; + FSM_STATE_SHIFT: fsm_state <= (z_bram_addr > bram_addr_zero) ? 
FSM_STATE_SHIFT : FSM_STATE_ROUND; + + FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexps6_top.v b/src/rtl/modexps6_top.v index 29845f8..d3c65a5 100644 --- a/src/rtl/modexps6_top.v +++ b/src/rtl/modexps6_top.v @@ -1,696 +1,706 @@ -`timescale 1ns / 1ps - -module modexps6_top - ( - clk, - - init, ready, - next, valid, - - modulus_width, - exponent_width, - - fast_public_mode, - - bus_cs, bus_we, - bus_addr, bus_data_wr, bus_data_rd - ); - - - // - // Parameters - // - parameter MAX_MODULUS_WIDTH = 1024; - - - // - // modexps6_clog2() - // - function integer modexps6_clog2; - input integer value; - integer ret; - begin - value = value - 1; - for (ret = 0; value > 0; ret = ret + 1) - value = value >> 1; - modexps6_clog2 = ret; - end - endfunction - - - // - // Locals - // - localparam OPERAND_ADDR_WIDTH = modexps6_clog2(MAX_MODULUS_WIDTH / 32); - localparam MODULUS_NUM_BITS = modexps6_clog2(MAX_MODULUS_WIDTH + 1); - localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2; - - localparam [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; - localparam [OPERAND_ADDR_WIDTH :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; - - localparam [ MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; - - - // - // Ports - // - input wire clk; - - input wire init; - output wire ready; - - input wire next; - output wire valid; - - input wire [MODULUS_NUM_BITS-1:0] modulus_width; - input wire [MODULUS_NUM_BITS-1:0] exponent_width; - - input wire fast_public_mode; - - input wire bus_cs; - input wire bus_we; - input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr; - input wire [ 31:0] bus_data_wr; - output wire [ 31:0] bus_data_rd; - - - // - // User Memory - // - wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr; - wire [ 31:0] ro_modulus_bram_out; - - reg [OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr = bram_user_addr_zero; - wire [ 31:0] 
ro_message_bram_out; - - reg [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr = bram_user_addr_zero; - wire [ 31:0] ro_exponent_bram_out; - - reg [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr = bram_user_addr_zero; - wire [ 31:0] rw_result_bram_out; - reg rw_result_bram_wr = 1'b0; - wire [ 31:0] rw_result_bram_in; - - modexps6_buffer_user # - ( - .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) - ) - mem_user - ( - .clk (clk), - - .bus_cs (bus_cs), - .bus_we (bus_we), - .bus_addr (bus_addr), - .bus_data_wr (bus_data_wr), - .bus_data_rd (bus_data_rd), - - .ro_modulus_bram_addr (ro_modulus_bram_addr), - .ro_modulus_bram_out (ro_modulus_bram_out), - - .ro_message_bram_addr (ro_message_bram_addr), - .ro_message_bram_out (ro_message_bram_out), - - .ro_exponent_bram_addr (ro_exponent_bram_addr), - .ro_exponent_bram_out (ro_exponent_bram_out), - - .rw_result_bram_addr (rw_result_bram_addr), - .rw_result_bram_wr (rw_result_bram_wr), - .rw_result_bram_in (rw_result_bram_in) - ); - - - // - // Core (Internal) Memory - // - wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr; - wire rw_coeff_bram_wr; - wire [ 31:0] rw_coeff_bram_in; - wire [ 31:0] rw_coeff_bram_out; - - reg [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr = bram_core_addr_zero; - reg rw_mm_bram_wr = 1'b0; - reg [ 31:0] rw_mm_bram_in; - wire [ 31:0] rw_mm_bram_out; - - wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr; - wire rw_nn_bram_wr; - wire [ 31:0] rw_nn_bram_in; - - reg [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr = bram_core_addr_zero; - reg rw_y_bram_wr = 1'b0; - reg [ 31:0] rw_y_bram_in; - wire [ 31:0] rw_y_bram_out; - - wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr; - wire rw_r_bram_wr; - wire [ 31:0] rw_r_bram_in; - wire [ 31:0] rw_r_bram_out; - - reg [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr = bram_core_addr_zero; - reg rw_t_bram_wr = 1'b0; - reg [ 31:0] rw_t_bram_in; - wire [ 31:0] rw_t_bram_out; - - reg [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr = bram_core_addr_zero; - wire [ 31:0] ro_coeff_bram_out; - - wire [OPERAND_ADDR_WIDTH:0] 
ro_mm_bram_addr; - wire [ 31:0] ro_mm_bram_out; - - wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr; - wire [ 31:0] ro_nn_bram_out; - - reg [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr = bram_core_addr_zero; - wire [ 31:0] ro_r_bram_out; - - wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr; - wire [ 31:0] ro_t_bram_out; - - modexps6_buffer_core # - ( - .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) - ) - mem_core - ( - .clk (clk), - - .rw_coeff_bram_addr (rw_coeff_bram_addr), - .rw_coeff_bram_wr (rw_coeff_bram_wr), - .rw_coeff_bram_in (rw_coeff_bram_in), - .rw_coeff_bram_out (rw_coeff_bram_out), - - .rw_mm_bram_addr (rw_mm_bram_addr), - .rw_mm_bram_wr (rw_mm_bram_wr), - .rw_mm_bram_in (rw_mm_bram_in), - .rw_mm_bram_out (rw_mm_bram_out), - - .rw_nn_bram_addr (rw_nn_bram_addr), - .rw_nn_bram_wr (rw_nn_bram_wr), - .rw_nn_bram_in (rw_nn_bram_in), - - .rw_y_bram_addr (rw_y_bram_addr), - .rw_y_bram_wr (rw_y_bram_wr), - .rw_y_bram_in (rw_y_bram_in), - .rw_y_bram_out (rw_y_bram_out), - - .rw_r_bram_addr (rw_r_bram_addr), - .rw_r_bram_wr (rw_r_bram_wr), - .rw_r_bram_in (rw_r_bram_in), - .rw_r_bram_out (rw_r_bram_out), - - .rw_t_bram_addr (rw_t_bram_addr), - .rw_t_bram_wr (rw_t_bram_wr), - .rw_t_bram_in (rw_t_bram_in), - .rw_t_bram_out (rw_t_bram_out), - - .ro_coeff_bram_addr (ro_coeff_bram_addr), - .ro_coeff_bram_out (ro_coeff_bram_out), - - .ro_mm_bram_addr (ro_mm_bram_addr), - .ro_mm_bram_out (ro_mm_bram_out), - - .ro_nn_bram_addr (ro_nn_bram_addr), - .ro_nn_bram_out (ro_nn_bram_out), - - .ro_r_bram_addr (ro_r_bram_addr), - .ro_r_bram_out (ro_r_bram_out), - - .ro_t_bram_addr (ro_t_bram_addr), - .ro_t_bram_out (ro_t_bram_out) - ); - - - // - // Small 32-bit ModInv Core - // - wire modinv_ena; - wire modinv_rdy; - - wire [31: 0] modinv_n0; - wire [31: 0] modinv_n0_negative = ~modinv_n0 + 1'b1; - wire [31: 0] modinv_n0_modinv; - - modexps6_modinv32 core_modinv32 - ( - .clk (clk), - - .ena (modinv_ena), - .rdy (modinv_rdy), - - .n0 (modinv_n0_negative), - .n0_modinv (modinv_n0_modinv) - ); - - - // 
- // Montgomery Coefficient Calculator - // - modexps6_montgomery_coeff # - ( - .MODULUS_NUM_BITS (MODULUS_NUM_BITS), - .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) - ) - core_montgomery_coeff - ( - .clk (clk), - - .ena (init), - .rdy (ready), - - .modulus_width (modulus_width), - - .coeff_bram_addr (rw_coeff_bram_addr), - .coeff_bram_wr (rw_coeff_bram_wr), - .coeff_bram_in (rw_coeff_bram_in), - .coeff_bram_out (rw_coeff_bram_out), - - .nn_bram_addr (rw_nn_bram_addr), - .nn_bram_wr (rw_nn_bram_wr), - .nn_bram_in (rw_nn_bram_in), - - .modulus_bram_addr (ro_modulus_bram_addr), - .modulus_bram_out (ro_modulus_bram_out), - - .modinv_n0 (modinv_n0), - .modinv_ena (modinv_ena), - .modinv_rdy (modinv_rdy) - ); - - - // - // Montgomery Multiplier - // - reg mul_ena = 1'b0; - wire mul_rdy; - - modexps6_montgomery_multiplier # - ( - .OPERAND_NUM_BITS (MODULUS_NUM_BITS), - .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) - ) - core_montgomery_multiplier - ( - .clk (clk), - - .ena (mul_ena), - .rdy (mul_rdy), - - .operand_width (modulus_width), - - .x_bram_addr (ro_t_bram_addr), - .x_bram_out (ro_t_bram_out), - - .y_bram_addr (ro_mm_bram_addr), - .y_bram_out (ro_mm_bram_out), - - .n_bram_addr (ro_nn_bram_addr), - .n_bram_out (ro_nn_bram_out), - - .z_bram_addr (rw_r_bram_addr), - .z_bram_wr (rw_r_bram_wr), - .z_bram_in (rw_r_bram_in), - .z_bram_out (rw_r_bram_out), - - .n0_modinv (modinv_n0_modinv) - ); - - - // - // FSM - // - localparam FSM_STATE_IDLE = 6'd0; - - localparam FSM_STATE_INIT_LOAD = 6'd11; - localparam FSM_STATE_INIT_WAIT = 6'd12; - localparam FSM_STATE_INIT_UNLOAD = 6'd13; - - localparam FSM_STATE_READ_EI = 6'd20; - - localparam FSM_STATE_ROUND_BEGIN = 6'd25; - - localparam FSM_STATE_MULTIPLY_LOAD = 6'd31; - localparam FSM_STATE_MULTIPLY_WAIT = 6'd32; - localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33; - - localparam FSM_STATE_SQUARE_LOAD = 6'd41; - localparam FSM_STATE_SQUARE_WAIT = 6'd42; - localparam FSM_STATE_SQUARE_UNLOAD = 6'd43; - - localparam FSM_STATE_ROUND_END = 
6'd50; - - localparam FSM_STATE_FINAL = 6'd60; - - reg [ 5: 0] fsm_state = FSM_STATE_IDLE; - - - // - // Trigger - // - reg next_dly = 1'b0; - always @(posedge clk) next_dly <= next; - wire next_trig = (next == 1'b1) && (next_dly == 1'b0); - - - // - // Valid Register - // - reg valid_reg = 1'b0; - assign valid = valid_reg; - - - // - // Next/ Valid Logic - // - always @(posedge clk) - // - if (fsm_state == FSM_STATE_FINAL) begin - // - valid_reg <= 1'b1; - // - end else if (fsm_state == FSM_STATE_IDLE) begin - // - if (valid_reg && !next) valid_reg <= 1'b0; - // - end - - - // - // Exponent Bit Counter - // - reg [ 4: 0] ei_bit_count = 5'd0; - wire ei_bit = ro_exponent_bram_out[ei_bit_count]; - - - // - // Round Counter - // - reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; - wire [MODULUS_NUM_BITS:0] round_count_last = exponent_width - 1'b1; - wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; - - - // - // Handy Wires - // - wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; - - wire [OPERAND_ADDR_WIDTH :0] bram_core_addr_last = {modulus_width_msb, 1'b0}; - - wire [OPERAND_ADDR_WIDTH :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1; - wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; - - - // - // Handy Functions - // - function [OPERAND_ADDR_WIDTH:0] bram_core_addr_next_or_zero; - input [OPERAND_ADDR_WIDTH:0] bram_core_addr; - begin - bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero; - end - endfunction - - function [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_next_or_zero; - input [OPERAND_ADDR_WIDTH-1:0] bram_user_addr; - begin - bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? 
bram_user_addr + 1'b1 : bram_user_addr_zero; - end - endfunction - - - // - // Result BRAM Input - // - assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; - - - // - // MM BRAM Input Selector - // - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT_LOAD: - // - rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}}; - - FSM_STATE_INIT_UNLOAD: - // - rw_mm_bram_in = ro_r_bram_out; - - FSM_STATE_SQUARE_UNLOAD: - // - rw_mm_bram_in = ro_r_bram_out; - - default: - // - rw_mm_bram_in = {32{1'bX}}; - - endcase - - - // - // Y BRAM Input Selector - // - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT_LOAD: - // - rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000; - - FSM_STATE_MULTIPLY_UNLOAD: - // - rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW! - - default: - // - rw_y_bram_in = {32{1'bX}}; - - endcase - - - // - // T BRAM Input Selector - // - always @(*) - // - case (fsm_state) - - FSM_STATE_INIT_LOAD: - // - rw_t_bram_in = ro_coeff_bram_out; - - FSM_STATE_MULTIPLY_LOAD: - // - rw_t_bram_in = rw_y_bram_out; - - FSM_STATE_SQUARE_LOAD: - // - rw_t_bram_in = rw_mm_bram_out; - - default: - // - rw_t_bram_in = {32{1'bX}}; - - endcase - - - // - // Main Logic - // - always @(posedge clk) - // - case (fsm_state) - - FSM_STATE_INIT_LOAD: begin - // - rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - rw_y_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - rw_t_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; - rw_y_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; - rw_t_bram_addr <= rw_mm_bram_wr ? 
bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; - // - if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr); - else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr); - // - if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr); - else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr); - // - end - - FSM_STATE_INIT_WAIT: begin - // - if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; - else mul_ena <= 1'b1; - // - end - - FSM_STATE_INIT_UNLOAD: begin - // - rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; - // - if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); - else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); - // - end - - FSM_STATE_MULTIPLY_LOAD: begin - // - rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; - // - if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr); - else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr); - // - end - - FSM_STATE_MULTIPLY_WAIT: begin - // - if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; - else mul_ena <= 1'b1; - // - end - - FSM_STATE_MULTIPLY_UNLOAD: begin - // - rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_y_bram_addr <= rw_y_bram_wr ? 
bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero; - // - if (ei_bit) begin - // - if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); - else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); - // - end else begin - // - if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr); - else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr); - // - end - // - if (round_count == round_count_last) begin - // - if (rw_result_bram_addr == bram_user_addr_zero) begin - // - if (rw_y_bram_wr) begin - // - rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1; - rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr); - // - end else begin - // - rw_result_bram_wr <= 1'b1; - rw_result_bram_addr <= bram_user_addr_zero; - // - end - // - end else begin - // - rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0; - rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr); - // - end - // - end - // - end - - FSM_STATE_SQUARE_LOAD: begin - // - rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; - // - if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr); - else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr); - // - end - - FSM_STATE_SQUARE_WAIT: begin - // - if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; - else mul_ena <= 1'b1; - // - end - - FSM_STATE_SQUARE_UNLOAD: begin - // - rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; - // - rw_mm_bram_addr <= rw_mm_bram_wr ? 
bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; - // - if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); - else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); - // - end - - FSM_STATE_ROUND_END: begin - // - round_count <= round_count_next; - // - if (round_count < round_count_last) begin - // - ei_bit_count <= ei_bit_count + 1'b1; - // - if (ei_bit_count == 5'd31) - // - ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr); - // - end else begin - // - ei_bit_count <= 5'd0; - // - ro_exponent_bram_addr <= bram_user_addr_zero; - // - end - // - end - - endcase - - - // - // FSM Transition Logic - // - always @(posedge clk) - // - case (fsm_state) - - FSM_STATE_IDLE: fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE; - - FSM_STATE_INIT_LOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT; - FSM_STATE_INIT_WAIT: fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT; - FSM_STATE_INIT_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI; - - FSM_STATE_READ_EI: fsm_state <= FSM_STATE_ROUND_BEGIN; - - FSM_STATE_ROUND_BEGIN: fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD; - - FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT; - FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT; - FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD; - - FSM_STATE_SQUARE_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? 
FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT; - FSM_STATE_SQUARE_WAIT: fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT; - FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END; - - FSM_STATE_ROUND_END: fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL; - - FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; - - default: fsm_state <= FSM_STATE_IDLE; - - endcase - - -endmodule +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +//====================================================================== + +`timescale 1ns / 1ps + +module modexps6_top + #(parameter MAX_MODULUS_WIDTH = 1024) + ( + input wire clk, + + input wire init, + output wire ready, + + input wire next, + output wire valid, + + input wire [MODULUS_NUM_BITS-1:0] modulus_width, + input wire [MODULUS_NUM_BITS-1:0] exponent_width, + + input wire fast_public_mode, + + input wire bus_cs, + input wire bus_we, + input wire [ADDR_WIDTH_TOTAL-1:0] bus_addr, + input wire [31:0] bus_data_wr, + output wire [31:0] bus_data_rd + ); + + + // + // modexps6_clog2() + // + function integer modexps6_clog2; + input integer value; + integer ret; + begin + value = value - 1; + for (ret = 0; value > 0; ret = ret + 1) + value = value >> 1; + modexps6_clog2 = ret; + end + endfunction + + + // + // Locals + // + localparam OPERAND_ADDR_WIDTH = modexps6_clog2(MAX_MODULUS_WIDTH / 32); + localparam MODULUS_NUM_BITS = modexps6_clog2(MAX_MODULUS_WIDTH + 1); + localparam ADDR_WIDTH_TOTAL = OPERAND_ADDR_WIDTH + 2; + + localparam [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_zero = {OPERAND_ADDR_WIDTH{1'b0}}; + localparam [OPERAND_ADDR_WIDTH :0] bram_core_addr_zero = {1'b0, {OPERAND_ADDR_WIDTH{1'b0}}}; + + localparam [ MODULUS_NUM_BITS:0] round_count_zero = {1'b0, {MODULUS_NUM_BITS{1'b0}}}; + + + // + // User Memory + // + wire [OPERAND_ADDR_WIDTH-1:0] ro_modulus_bram_addr; + wire [ 31:0] ro_modulus_bram_out; + + reg 
[OPERAND_ADDR_WIDTH-1:0] ro_message_bram_addr = bram_user_addr_zero; + wire [ 31:0] ro_message_bram_out; + + reg [OPERAND_ADDR_WIDTH-1:0] ro_exponent_bram_addr = bram_user_addr_zero; + wire [ 31:0] ro_exponent_bram_out; + + reg [OPERAND_ADDR_WIDTH-1:0] rw_result_bram_addr = bram_user_addr_zero; + wire [ 31:0] rw_result_bram_out; + reg rw_result_bram_wr = 1'b0; + wire [ 31:0] rw_result_bram_in; + + modexps6_buffer_user # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + mem_user + ( + .clk (clk), + + .bus_cs (bus_cs), + .bus_we (bus_we), + .bus_addr (bus_addr), + .bus_data_wr (bus_data_wr), + .bus_data_rd (bus_data_rd), + + .ro_modulus_bram_addr (ro_modulus_bram_addr), + .ro_modulus_bram_out (ro_modulus_bram_out), + + .ro_message_bram_addr (ro_message_bram_addr), + .ro_message_bram_out (ro_message_bram_out), + + .ro_exponent_bram_addr (ro_exponent_bram_addr), + .ro_exponent_bram_out (ro_exponent_bram_out), + + .rw_result_bram_addr (rw_result_bram_addr), + .rw_result_bram_wr (rw_result_bram_wr), + .rw_result_bram_in (rw_result_bram_in) + ); + + + // + // Core (Internal) Memory + // + wire [OPERAND_ADDR_WIDTH:0] rw_coeff_bram_addr; + wire rw_coeff_bram_wr; + wire [ 31:0] rw_coeff_bram_in; + wire [ 31:0] rw_coeff_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] rw_mm_bram_addr = bram_core_addr_zero; + reg rw_mm_bram_wr = 1'b0; + reg [ 31:0] rw_mm_bram_in; + wire [ 31:0] rw_mm_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] rw_nn_bram_addr; + wire rw_nn_bram_wr; + wire [ 31:0] rw_nn_bram_in; + + reg [OPERAND_ADDR_WIDTH:0] rw_y_bram_addr = bram_core_addr_zero; + reg rw_y_bram_wr = 1'b0; + reg [ 31:0] rw_y_bram_in; + wire [ 31:0] rw_y_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] rw_r_bram_addr; + wire rw_r_bram_wr; + wire [ 31:0] rw_r_bram_in; + wire [ 31:0] rw_r_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] rw_t_bram_addr = bram_core_addr_zero; + reg rw_t_bram_wr = 1'b0; + reg [ 31:0] rw_t_bram_in; + wire [ 31:0] rw_t_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] ro_coeff_bram_addr = 
bram_core_addr_zero; + wire [ 31:0] ro_coeff_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_mm_bram_addr; + wire [ 31:0] ro_mm_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_nn_bram_addr; + wire [ 31:0] ro_nn_bram_out; + + reg [OPERAND_ADDR_WIDTH:0] ro_r_bram_addr = bram_core_addr_zero; + wire [ 31:0] ro_r_bram_out; + + wire [OPERAND_ADDR_WIDTH:0] ro_t_bram_addr; + wire [ 31:0] ro_t_bram_out; + + modexps6_buffer_core # + ( + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + mem_core + ( + .clk (clk), + + .rw_coeff_bram_addr (rw_coeff_bram_addr), + .rw_coeff_bram_wr (rw_coeff_bram_wr), + .rw_coeff_bram_in (rw_coeff_bram_in), + .rw_coeff_bram_out (rw_coeff_bram_out), + + .rw_mm_bram_addr (rw_mm_bram_addr), + .rw_mm_bram_wr (rw_mm_bram_wr), + .rw_mm_bram_in (rw_mm_bram_in), + .rw_mm_bram_out (rw_mm_bram_out), + + .rw_nn_bram_addr (rw_nn_bram_addr), + .rw_nn_bram_wr (rw_nn_bram_wr), + .rw_nn_bram_in (rw_nn_bram_in), + + .rw_y_bram_addr (rw_y_bram_addr), + .rw_y_bram_wr (rw_y_bram_wr), + .rw_y_bram_in (rw_y_bram_in), + .rw_y_bram_out (rw_y_bram_out), + + .rw_r_bram_addr (rw_r_bram_addr), + .rw_r_bram_wr (rw_r_bram_wr), + .rw_r_bram_in (rw_r_bram_in), + .rw_r_bram_out (rw_r_bram_out), + + .rw_t_bram_addr (rw_t_bram_addr), + .rw_t_bram_wr (rw_t_bram_wr), + .rw_t_bram_in (rw_t_bram_in), + .rw_t_bram_out (rw_t_bram_out), + + .ro_coeff_bram_addr (ro_coeff_bram_addr), + .ro_coeff_bram_out (ro_coeff_bram_out), + + .ro_mm_bram_addr (ro_mm_bram_addr), + .ro_mm_bram_out (ro_mm_bram_out), + + .ro_nn_bram_addr (ro_nn_bram_addr), + .ro_nn_bram_out (ro_nn_bram_out), + + .ro_r_bram_addr (ro_r_bram_addr), + .ro_r_bram_out (ro_r_bram_out), + + .ro_t_bram_addr (ro_t_bram_addr), + .ro_t_bram_out (ro_t_bram_out) + ); + + + // + // Small 32-bit ModInv Core + // + wire modinv_ena; + wire modinv_rdy; + + wire [31: 0] modinv_n0; + wire [31: 0] modinv_n0_negative = ~modinv_n0 + 1'b1; + wire [31: 0] modinv_n0_modinv; + + modexps6_modinv32 core_modinv32 + ( + .clk (clk), + + .ena (modinv_ena), + .rdy 
(modinv_rdy), + + .n0 (modinv_n0_negative), + .n0_modinv (modinv_n0_modinv) + ); + + + // + // Montgomery Coefficient Calculator + // + modexps6_montgomery_coeff # + ( + .MODULUS_NUM_BITS (MODULUS_NUM_BITS), + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + core_montgomery_coeff + ( + .clk (clk), + + .ena (init), + .rdy (ready), + + .modulus_width (modulus_width), + + .coeff_bram_addr (rw_coeff_bram_addr), + .coeff_bram_wr (rw_coeff_bram_wr), + .coeff_bram_in (rw_coeff_bram_in), + .coeff_bram_out (rw_coeff_bram_out), + + .nn_bram_addr (rw_nn_bram_addr), + .nn_bram_wr (rw_nn_bram_wr), + .nn_bram_in (rw_nn_bram_in), + + .modulus_bram_addr (ro_modulus_bram_addr), + .modulus_bram_out (ro_modulus_bram_out), + + .modinv_n0 (modinv_n0), + .modinv_ena (modinv_ena), + .modinv_rdy (modinv_rdy) + ); + + + // + // Montgomery Multiplier + // + reg mul_ena = 1'b0; + wire mul_rdy; + + modexps6_montgomery_multiplier # + ( + .OPERAND_NUM_BITS (MODULUS_NUM_BITS), + .OPERAND_ADDR_WIDTH (OPERAND_ADDR_WIDTH) + ) + core_montgomery_multiplier + ( + .clk (clk), + + .ena (mul_ena), + .rdy (mul_rdy), + + .operand_width (modulus_width), + + .x_bram_addr (ro_t_bram_addr), + .x_bram_out (ro_t_bram_out), + + .y_bram_addr (ro_mm_bram_addr), + .y_bram_out (ro_mm_bram_out), + + .n_bram_addr (ro_nn_bram_addr), + .n_bram_out (ro_nn_bram_out), + + .z_bram_addr (rw_r_bram_addr), + .z_bram_wr (rw_r_bram_wr), + .z_bram_in (rw_r_bram_in), + .z_bram_out (rw_r_bram_out), + + .n0_modinv (modinv_n0_modinv) + ); + + + // + // FSM + // + localparam FSM_STATE_IDLE = 6'd0; + + localparam FSM_STATE_INIT_LOAD = 6'd11; + localparam FSM_STATE_INIT_WAIT = 6'd12; + localparam FSM_STATE_INIT_UNLOAD = 6'd13; + + localparam FSM_STATE_READ_EI = 6'd20; + + localparam FSM_STATE_ROUND_BEGIN = 6'd25; + + localparam FSM_STATE_MULTIPLY_LOAD = 6'd31; + localparam FSM_STATE_MULTIPLY_WAIT = 6'd32; + localparam FSM_STATE_MULTIPLY_UNLOAD = 6'd33; + + localparam FSM_STATE_SQUARE_LOAD = 6'd41; + localparam FSM_STATE_SQUARE_WAIT = 
6'd42; + localparam FSM_STATE_SQUARE_UNLOAD = 6'd43; + + localparam FSM_STATE_ROUND_END = 6'd50; + + localparam FSM_STATE_FINAL = 6'd60; + + reg [5: 0] fsm_state = FSM_STATE_IDLE; + + + // + // Trigger + // + reg next_dly = 1'b0; + always @(posedge clk) next_dly <= next; + wire next_trig = (next == 1'b1) && (next_dly == 1'b0); + + + // + // Valid Register + // + reg valid_reg = 1'b0; + assign valid = valid_reg; + + + // + // Next/ Valid Logic + // + always @(posedge clk) + // + if (fsm_state == FSM_STATE_FINAL) begin + // + valid_reg <= 1'b1; + // + end else if (fsm_state == FSM_STATE_IDLE) begin + // + if (valid_reg && !next) valid_reg <= 1'b0; + // + end + + + // + // Exponent Bit Counter + // + reg [4: 0] ei_bit_count = 5'd0; + wire ei_bit = ro_exponent_bram_out[ei_bit_count]; + + + // + // Round Counter + // + reg [MODULUS_NUM_BITS:0] round_count = round_count_zero; + wire [MODULUS_NUM_BITS:0] round_count_last = exponent_width - 1'b1; + wire [MODULUS_NUM_BITS:0] round_count_next = (round_count < round_count_last) ? round_count + 1'b1 : round_count_zero; + + + // + // Handy Wires + // + wire [OPERAND_ADDR_WIDTH-1:0] modulus_width_msb = modulus_width[MODULUS_NUM_BITS-1:MODULUS_NUM_BITS-OPERAND_ADDR_WIDTH]; + + wire [OPERAND_ADDR_WIDTH :0] bram_core_addr_last = {modulus_width_msb, 1'b0}; + + wire [OPERAND_ADDR_WIDTH :0] bram_user_addr_last_ext = bram_core_addr_last - 1'b1; + wire [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_last = bram_user_addr_last_ext[OPERAND_ADDR_WIDTH-1:0]; + + + // + // Handy Functions + // + function [OPERAND_ADDR_WIDTH:0] bram_core_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH:0] bram_core_addr; + begin + bram_core_addr_next_or_zero = (bram_core_addr < bram_core_addr_last) ? bram_core_addr + 1'b1 : bram_core_addr_zero; + end + endfunction + + function [OPERAND_ADDR_WIDTH-1:0] bram_user_addr_next_or_zero; + input [OPERAND_ADDR_WIDTH-1:0] bram_user_addr; + begin + bram_user_addr_next_or_zero = (bram_user_addr < bram_user_addr_last) ? 
bram_user_addr + 1'b1 : bram_user_addr_zero; + end + endfunction + + + // + // Result BRAM Input + // + assign rw_result_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; + + + // + // MM BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_mm_bram_in = (rw_mm_bram_addr < bram_core_addr_last) ? ro_message_bram_out : {32{1'b0}}; + + FSM_STATE_INIT_UNLOAD: + // + rw_mm_bram_in = ro_r_bram_out; + + FSM_STATE_SQUARE_UNLOAD: + // + rw_mm_bram_in = ro_r_bram_out; + + default: + // + rw_mm_bram_in = {32{1'bX}}; + + endcase + + + // + // Y BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_y_bram_in = (rw_mm_bram_addr == bram_core_addr_zero) ? 32'h00000001 : 32'h00000000; + + FSM_STATE_MULTIPLY_UNLOAD: + // + rw_y_bram_in = ei_bit ? ro_r_bram_out : rw_t_bram_out; // RW! + + default: + // + rw_y_bram_in = {32{1'bX}}; + + endcase + + + // + // T BRAM Input Selector + // + always @(*) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: + // + rw_t_bram_in = ro_coeff_bram_out; + + FSM_STATE_MULTIPLY_LOAD: + // + rw_t_bram_in = rw_y_bram_out; + + FSM_STATE_SQUARE_LOAD: + // + rw_t_bram_in = rw_mm_bram_out; + + default: + // + rw_t_bram_in = {32{1'bX}}; + + endcase + + + // + // Main Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_INIT_LOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + rw_y_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + rw_t_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + rw_y_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + rw_t_bram_addr <= rw_mm_bram_wr ? 
bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_coeff_bram_addr > bram_core_addr_zero) ro_coeff_bram_addr <= bram_core_addr_next_or_zero(ro_coeff_bram_addr); + else ro_coeff_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_coeff_bram_addr); + // + if (ro_message_bram_addr > bram_user_addr_zero) ro_message_bram_addr <= bram_user_addr_next_or_zero(ro_message_bram_addr); + else ro_message_bram_addr <= rw_mm_bram_wr ? bram_user_addr_zero : bram_user_addr_next_or_zero(ro_message_bram_addr); + // + end + + FSM_STATE_INIT_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_INIT_UNLOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end + + FSM_STATE_MULTIPLY_LOAD: begin + // + rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; + // + if (rw_y_bram_addr > bram_core_addr_zero) rw_y_bram_addr <= bram_core_addr_next_or_zero(rw_y_bram_addr); + else rw_y_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_y_bram_addr); + // + end + + FSM_STATE_MULTIPLY_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_MULTIPLY_UNLOAD: begin + // + rw_y_bram_wr <= (rw_y_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_y_bram_addr <= rw_y_bram_wr ? 
bram_core_addr_next_or_zero(rw_y_bram_addr) : bram_core_addr_zero; + // + if (ei_bit) begin + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end else begin + // + if (rw_t_bram_addr > bram_core_addr_zero) rw_t_bram_addr <= bram_core_addr_next_or_zero(rw_t_bram_addr); + else rw_t_bram_addr <= rw_y_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_t_bram_addr); + // + end + // + if (round_count == round_count_last) begin + // + if (rw_result_bram_addr == bram_user_addr_zero) begin + // + if (rw_y_bram_wr) begin + // + rw_result_bram_wr <= (rw_y_bram_addr > bram_core_addr_zero) ? 1'b0 : 1'b1; + rw_result_bram_addr <= (rw_y_bram_addr > bram_core_addr_zero) ? bram_user_addr_zero : bram_user_addr_next_or_zero(rw_result_bram_addr); + // + end else begin + // + rw_result_bram_wr <= 1'b1; + rw_result_bram_addr <= bram_user_addr_zero; + // + end + // + end else begin + // + rw_result_bram_wr <= (rw_result_bram_addr < bram_user_addr_last) ? 1'b1 : 1'b0; + rw_result_bram_addr <= bram_user_addr_next_or_zero(rw_result_bram_addr); + // + end + // + end + // + end + + FSM_STATE_SQUARE_LOAD: begin + // + rw_t_bram_wr <= (rw_t_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_t_bram_addr <= rw_t_bram_wr ? bram_core_addr_next_or_zero(rw_t_bram_addr) : bram_core_addr_zero; + // + if (rw_mm_bram_addr > bram_core_addr_zero) rw_mm_bram_addr <= bram_core_addr_next_or_zero(rw_mm_bram_addr); + else rw_mm_bram_addr <= rw_t_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(rw_mm_bram_addr); + // + end + + FSM_STATE_SQUARE_WAIT: begin + // + if (mul_ena) mul_ena <= mul_rdy ? 1'b0 : 1'b1; + else mul_ena <= 1'b1; + // + end + + FSM_STATE_SQUARE_UNLOAD: begin + // + rw_mm_bram_wr <= (rw_mm_bram_addr < bram_core_addr_last) ? 1'b1 : 1'b0; + // + rw_mm_bram_addr <= rw_mm_bram_wr ? 
bram_core_addr_next_or_zero(rw_mm_bram_addr) : bram_core_addr_zero; + // + if (ro_r_bram_addr > bram_core_addr_zero) ro_r_bram_addr <= bram_core_addr_next_or_zero(ro_r_bram_addr); + else ro_r_bram_addr <= rw_mm_bram_wr ? bram_core_addr_zero : bram_core_addr_next_or_zero(ro_r_bram_addr); + // + end + + FSM_STATE_ROUND_END: begin + // + round_count <= round_count_next; + // + if (round_count < round_count_last) begin + // + ei_bit_count <= ei_bit_count + 1'b1; + // + if (ei_bit_count == 5'd31) + // + ro_exponent_bram_addr <= bram_user_addr_next_or_zero(ro_exponent_bram_addr); + // + end else begin + // + ei_bit_count <= 5'd0; + // + ro_exponent_bram_addr <= bram_user_addr_zero; + // + end + // + end + + endcase + + + // + // FSM Transition Logic + // + always @(posedge clk) + // + case (fsm_state) + + FSM_STATE_IDLE: fsm_state <= (!valid_reg && next_trig) ? FSM_STATE_INIT_LOAD : FSM_STATE_IDLE; + + FSM_STATE_INIT_LOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_LOAD : FSM_STATE_INIT_WAIT; + FSM_STATE_INIT_WAIT: fsm_state <= mul_rdy ? FSM_STATE_INIT_UNLOAD : FSM_STATE_INIT_WAIT; + FSM_STATE_INIT_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_INIT_UNLOAD : FSM_STATE_READ_EI; + + FSM_STATE_READ_EI: fsm_state <= FSM_STATE_ROUND_BEGIN; + + FSM_STATE_ROUND_BEGIN: fsm_state <= (!ei_bit && fast_public_mode && (round_count < round_count_last)) ? FSM_STATE_SQUARE_LOAD : FSM_STATE_MULTIPLY_LOAD; + + FSM_STATE_MULTIPLY_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_LOAD : FSM_STATE_MULTIPLY_WAIT; + FSM_STATE_MULTIPLY_WAIT: fsm_state <= mul_rdy ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_MULTIPLY_WAIT; + FSM_STATE_MULTIPLY_UNLOAD: fsm_state <= (rw_y_bram_addr < bram_core_addr_last) ? FSM_STATE_MULTIPLY_UNLOAD : FSM_STATE_SQUARE_LOAD; + + FSM_STATE_SQUARE_LOAD: fsm_state <= (rw_t_bram_addr < bram_core_addr_last) ? 
FSM_STATE_SQUARE_LOAD : FSM_STATE_SQUARE_WAIT; + FSM_STATE_SQUARE_WAIT: fsm_state <= mul_rdy ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_SQUARE_WAIT; + FSM_STATE_SQUARE_UNLOAD: fsm_state <= (rw_mm_bram_addr < bram_core_addr_last) ? FSM_STATE_SQUARE_UNLOAD : FSM_STATE_ROUND_END; + + FSM_STATE_ROUND_END: fsm_state <= (round_count < round_count_last) ? FSM_STATE_READ_EI : FSM_STATE_FINAL; + + FSM_STATE_FINAL: fsm_state <= FSM_STATE_IDLE; + + default: fsm_state <= FSM_STATE_IDLE; + + endcase + + +endmodule diff --git a/src/rtl/modexps6_wrapper.v b/src/rtl/modexps6_wrapper.v index aa49261..89646d5 100644 --- a/src/rtl/modexps6_wrapper.v +++ b/src/rtl/modexps6_wrapper.v @@ -1,187 +1,211 @@ -module modexps6_wrapper - ( - clk, reset_n, - cs, we, - address, write_data, read_data - ); - - - // - // Ports - // - input wire clk; - input wire reset_n; - - input wire cs; - input wire we; - - input wire [ 9: 0] address; - input wire [31: 0] write_data; - output wire [31: 0] read_data; - - - // - // Address Decoder - // - localparam ADDR_MSB_REGS = 1'b0; - localparam ADDR_MSB_CORE = 1'b1; - wire address_msb = address[9]; - wire [ 8: 0] address_lsb = address[8:0]; - - - // - // Output Mux - // - wire [31: 0] read_data_regs; - wire [31: 0] read_data_core; - - - // - // Registers - // - localparam ADDR_NAME0 = 9'h000; - localparam ADDR_NAME1 = 9'h001; - localparam ADDR_VERSION = 9'h002; - - localparam ADDR_CONTROL = 9'h008; // {next, init} - localparam ADDR_STATUS = 9'h009; // {valid, ready} - localparam ADDR_MODE = 9'h010; // 0 = slow secure, 1 = fast unsafe (public) - localparam ADDR_MODULUS_BITS = 9'h011; // - localparam ADDR_EXPONENT_BITS = 9'h012; // - localparam ADDR_GPIO_REG = 9'h020; // - - localparam CONTROL_INIT_BIT = 0; - localparam CONTROL_NEXT_BIT = 1; - - localparam STATUS_READY_BIT = 0; - localparam STATUS_VALID_BIT = 1; - - localparam CORE_NAME0 = 32'h6D6F6465; // "mode" - localparam CORE_NAME1 = 32'h78707336; // "xps6" - localparam CORE_VERSION = 32'h302E3130; // "0.10" - 
- - // - // Registers - // - reg [ 1: 0] reg_control; - reg reg_mode; - reg [12: 0] reg_modulus_width; - reg [12: 0] reg_exponent_width; - reg [31: 0] reg_gpio; - - - // - // Wires - // - wire [ 1: 0] reg_status; - - - // - // ModExpS6 - // - modexps6_top # - ( - .MAX_MODULUS_WIDTH (4096) - ) - modexps6_core - ( - .clk (clk), - - .init (reg_control[CONTROL_INIT_BIT]), - .ready (reg_status[STATUS_READY_BIT]), - .next (reg_control[CONTROL_NEXT_BIT]), - .valid (reg_status[STATUS_VALID_BIT]), - - .modulus_width (reg_modulus_width), - .exponent_width (reg_exponent_width), - - .fast_public_mode (reg_mode), - - .bus_cs (cs && (address_msb == ADDR_MSB_CORE)), - .bus_we (we), - .bus_addr (address_lsb), - .bus_data_wr (write_data), - .bus_data_rd (read_data_core) - ); - - - // - // Read Latch - // - reg [31: 0] tmp_read_data; - - - // - // Read/Write Interface - // - always @(posedge clk) - // - if (!reset_n) begin - // - reg_control <= 2'b00; - reg_mode <= 1'b0; - reg_modulus_width <= 13'd1024; - reg_exponent_width <= 13'd1024; - // - end else if (cs && (address_msb == ADDR_MSB_REGS)) begin - // - if (we) begin - // - // Write Handler - // - case (address_lsb) - // - ADDR_CONTROL: reg_control <= write_data[ 1: 0]; - ADDR_MODE: reg_mode <= write_data[0]; - ADDR_MODULUS_BITS: reg_modulus_width <= write_data[12: 0]; - ADDR_EXPONENT_BITS: reg_exponent_width <= write_data[12: 0]; - ADDR_GPIO_REG: reg_gpio <= write_data; - // - endcase - // - end else begin - // - // Read Handler - // - case (address) - // - ADDR_NAME0: tmp_read_data <= CORE_NAME0; - ADDR_NAME1: tmp_read_data <= CORE_NAME1; - ADDR_VERSION: tmp_read_data <= CORE_VERSION; - ADDR_CONTROL: tmp_read_data <= {{30{1'b0}}, reg_control}; - ADDR_STATUS: tmp_read_data <= {{30{1'b0}}, reg_status}; - ADDR_MODE: tmp_read_data <= {{31{1'b0}}, reg_mode}; - ADDR_MODULUS_BITS: tmp_read_data <= {{19{1'b0}}, reg_modulus_width}; - ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width}; - ADDR_GPIO_REG: tmp_read_data <= 
reg_gpio; - // - default: tmp_read_data <= 32'h00000000; - // - endcase - // - end - // - end - - - // - // Register / Core Memory Selector - // - reg address_msb_last; - always @(posedge clk) address_msb_last = address_msb; - - reg [31: 0] read_data_mux; - assign read_data = read_data_mux; - - always @(*) - // - case (address_msb_last) - // - ADDR_MSB_REGS: read_data_mux = tmp_read_data; - ADDR_MSB_CORE: read_data_mux = read_data_core; - // - endcase - - +//====================================================================== +// +// Copyright (c) 2015, NORDUnet A/S All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// - Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// - Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// - Neither the name of the NORDUnet nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

//
// modexps6_wrapper
//
// Bus-facing wrapper around modexps6_top.  The 10-bit word address is
// split in two: address[9] selects between the wrapper's own register
// file (0) and the core's memory window (1), address[8:0] is the offset
// inside the selected half.
//
// Register map (offsets within the register half):
//   0x000 NAME0          read-only, "mode"
//   0x001 NAME1          read-only, "xps6"
//   0x002 VERSION        read-only, "0.10"
//   0x008 CONTROL        bit 0 = init, bit 1 = next
//   0x009 STATUS         read-only, bit 0 = ready, bit 1 = valid
//   0x010 MODE           0 = slow secure, 1 = fast unsafe (public)
//   0x011 MODULUS_BITS   13-bit value, 1024 after reset
//   0x012 EXPONENT_BITS  13-bit value, 1024 after reset
//   0x020 GPIO_REG       32-bit scratch register
//
// Reset (reset_n, active low) is sampled on the rising clock edge.
// Register reads are captured into a latch register on the clock edge;
// writes to offsets not listed above are ignored and reads of such
// offsets return zero.
//
module modexps6_wrapper
  (
   input wire          clk,
   input wire          reset_n,

   input wire          cs,
   input wire          we,

   input wire [9: 0]   address,
   input wire [31: 0]  write_data,
   output wire [31: 0] read_data
   );


   //
   // Address Decoder
   //
   localparam ADDR_MSB_REGS = 1'b0;
   localparam ADDR_MSB_CORE = 1'b1;

   wire         address_msb = address[9];
   wire [8: 0]  address_lsb = address[8:0];


   //
   // Read data coming back from the core's memory window
   //
   wire [31: 0] read_data_core;


   //
   // Register Offsets
   //
   localparam ADDR_NAME0         = 9'h000;
   localparam ADDR_NAME1         = 9'h001;
   localparam ADDR_VERSION       = 9'h002;

   localparam ADDR_CONTROL       = 9'h008;   // {next, init}
   localparam ADDR_STATUS        = 9'h009;   // {valid, ready}
   localparam ADDR_MODE          = 9'h010;   // 0 = slow secure, 1 = fast unsafe (public)
   localparam ADDR_MODULUS_BITS  = 9'h011;   //
   localparam ADDR_EXPONENT_BITS = 9'h012;   //
   localparam ADDR_GPIO_REG      = 9'h020;   //

   localparam CONTROL_INIT_BIT   = 0;
   localparam CONTROL_NEXT_BIT   = 1;

   localparam STATUS_READY_BIT   = 0;
   localparam STATUS_VALID_BIT   = 1;

   localparam CORE_NAME0         = 32'h6D6F6465; // "mode"
   localparam CORE_NAME1         = 32'h78707336; // "xps6"
   localparam CORE_VERSION       = 32'h302E3130; // "0.10"


   //
   // Registers
   //
   reg [1: 0]   reg_control;
   reg          reg_mode;
   reg [12: 0]  reg_modulus_width;
   reg [12: 0]  reg_exponent_width;
   reg [31: 0]  reg_gpio;


   //
   // Status bits driven by the core
   //
   wire [1: 0]  reg_status;


   //
   // ModExpS6
   //
   modexps6_top #
     (
      .MAX_MODULUS_WIDTH (4096)
      )
   modexps6_core
     (
      .clk               (clk),

      .init              (reg_control[CONTROL_INIT_BIT]),
      .ready             (reg_status[STATUS_READY_BIT]),
      .next              (reg_control[CONTROL_NEXT_BIT]),
      .valid             (reg_status[STATUS_VALID_BIT]),

      .modulus_width     (reg_modulus_width),
      .exponent_width    (reg_exponent_width),

      .fast_public_mode  (reg_mode),

      // the core only sees accesses that target its half of the map
      .bus_cs            (cs && (address_msb == ADDR_MSB_CORE)),
      .bus_we            (we),
      .bus_addr          (address_lsb),
      .bus_data_wr       (write_data),
      .bus_data_rd       (read_data_core)
      );


   //
   // Read Latch
   //
   reg [31: 0]  tmp_read_data;


   //
   // Read/Write Interface
   //
   always @(posedge clk)
     //
     if (!reset_n) begin
        //
        reg_control        <= 2'b00;
        reg_mode           <= 1'b0;
        reg_modulus_width  <= 13'd1024;
        reg_exponent_width <= 13'd1024;
        // give the scratch register a defined value so that a read
        // before the first write does not return X
        reg_gpio           <= 32'h00000000;
        //
     end else if (cs && (address_msb == ADDR_MSB_REGS)) begin
        //
        if (we) begin
           //
           // Write Handler
           //
           case (address_lsb)
             //
             ADDR_CONTROL:       reg_control        <= write_data[1: 0];
             ADDR_MODE:          reg_mode           <= write_data[0];
             ADDR_MODULUS_BITS:  reg_modulus_width  <= write_data[12: 0];
             ADDR_EXPONENT_BITS: reg_exponent_width <= write_data[12: 0];
             ADDR_GPIO_REG:      reg_gpio           <= write_data;
             //
             default: ;          // read-only or unmapped offset: ignore
             //
           endcase
           //
        end else begin
           //
           // Read Handler (decodes the same 9-bit offset as the write
           // handler; address_msb is already known to be zero here)
           //
           case (address_lsb)
             //
             ADDR_NAME0:         tmp_read_data <= CORE_NAME0;
             ADDR_NAME1:         tmp_read_data <= CORE_NAME1;
             ADDR_VERSION:       tmp_read_data <= CORE_VERSION;
             ADDR_CONTROL:       tmp_read_data <= {{30{1'b0}}, reg_control};
             ADDR_STATUS:        tmp_read_data <= {{30{1'b0}}, reg_status};
             ADDR_MODE:          tmp_read_data <= {{31{1'b0}}, reg_mode};
             ADDR_MODULUS_BITS:  tmp_read_data <= {{19{1'b0}}, reg_modulus_width};
             ADDR_EXPONENT_BITS: tmp_read_data <= {{19{1'b0}}, reg_exponent_width};
             ADDR_GPIO_REG:      tmp_read_data <= reg_gpio;
             //
             default:            tmp_read_data <= 32'h00000000;
             //
           endcase
           //
        end
        //
     end


   //
   // Register / Core Memory Selector
   //
   // The select bit is delayed by one clock so that the output mux lines
   // up with the registered read data.
   // NOTE(review): this presumes bus_data_rd of the core also has one
   // cycle of read latency -- confirm against modexps6_top.
   //
   reg          address_msb_last;
   always @(posedge clk) address_msb_last <= address_msb;

   assign read_data = (address_msb_last == ADDR_MSB_CORE) ? read_data_core
                                                          : tmp_read_data;


endmodule

//
// License header of the file that follows in this patch
// (src/rtl/ram_1rw_1ro_readfirst.v):
//
//======================================================================
//
// Copyright (c) 2015, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// - Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may
//   be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//======================================================================

`timescale 1ns / 1ps

//
// ram_1rw_1ro_readfirst
//
// Two-port synchronous memory with registered outputs:
//   port A - read/write; a_out is loaded with the word stored at a_addr
//            on every rising clock edge, and when a_wr is high the same
//            edge stores a_in at a_addr.  Because both assignments are
//            non-blocking, a_out receives the value held *before* the
//            write ("read first").
//   port B - read only; b_out is loaded with the word stored at b_addr
//            on every rising clock edge.
//
// Parameters:
//   MEM_WIDTH      - width of one memory word in bits
//   MEM_ADDR_BITS  - address width; the memory holds 2**MEM_ADDR_BITS words
//
module ram_1rw_1ro_readfirst
  #(parameter MEM_WIDTH     = 32,
    parameter MEM_ADDR_BITS = 8)
   (
    input  wire                     clk,

    input  wire [MEM_ADDR_BITS-1:0] a_addr,
    input  wire                     a_wr,
    input  wire [MEM_WIDTH-1:0]     a_in,
    output wire [MEM_WIDTH-1:0]     a_out,

    input  wire [MEM_ADDR_BITS-1:0] b_addr,
    output wire [MEM_WIDTH-1:0]     b_out
    );


   //
   // Storage array
   //
   localparam MEM_DEPTH = 1 << MEM_ADDR_BITS;

   (* RAM_STYLE="BLOCK" *)
   reg [MEM_WIDTH-1:0] mem [0:MEM_DEPTH-1];


   //
   // Registered read data, one register per port
   //
   reg [MEM_WIDTH-1:0] a_out_reg;
   reg [MEM_WIDTH-1:0] b_out_reg;


   //
   // Port A: optional write plus read of the pre-write contents
   //
   always @(posedge clk) begin
      if (a_wr)
        mem[a_addr] <= a_in;
      // non-blocking: samples mem[a_addr] before the write above lands
      a_out_reg <= mem[a_addr];
   end


   //
   // Port B: read only
   //
   always @(posedge clk)
     b_out_reg <= mem[b_addr];


   //
   // Drive the outputs from the read registers
   //
   assign a_out = a_out_reg;
   assign b_out = b_out_reg;


endmodule