//------------------------------------------------------------------------------
//
// ecdsa384_uop_worker.v
// -----------------------------------------------------------------------------
// ECDSA uOP Worker for P-384
//
// Authors: Pavel Shatov
//
// Copyright (c) 2018, NORDUnet A/S
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// - Neither the name of the NORDUnet nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
module ecdsa384_uop_worker
(
    clk, rst_n,
    ena, rdy,
    uop_offset,
    output_now,
    flagz_sz, flagz_rz,
    flagz_e, flagz_f,
    xy_addr, xy_dout, x_wren, y_wren
);

    //
    // Overview
    //
    // Micro-operation sequencer. Starting from `uop_offset`, the worker reads
    // micro-instructions out of the microcode ROM one at a time, starts the
    // arithmetic unit selected by the opcode (compare-with-zero, copy, modular
    // add, modular subtract, modular multiply), waits for that unit to finish
    // and then moves on to the next instruction. A STOP opcode returns the
    // worker to idle and raises `rdy` again.
    //
    // While `output_now` is high, every word written by the multi-word mover
    // is additionally mirrored onto the xy_addr/xy_dout/x_wren/y_wren outputs.
    //

    //
    // Microcode Header
    //
    // Expected to provide UOP_ADDR_WIDTH, UOP_DATA_WIDTH, the UOP_OPCODE_* and
    // UOP_OPERAND_* constants used below (definitions are not part of this file).
    //
    `include "ecdsa_uop.vh"

    //
    // Ports
    //
    input                       clk;        // system clock
    input                       rst_n;      // active-low async reset
    input                       ena;        // enable input
    output                      rdy;        // ready output
    input  [UOP_ADDR_WIDTH-1:0] uop_offset; // starting offset
    input                       output_now; // produce output
    output                      flagz_sz;   // SZ is zero
    output                      flagz_rz;   // RZ is zero
    output                      flagz_e;    // E is zero
    output                      flagz_f;    // F is zero
    output [ 3: 0]              xy_addr;    // word address of mirrored mover write
    output [31: 0]              xy_dout;    // word data of mirrored mover write
    output                      x_wren;     // write strobe, asserted while xy_phase == 0
    output                      y_wren;     // write strobe, asserted while xy_phase == 1

    //
    // Constants
    //
    localparam integer OPERAND_NUM_WORDS  = 12; // 384 bits -> 12 x 32-bit words
    localparam integer WORD_COUNTER_WIDTH = 4;  // 0..11 -> 4 bits

    //
    // FSM
    //
    // IDLE   -> FETCH  when `ena` is asserted
    // FETCH  -> DECODE unconditionally (one cycle for the ROM read)
    // DECODE -> IDLE   on a STOP opcode, otherwise -> BUSY
    // BUSY   -> FETCH  once the unit selected by the opcode reports completion
    //
    localparam [1:0] FSM_STATE_IDLE   = 2'b00;
    localparam [1:0] FSM_STATE_FETCH  = 2'b01;
    localparam [1:0] FSM_STATE_DECODE = 2'b10;
    localparam [1:0] FSM_STATE_BUSY   = 2'b11;

    reg [1:0] fsm_state = FSM_STATE_IDLE;
    reg [1:0] fsm_state_next;

    //
    // Microcode
    //
    // Layout of a micro-instruction word as sliced below:
    //   [24:19] opcode (one decode bit per operation)
    //   [18]    banks selector
    //   [17:12] source operand 1
    //   [11: 6] source operand 2
    //   [ 5: 0] destination operand
    //
    reg  [UOP_ADDR_WIDTH-1:0] uop_addr;
    wire [UOP_DATA_WIDTH-1:0] uop_data;

    wire [5:0] uop_data_opcode       = uop_data[1 + 3*6 +: 6];
    wire       uop_data_banks        = uop_data[0 + 3*6 +: 1];
    wire [5:0] uop_data_operand_src1 = uop_data[0 + 2*6 +: 6];
    wire [5:0] uop_data_operand_src2 = uop_data[0 + 1*6 +: 6];
    wire [5:0] uop_data_operand_dst  = uop_data[0 + 0*6 +: 6];

    wire uop_data_opcode_is_stop = uop_data_opcode[5];
    wire uop_data_opcode_is_mul  = uop_data_opcode[4];
    wire uop_data_opcode_is_sub  = uop_data_opcode[3];
    wire uop_data_opcode_is_add  = uop_data_opcode[2];
    wire uop_data_opcode_is_copy = uop_data_opcode[1];
    wire uop_data_opcode_is_cmpz = uop_data_opcode[0];

    ecdsa384_microcode_rom microcode_rom
    (
        .clk    (clk),
        .addr   (uop_addr),
        .data   (uop_data)
    );

    //
    // Microcode Address Increment Logic
    //
    // The address only changes on a transition into FETCH: it is loaded with
    // `uop_offset` when leaving IDLE and incremented when leaving BUSY, so
    // `uop_addr` stays put for the whole DECODE/BUSY phase of an instruction.
    //
    always @(posedge clk)
        //
        if (fsm_state_next == FSM_STATE_FETCH)
            uop_addr <= (fsm_state == FSM_STATE_IDLE) ? uop_offset : uop_addr + 1'b1;

    //
    // Multi-Word Mover
    //
    reg                           mw_mover_ena = 1'b0;
    wire                          mw_mover_rdy;
    wire [WORD_COUNTER_WIDTH-1:0] mw_mover_x_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mw_mover_y_addr;
    wire [                32-1:0] mw_mover_x_din;
    wire [                32-1:0] mw_mover_y_dout;
    wire                          mw_mover_y_wren;

    multiword_mover #
    (
        .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
        .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
    )
    mw_mover_inst
    (
        .clk        (clk),
        .rst_n      (rst_n),
        .ena        (mw_mover_ena),
        .rdy        (mw_mover_rdy),
        .x_addr     (mw_mover_x_addr),
        .y_addr     (mw_mover_y_addr),
        .y_wren     (mw_mover_y_wren),
        .x_din      (mw_mover_x_din),
        .y_dout     (mw_mover_y_dout)
    );

    //
    // Modular Multiplier
    //
    reg                           mod_mul_ena = 1'b0;
    wire                          mod_mul_rdy;
    wire [WORD_COUNTER_WIDTH-1:0] mod_mul_a_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_mul_b_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_mul_p_addr;
    wire [                32-1:0] mod_mul_a_din;
    wire [                32-1:0] mod_mul_b_din;
    wire [                32-1:0] mod_mul_p_dout;
    wire                          mod_mul_p_wren;

    ecdsa384_modular_multiplier mod_mul_inst
    (
        .clk        (clk),
        .rst_n      (rst_n),
        .ena        (mod_mul_ena),
        .rdy        (mod_mul_rdy),
        .a_addr     (mod_mul_a_addr),
        .b_addr     (mod_mul_b_addr),
        .p_addr     (mod_mul_p_addr),
        .p_wren     (mod_mul_p_wren),
        .a_din      (mod_mul_a_din),
        .b_din      (mod_mul_b_din),
        .p_dout     (mod_mul_p_dout)
    );

    //
    // Modular Adder
    //
    reg                           mod_add_ena = 1'b0;
    wire                          mod_add_rdy;
    wire [WORD_COUNTER_WIDTH-1:0] mod_add_ab_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_add_n_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_add_s_addr;
    wire [                32-1:0] mod_add_a_din;
    wire [                32-1:0] mod_add_b_din;
    wire [                32-1:0] mod_add_n_din;
    wire [                32-1:0] mod_add_s_dout;
    wire                          mod_add_s_wren;

    modular_adder #
    (
        .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS),
        .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
    )
    mod_add_inst
    (
        .clk        (clk),
        .rst_n      (rst_n),
        .ena        (mod_add_ena),
        .rdy        (mod_add_rdy),
        .ab_addr    (mod_add_ab_addr),
        .n_addr     (mod_add_n_addr),
        .s_addr     (mod_add_s_addr),
        .s_wren     (mod_add_s_wren),
        .a_din      (mod_add_a_din),
        .b_din      (mod_add_b_din),
        .n_din      (mod_add_n_din),
        .s_dout     (mod_add_s_dout)
    );

    //
    // Modular Subtractor
    //
    reg                           mod_sub_ena = 1'b0;
    wire                          mod_sub_rdy;
    wire [WORD_COUNTER_WIDTH-1:0] mod_sub_ab_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_sub_n_addr;
    wire [WORD_COUNTER_WIDTH-1:0] mod_sub_d_addr;
    wire [                32-1:0] mod_sub_a_din;
    wire [                32-1:0] mod_sub_b_din;
    wire [                32-1:0] mod_sub_n_din;
    wire [                32-1:0] mod_sub_d_dout;
    wire                          mod_sub_d_wren;

    modular_subtractor #
    (
        .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS),
        .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH)
    )
    mod_sub_inst
    (
        .clk        (clk),
        .rst_n      (rst_n),
        .ena        (mod_sub_ena),
        .rdy        (mod_sub_rdy),
        .ab_addr    (mod_sub_ab_addr),
        .n_addr     (mod_sub_n_addr),
        .d_addr     (mod_sub_d_addr),
        .d_wren     (mod_sub_d_wren),
        .a_din      (mod_sub_a_din),
        .b_din      (mod_sub_b_din),
        .n_din      (mod_sub_n_din),
        .d_dout     (mod_sub_d_dout)
    );

    //
    // Modulus (two instances for better placement and routing)
    //
    // Each instance is addressed by, and feeds the modulus input of, exactly
    // one of the two units above.
    //
    ecdsa384_modulus_distmem modulus_add
    (
        .clk        (clk),
        .b_addr     (mod_add_n_addr),
        .b_out      (mod_add_n_din)
    );

    ecdsa384_modulus_distmem modulus_sub
    (
        .clk        (clk),
        .b_addr     (mod_sub_n_addr),
        .b_out      (mod_sub_n_din)
    );

    //
    // Multi-Word Comparator
    //
    // The Y input is tied to zero and only the "equal" output is used, so the
    // comparator serves as an "operand is zero" detector.
    // NOTE(review): this assumes cmp_e means X == Y - confirm against
    // multiword_comparator.
    //
    reg                           mw_comp_ena = 1'b0;
    wire                          mw_comp_rdy;
    wire                          mw_comp_cmp_e;
    wire [WORD_COUNTER_WIDTH-1:0] mw_comp_x_addr;
    wire [                32-1:0] mw_comp_x_din;

    multiword_comparator #
    (
        .WORD_COUNTER_WIDTH (WORD_COUNTER_WIDTH),
        .OPERAND_NUM_WORDS  (OPERAND_NUM_WORDS)
    )
    mw_comp_inst
    (
        .clk        (clk),
        .rst_n      (rst_n),
        .ena        (mw_comp_ena),
        .rdy        (mw_comp_rdy),
        .xy_addr    (mw_comp_x_addr),
        .x_din      (mw_comp_x_din),
        .y_din      ({32{1'b0}}),
        .cmp_l      (),
        .cmp_e      (mw_comp_cmp_e),
        .cmp_g      ()
    );

    //
    // Comparison Flags
    //
    // On the rising edge of the comparator's ready output the "equal" result
    // is stored in the flag selected by the first source operand of the
    // current micro-instruction. Each flag keeps its value until the same
    // operand is compared again. The flag registers have neither a reset nor
    // an initial value, so they are undefined until their first comparison.
    //
    reg flagz_sz_reg;
    reg flagz_rz_reg;
    reg flagz_e_reg;
    reg flagz_f_reg;

    assign flagz_sz = flagz_sz_reg;
    assign flagz_rz = flagz_rz_reg;
    assign flagz_e  = flagz_e_reg;
    assign flagz_f  = flagz_f_reg;

    // one-cycle delayed copy of mw_comp_rdy, used for rising edge detection
    reg mw_comp_rdy_dly = 1'b1;

    always @(posedge clk) mw_comp_rdy_dly <= mw_comp_rdy;

    always @(posedge clk)
        //
        if (mw_comp_rdy && !mw_comp_rdy_dly)
            //
            case (uop_data_operand_src1)
                UOP_OPERAND_CYCLE_SZ: flagz_sz_reg <= mw_comp_cmp_e;
                UOP_OPERAND_CYCLE_RZ: flagz_rz_reg <= mw_comp_cmp_e;
                UOP_OPERAND_CYCLE_E:  flagz_e_reg  <= mw_comp_cmp_e;
                UOP_OPERAND_CYCLE_F:  flagz_f_reg  <= mw_comp_cmp_e;
            endcase

    //
    // uOP Trigger Logic
    //
    // The FSM stays in DECODE for exactly one cycle, so each enable below is
    // a one-clock pulse that is high during the cycle right after DECODE and
    // only for the unit whose opcode bit is set.
    //
    always @(posedge clk)
        //
        if (fsm_state == FSM_STATE_DECODE) begin
            mw_comp_ena  <= uop_data_opcode_is_cmpz;
            mw_mover_ena <= uop_data_opcode_is_copy;
            mod_mul_ena  <= uop_data_opcode_is_mul;
            mod_add_ena  <= uop_data_opcode_is_add;
            mod_sub_ena  <= uop_data_opcode_is_sub;
        end else begin
            mw_comp_ena  <= 1'b0;
            mw_mover_ena <= 1'b0;
            mod_mul_ena  <= 1'b0;
            mod_add_ena  <= 1'b0;
            mod_sub_ena  <= 1'b0;
        end

    //
    // uOP Completion Detector
    //
    // The current instruction is finished when the selected unit reports
    // ready while its enable pulse is no longer active. The `~ena` term masks
    // the cycle in which the enable pulse is still high.
    // NOTE(review): presumably the unit's rdy has not dropped yet in that
    // cycle - confirm against the unit implementations.
    // If several opcode bits were set at once, the last matching line wins.
    //
    reg fsm_exit_from_busy;

    always @* begin
        //
        fsm_exit_from_busy = 1'b0;
        //
        if (uop_data_opcode_is_cmpz) fsm_exit_from_busy = ~mw_comp_ena  & mw_comp_rdy;
        if (uop_data_opcode_is_copy) fsm_exit_from_busy = ~mw_mover_ena & mw_mover_rdy;
        if (uop_data_opcode_is_mul)  fsm_exit_from_busy = ~mod_mul_ena  & mod_mul_rdy;
        if (uop_data_opcode_is_add)  fsm_exit_from_busy = ~mod_add_ena  & mod_add_rdy;
        if (uop_data_opcode_is_sub)  fsm_exit_from_busy = ~mod_sub_ena  & mod_sub_rdy;
        //
    end

    //
    // Banks
    //
    // Operand storage. The operand selectors come straight from the current
    // micro-instruction; the word addresses, write strobe and write data are
    // routed from whichever unit the opcode selects (see the multiplexer
    // below).
    //
    reg  [ 3:0] banks_src1_addr;
    reg  [ 3:0] banks_src2_addr;
    reg  [ 3:0] banks_dst_addr;
    reg         banks_dst_wren;
    reg  [31:0] banks_dst_din;
    wire [31:0] banks_src1_dout;
    wire [31:0] banks_src2_dout;

    ecdsa384_banks_array banks_array
    (
        .clk            (clk),
        .banks          (uop_data_banks),
        .src1_operand   (uop_data_operand_src1),
        .src2_operand   (uop_data_operand_src2),
        .dst_operand    (uop_data_operand_dst),
        .src1_addr      (banks_src1_addr),
        .src2_addr      (banks_src2_addr),
        .dst_addr       (banks_dst_addr),
        .dst_wren       (banks_dst_wren),
        .src1_dout      (banks_src1_dout),
        .src2_dout      (banks_src2_dout),
        .dst_din        (banks_dst_din)
    );

    // the bank read data is broadcast to every unit, only the active one uses it
    assign mw_comp_x_din  = banks_src1_dout;
    assign mw_mover_x_din = banks_src1_dout;
    assign mod_mul_a_din  = banks_src1_dout;
    assign mod_mul_b_din  = banks_src2_dout;
    assign mod_add_a_din  = banks_src1_dout;
    assign mod_add_b_din  = banks_src2_dout;
    assign mod_sub_a_din  = banks_src1_dout;
    assign mod_sub_b_din  = banks_src2_dout;

    //
    // Bank Port Multiplexer
    //
    // Defaults are assigned first: the write strobe is inactive and all the
    // remaining signals are full-width don't-cares. A replication narrower
    // than the target would be zero-extended on assignment and thus pin the
    // upper address bit to 0 instead of leaving it as a don't-care, so the
    // replication count must match the 4-bit address width. Each opcode then
    // overrides only the ports it really drives.
    //
    always @* begin
        //
        banks_src1_addr = {4{1'bX}};
        banks_src2_addr = {4{1'bX}};
        banks_dst_addr  = {4{1'bX}};
        banks_dst_wren  = 1'b0;
        banks_dst_din   = {32{1'bX}};
        //
        case (uop_data_opcode)
            //
            UOP_OPCODE_CMPZ: begin
                // read-only: compares the first source operand
                banks_src1_addr = mw_comp_x_addr;
            end
            //
            UOP_OPCODE_COPY: begin
                banks_src1_addr = mw_mover_x_addr;
                banks_dst_addr  = mw_mover_y_addr;
                banks_dst_wren  = mw_mover_y_wren;
                banks_dst_din   = mw_mover_y_dout;
            end
            //
            UOP_OPCODE_ADD: begin
                // both source operands are read at the same word address
                banks_src1_addr = mod_add_ab_addr;
                banks_src2_addr = mod_add_ab_addr;
                banks_dst_addr  = mod_add_s_addr;
                banks_dst_wren  = mod_add_s_wren;
                banks_dst_din   = mod_add_s_dout;
            end
            //
            UOP_OPCODE_SUB: begin
                // both source operands are read at the same word address
                banks_src1_addr = mod_sub_ab_addr;
                banks_src2_addr = mod_sub_ab_addr;
                banks_dst_addr  = mod_sub_d_addr;
                banks_dst_wren  = mod_sub_d_wren;
                banks_dst_din   = mod_sub_d_dout;
            end
            //
            UOP_OPCODE_MUL: begin
                banks_src1_addr = mod_mul_a_addr;
                banks_src2_addr = mod_mul_b_addr;
                banks_dst_addr  = mod_mul_p_addr;
                banks_dst_wren  = mod_mul_p_wren;
                banks_dst_din   = mod_mul_p_dout;
            end
            //
            default: ; // keep the defaults (no write)
            //
        endcase
        //
    end

    //
    // FSM Process
    //
    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0) fsm_state <= FSM_STATE_IDLE;
        else               fsm_state <= fsm_state_next;

    //
    // FSM Transition Logic
    //
    always @* begin
        //
        fsm_state_next = FSM_STATE_IDLE;
        //
        case (fsm_state)
            FSM_STATE_IDLE:   fsm_state_next = ena                     ? FSM_STATE_FETCH : FSM_STATE_IDLE;
            FSM_STATE_FETCH:  fsm_state_next =                           FSM_STATE_DECODE;
            FSM_STATE_DECODE: fsm_state_next = uop_data_opcode_is_stop ? FSM_STATE_IDLE  : FSM_STATE_BUSY;
            FSM_STATE_BUSY:   fsm_state_next = fsm_exit_from_busy      ? FSM_STATE_FETCH : FSM_STATE_BUSY;
        endcase
        //
    end

    //
    // Ready Flag Logic
    //
    // `rdy` drops when `ena` is seen in IDLE and is raised again when a STOP
    // opcode is decoded; it holds its value in the FETCH and BUSY states.
    //
    reg rdy_reg = 1'b1;

    assign rdy = rdy_reg;

    always @(posedge clk or negedge rst_n)
        //
        if (rst_n == 1'b0) rdy_reg <= 1'b1;
        else case (fsm_state)
            FSM_STATE_IDLE:   rdy_reg <= ~ena;
            FSM_STATE_DECODE: rdy_reg <= uop_data_opcode_is_stop;
        endcase

    //
    // Output Logic
    //
    reg [ 3: 0] xy_addr_reg = 4'b0000;
    reg [31: 0] xy_dout_reg = 32'h00000000;
    reg         x_wren_reg  = 1'b0;
    reg         y_wren_reg  = 1'b0;

    assign xy_addr = xy_addr_reg;
    assign xy_dout = xy_dout_reg;
    assign x_wren  = x_wren_reg;
    assign y_wren  = y_wren_reg;

    //
    // Only updated while `output_now` is high: cleared when `ena` is
    // asserted, set as soon as the mover reports completion in the BUSY
    // state. Words moved before that completion are flagged as X, words
    // moved afterwards as Y.
    //
    reg xy_phase;   // 0 - x, 1 - y

    always @(posedge clk)
        //
        if (output_now) begin
            if (ena)
                xy_phase <= 1'b0;
            else if (!mw_mover_ena && mw_mover_rdy && (fsm_state == FSM_STATE_BUSY))
                xy_phase <= 1'b1;
        end

    //
    // Every word written by the mover while `output_now` is high is mirrored
    // onto the output port one clock later together with the write strobe
    // selected by xy_phase; in all other cycles the output registers are
    // cleared.
    //
    always @(posedge clk)
        //
        if (output_now && mw_mover_y_wren) begin
            xy_addr_reg              <= mw_mover_y_addr;
            xy_dout_reg              <= mw_mover_y_dout;
            {y_wren_reg, x_wren_reg} <= {xy_phase, ~xy_phase};
        end else begin
            xy_addr_reg              <= 4'b0000;
            xy_dout_reg              <= 32'h00000000;
            {y_wren_reg, x_wren_reg} <= 2'b00;
        end

endmodule
//------------------------------------------------------------------------------
// End-of-File
//------------------------------------------------------------------------------