aboutsummaryrefslogtreecommitdiff
path: root/src/rtl/chacha_core.v
diff options
context:
space:
mode:
author Joachim Strömbergson <joachim@secworks.se> 2016-12-28 10:55:30 +0100
committer Joachim Strömbergson <joachim@secworks.se> 2016-12-28 10:55:30 +0100
commit f4731e83511a3b35f05e4a6222ba27af5920fcd8 (patch)
tree f5efaf2985ffa4a4943cd7e0d68f2d09f1d801f4 /src/rtl/chacha_core.v
parent 549b75a635817ce263c368c9b8b5b0b07f90ec21 (diff)
(1) Cleanup of top and core code with no functional changes. The code is now much more compact. (2) Fixed how the QR modules are used in parallel to actually work in parallel. This increases performance. (3) Changed registers into arrays and cleaned up how operands and data are accessed. This decreased total design size. [cleanup]
Diffstat (limited to 'src/rtl/chacha_core.v')
-rw-r--r-- src/rtl/chacha_core.v 1176
1 files changed, 271 insertions, 905 deletions
diff --git a/src/rtl/chacha_core.v b/src/rtl/chacha_core.v
index 0e1158b..5f496a4 100644
--- a/src/rtl/chacha_core.v
+++ b/src/rtl/chacha_core.v
@@ -7,7 +7,7 @@
//
//
// Author: Joachim Strombergson
-// Copyright (c) 2014, NORDUnet A/S All rights reserved.
+// Copyright (c) 2011, NORDUnet A/S All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@@ -63,202 +63,89 @@ module chacha_core(
// Internal constant and parameter definitions.
//----------------------------------------------------------------
// Datapath quarterround state names.
- parameter STATE_QR0 = 1'b0;
- parameter STATE_QR1 = 1'b1;
+ localparam QR0 = 0;
+ localparam QR1 = 1;
- parameter NUM_ROUNDS = 4'h8;
+ localparam NUM_ROUNDS = 4'h8;
- parameter TAU0 = 32'h61707865;
- parameter TAU1 = 32'h3120646e;
- parameter TAU2 = 32'h79622d36;
- parameter TAU3 = 32'h6b206574;
+ localparam TAU0 = 32'h61707865;
+ localparam TAU1 = 32'h3120646e;
+ localparam TAU2 = 32'h79622d36;
+ localparam TAU3 = 32'h6b206574;
- parameter SIGMA0 = 32'h61707865;
- parameter SIGMA1 = 32'h3320646e;
- parameter SIGMA2 = 32'h79622d32;
- parameter SIGMA3 = 32'h6b206574;
+ localparam SIGMA0 = 32'h61707865;
+ localparam SIGMA1 = 32'h3320646e;
+ localparam SIGMA2 = 32'h79622d32;
+ localparam SIGMA3 = 32'h6b206574;
- parameter CTRL_IDLE = 3'h0;
- parameter CTRL_INIT = 3'h1;
- parameter CTRL_ROUNDS = 3'h2;
- parameter CTRL_FINALIZE = 3'h3;
- parameter CTRL_DONE = 3'h4;
+ localparam CTRL_IDLE = 3'h0;
+ localparam CTRL_INIT = 3'h1;
+ localparam CTRL_ROUNDS = 3'h2;
+ localparam CTRL_FINALIZE = 3'h3;
+ localparam CTRL_DONE = 3'h4;
+
+
+ //----------------------------------------------------------------
+ // l2b()
+ //
+ // Swap bytes from little to big endian byte order.
+ //----------------------------------------------------------------
+ function [31 : 0] l2b(input [31 : 0] op);
+ begin
+ l2b = {op[7 : 0], op[15 : 8], op[23 : 16], op[31 : 24]};
+ end
+ endfunction // l2b
//----------------------------------------------------------------
// Registers including update variables and write enable.
//----------------------------------------------------------------
- reg [31 : 0] key0_reg;
- reg [31 : 0] key0_new;
- reg [31 : 0] key1_reg;
- reg [31 : 0] key1_new;
- reg [31 : 0] key2_reg;
- reg [31 : 0] key2_new;
- reg [31 : 0] key3_reg;
- reg [31 : 0] key3_new;
- reg [31 : 0] key4_reg;
- reg [31 : 0] key4_new;
- reg [31 : 0] key5_reg;
- reg [31 : 0] key5_new;
- reg [31 : 0] key6_reg;
- reg [31 : 0] key6_new;
- reg [31 : 0] key7_reg;
- reg [31 : 0] key7_new;
-
- reg keylen_reg;
- reg keylen_new;
-
- reg [31 : 0] iv0_reg;
- reg [31 : 0] iv0_new;
- reg [31 : 0] iv1_reg;
- reg [31 : 0] iv1_new;
-
- reg [31 : 0] state0_reg;
- reg [31 : 0] state0_new;
- reg [31 : 0] state1_reg;
- reg [31 : 0] state1_new;
- reg [31 : 0] state2_reg;
- reg [31 : 0] state2_new;
- reg [31 : 0] state3_reg;
- reg [31 : 0] state3_new;
- reg [31 : 0] state4_reg;
- reg [31 : 0] state4_new;
- reg [31 : 0] state5_reg;
- reg [31 : 0] state5_new;
- reg [31 : 0] state6_reg;
- reg [31 : 0] state6_new;
- reg [31 : 0] state7_reg;
- reg [31 : 0] state7_new;
- reg [31 : 0] state8_reg;
- reg [31 : 0] state8_new;
- reg [31 : 0] state9_reg;
- reg [31 : 0] state9_new;
- reg [31 : 0] state10_reg;
- reg [31 : 0] state10_new;
- reg [31 : 0] state11_reg;
- reg [31 : 0] state11_new;
- reg [31 : 0] state12_reg;
- reg [31 : 0] state12_new;
- reg [31 : 0] state13_reg;
- reg [31 : 0] state13_new;
- reg [31 : 0] state14_reg;
- reg [31 : 0] state14_new;
- reg [31 : 0] state15_reg;
- reg [31 : 0] state15_new;
- reg state_we;
-
- reg [31 : 0] x0_reg;
- reg [31 : 0] x0_new;
- reg x0_we;
-
- reg [31 : 0] x1_reg;
- reg [31 : 0] x1_new;
- reg x1_we;
-
- reg [31 : 0] x2_reg;
- reg [31 : 0] x2_new;
- reg x2_we;
-
- reg [31 : 0] x3_reg;
- reg [31 : 0] x3_new;
- reg x3_we;
-
- reg [31 : 0] x4_reg;
- reg [31 : 0] x4_new;
- reg x4_we;
-
- reg [31 : 0] x5_reg;
- reg [31 : 0] x5_new;
- reg x5_we;
-
- reg [31 : 0] x6_reg;
- reg [31 : 0] x6_new;
- reg x6_we;
-
- reg [31 : 0] x7_reg;
- reg [31 : 0] x7_new;
- reg x7_we;
-
- reg [31 : 0] x8_reg;
- reg [31 : 0] x8_new;
- reg x8_we;
-
- reg [31 : 0] x9_reg;
- reg [31 : 0] x9_new;
- reg x9_we;
-
- reg [31 : 0] x10_reg;
- reg [31 : 0] x10_new;
- reg x10_we;
-
- reg [31 : 0] x11_reg;
- reg [31 : 0] x11_new;
- reg x11_we;
-
- reg [31 : 0] x12_reg;
- reg [31 : 0] x12_new;
- reg x12_we;
-
- reg [31 : 0] x13_reg;
- reg [31 : 0] x13_new;
- reg x13_we;
-
- reg [31 : 0] x14_reg;
- reg [31 : 0] x14_new;
- reg x14_we;
-
- reg [31 : 0] x15_reg;
- reg [31 : 0] x15_new;
- reg x15_we;
-
- reg [3 : 0] rounds_reg;
- reg [3 : 0] rounds_new;
-
- reg [511 : 0] data_in_reg;
- reg data_in_we;
+ reg [31 : 0] state_reg [0 : 15];
+ reg [31 : 0] state_new [0 : 15];
+ reg state_we;
reg [511 : 0] data_out_reg;
reg [511 : 0] data_out_new;
- reg data_out_we;
-
- reg data_out_valid_reg;
- reg data_out_valid_new;
- reg data_out_valid_we;
-
- reg ready_reg;
- reg ready_new;
- reg ready_we;
-
- reg qr_ctr_reg;
- reg qr_ctr_new;
- reg qr_ctr_we;
- reg qr_ctr_inc;
- reg qr_ctr_rst;
-
- reg [3 : 0] dr_ctr_reg;
- reg [3 : 0] dr_ctr_new;
- reg dr_ctr_we;
- reg dr_ctr_inc;
- reg dr_ctr_rst;
-
- reg [31 : 0] block0_ctr_reg;
- reg [31 : 0] block0_ctr_new;
- reg block0_ctr_we;
- reg [31 : 0] block1_ctr_reg;
- reg [31 : 0] block1_ctr_new;
- reg block1_ctr_we;
- reg block_ctr_inc;
- reg block_ctr_rst;
-
- reg [2 : 0] chacha_ctrl_reg;
- reg [2 : 0] chacha_ctrl_new;
- reg chacha_ctrl_we;
+
+ reg data_out_valid_reg;
+ reg data_out_valid_new;
+ reg data_out_valid_we;
+
+ reg qr_ctr_reg;
+ reg qr_ctr_new;
+ reg qr_ctr_we;
+ reg qr_ctr_inc;
+ reg qr_ctr_rst;
+
+ reg [3 : 0] dr_ctr_reg;
+ reg [3 : 0] dr_ctr_new;
+ reg dr_ctr_we;
+ reg dr_ctr_inc;
+ reg dr_ctr_rst;
+
+ reg [31 : 0] block0_ctr_reg;
+ reg [31 : 0] block0_ctr_new;
+ reg block0_ctr_we;
+ reg [31 : 0] block1_ctr_reg;
+ reg [31 : 0] block1_ctr_new;
+ reg block1_ctr_we;
+ reg block_ctr_inc;
+ reg block_ctr_set;
+
+ reg ready_reg;
+ reg ready_new;
+ reg ready_we;
+
+ reg [2 : 0] chacha_ctrl_reg;
+ reg [2 : 0] chacha_ctrl_new;
+ reg chacha_ctrl_we;
//----------------------------------------------------------------
// Wires.
//----------------------------------------------------------------
- reg sample_params;
+ reg [31 : 0] init_state_word [0 : 15];
+
reg init_state;
reg update_state;
reg update_output;
@@ -356,455 +243,127 @@ module chacha_core(
// Concurrent connectivity for ports etc.
//----------------------------------------------------------------
assign data_out = data_out_reg;
-
assign data_out_valid = data_out_valid_reg;
-
assign ready = ready_reg;
-
//----------------------------------------------------------------
// reg_update
//
// Update functionality for all registers in the core.
- // All registers are positive edge triggered with asynchronous
+ // All registers are positive edge triggered with synchronous
// active low reset. All registers have write enable.
//----------------------------------------------------------------
- always @ (posedge clk or negedge reset_n)
+ always @ (posedge clk)
begin : reg_update
+ integer i;
+
if (!reset_n)
begin
- key0_reg <= 32'h00000000;
- key1_reg <= 32'h00000000;
- key2_reg <= 32'h00000000;
- key3_reg <= 32'h00000000;
- key4_reg <= 32'h00000000;
- key5_reg <= 32'h00000000;
- key6_reg <= 32'h00000000;
- key7_reg <= 32'h00000000;
- iv0_reg <= 32'h00000000;
- iv1_reg <= 32'h00000000;
- state0_reg <= 32'h00000000;
- state1_reg <= 32'h00000000;
- state2_reg <= 32'h00000000;
- state3_reg <= 32'h00000000;
- state4_reg <= 32'h00000000;
- state5_reg <= 32'h00000000;
- state6_reg <= 32'h00000000;
- state7_reg <= 32'h00000000;
- state8_reg <= 32'h00000000;
- state9_reg <= 32'h00000000;
- state10_reg <= 32'h00000000;
- state11_reg <= 32'h00000000;
- state12_reg <= 32'h00000000;
- state13_reg <= 32'h00000000;
- state14_reg <= 32'h00000000;
- state15_reg <= 32'h00000000;
- x0_reg <= 32'h00000000;
- x1_reg <= 32'h00000000;
- x2_reg <= 32'h00000000;
- x3_reg <= 32'h00000000;
- x4_reg <= 32'h00000000;
- x5_reg <= 32'h00000000;
- x6_reg <= 32'h00000000;
- x7_reg <= 32'h00000000;
- x8_reg <= 32'h00000000;
- x9_reg <= 32'h00000000;
- x10_reg <= 32'h00000000;
- x11_reg <= 32'h00000000;
- x12_reg <= 32'h00000000;
- x13_reg <= 32'h00000000;
- x14_reg <= 32'h00000000;
- x15_reg <= 32'h00000000;
- data_in_reg <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
- data_out_reg <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
- rounds_reg <= 4'h0;
- ready_reg <= 1;
+ for (i = 0 ; i < 16 ; i = i + 1)
+ state_reg[i] <= 32'h0;
+
+ data_out_reg <= 512'h0;
data_out_valid_reg <= 0;
- qr_ctr_reg <= STATE_QR0;
+ qr_ctr_reg <= QR0;
dr_ctr_reg <= 0;
- block0_ctr_reg <= 32'h00000000;
- block1_ctr_reg <= 32'h00000000;
+ block0_ctr_reg <= 32'h0;
+ block1_ctr_reg <= 32'h0;
chacha_ctrl_reg <= CTRL_IDLE;
+ ready_reg <= 1;
end
else
begin
- if (sample_params)
- begin
- key0_reg <= key0_new;
- key1_reg <= key1_new;
- key2_reg <= key2_new;
- key3_reg <= key3_new;
- key4_reg <= key4_new;
- key5_reg <= key5_new;
- key6_reg <= key6_new;
- key7_reg <= key7_new;
- iv0_reg <= iv0_new;
- iv1_reg <= iv1_new;
- rounds_reg <= rounds_new;
- keylen_reg <= keylen_new;
- end
-
- if (data_in_we)
- begin
- data_in_reg <= data_in;
- end
-
if (state_we)
begin
- state0_reg <= state0_new;
- state1_reg <= state1_new;
- state2_reg <= state2_new;
- state3_reg <= state3_new;
- state4_reg <= state4_new;
- state5_reg <= state5_new;
- state6_reg <= state6_new;
- state7_reg <= state7_new;
- state8_reg <= state8_new;
- state9_reg <= state9_new;
- state10_reg <= state10_new;
- state11_reg <= state11_new;
- state12_reg <= state12_new;
- state13_reg <= state13_new;
- state14_reg <= state14_new;
- state15_reg <= state15_new;
- end
-
- if (x0_we)
- begin
- x0_reg <= x0_new;
- end
-
- if (x1_we)
- begin
- x1_reg <= x1_new;
- end
-
- if (x2_we)
- begin
- x2_reg <= x2_new;
- end
-
- if (x3_we)
- begin
- x3_reg <= x3_new;
- end
-
- if (x4_we)
- begin
- x4_reg <= x4_new;
- end
-
- if (x5_we)
- begin
- x5_reg <= x5_new;
- end
-
- if (x6_we)
- begin
- x6_reg <= x6_new;
- end
-
- if (x7_we)
- begin
- x7_reg <= x7_new;
- end
-
- if (x8_we)
- begin
- x8_reg <= x8_new;
- end
-
- if (x9_we)
- begin
- x9_reg <= x9_new;
- end
-
- if (x10_we)
- begin
- x10_reg <= x10_new;
- end
-
- if (x11_we)
- begin
- x11_reg <= x11_new;
- end
-
- if (x12_we)
- begin
- x12_reg <= x12_new;
+ for (i = 0 ; i < 16 ; i = i + 1)
+ state_reg[i] <= state_new[i];
end
- if (x13_we)
- begin
- x13_reg <= x13_new;
- end
-
- if (x14_we)
- begin
- x14_reg <= x14_new;
- end
-
- if (x15_we)
- begin
- x15_reg <= x15_new;
- end
-
- if (data_out_we)
- begin
- data_out_reg <= data_out_new;
- end
-
- if (ready_we)
- begin
- ready_reg <= ready_new;
- end
+ if (update_output)
+ data_out_reg <= data_out_new;
if (data_out_valid_we)
- begin
- data_out_valid_reg <= data_out_valid_new;
- end
+ data_out_valid_reg <= data_out_valid_new;
if (qr_ctr_we)
- begin
- qr_ctr_reg <= qr_ctr_new;
- end
+ qr_ctr_reg <= qr_ctr_new;
if (dr_ctr_we)
- begin
- dr_ctr_reg <= dr_ctr_new;
- end
+ dr_ctr_reg <= dr_ctr_new;
if (block0_ctr_we)
- begin
- block0_ctr_reg <= block0_ctr_new;
- end
+ block0_ctr_reg <= block0_ctr_new;
if (block1_ctr_we)
- begin
- block1_ctr_reg <= block1_ctr_new;
- end
+ block1_ctr_reg <= block1_ctr_new;
+
+ if (ready_we)
+ ready_reg <= ready_new;
if (chacha_ctrl_we)
- begin
- chacha_ctrl_reg <= chacha_ctrl_new;
- end
+ chacha_ctrl_reg <= chacha_ctrl_new;
end
end // reg_update
//----------------------------------------------------------------
- // data_out_logic
- // Final output logic that combines the result from procceing
- // with the input word. This adds a final layer of XOR gates.
+ // init_state_logic
//
- // Note that we also remap all the words into LSB format.
+ // Calculates the initial state for a given block.
//----------------------------------------------------------------
always @*
- begin : data_out_logic
- reg [31 : 0] msb_block_state0;
- reg [31 : 0] msb_block_state1;
- reg [31 : 0] msb_block_state2;
- reg [31 : 0] msb_block_state3;
- reg [31 : 0] msb_block_state4;
- reg [31 : 0] msb_block_state5;
- reg [31 : 0] msb_block_state6;
- reg [31 : 0] msb_block_state7;
- reg [31 : 0] msb_block_state8;
- reg [31 : 0] msb_block_state9;
- reg [31 : 0] msb_block_state10;
- reg [31 : 0] msb_block_state11;
- reg [31 : 0] msb_block_state12;
- reg [31 : 0] msb_block_state13;
- reg [31 : 0] msb_block_state14;
- reg [31 : 0] msb_block_state15;
-
- reg [31 : 0] lsb_block_state0;
- reg [31 : 0] lsb_block_state1;
- reg [31 : 0] lsb_block_state2;
- reg [31 : 0] lsb_block_state3;
- reg [31 : 0] lsb_block_state4;
- reg [31 : 0] lsb_block_state5;
- reg [31 : 0] lsb_block_state6;
- reg [31 : 0] lsb_block_state7;
- reg [31 : 0] lsb_block_state8;
- reg [31 : 0] lsb_block_state9;
- reg [31 : 0] lsb_block_state10;
- reg [31 : 0] lsb_block_state11;
- reg [31 : 0] lsb_block_state12;
- reg [31 : 0] lsb_block_state13;
- reg [31 : 0] lsb_block_state14;
- reg [31 : 0] lsb_block_state15;
-
- reg [511 : 0] lsb_block_state;
-
- lsb_block_state = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
-
- data_out_new = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000;
- data_out_we = 0;
-
- if (update_output)
+ begin : init_state_logic
+ reg [31 : 0] key0;
+ reg [31 : 0] key1;
+ reg [31 : 0] key2;
+ reg [31 : 0] key3;
+ reg [31 : 0] key4;
+ reg [31 : 0] key5;
+ reg [31 : 0] key6;
+ reg [31 : 0] key7;
+
+ key0 = l2b(key[255 : 224]);
+ key1 = l2b(key[223 : 192]);
+ key2 = l2b(key[191 : 160]);
+ key3 = l2b(key[159 : 128]);
+ key4 = l2b(key[127 : 96]);
+ key5 = l2b(key[95 : 64]);
+ key6 = l2b(key[63 : 32]);
+ key7 = l2b(key[31 : 0]);
+
+ init_state_word[04] = key0;
+ init_state_word[05] = key1;
+ init_state_word[06] = key2;
+ init_state_word[07] = key3;
+ init_state_word[12] = block0_ctr_reg;
+ init_state_word[13] = block1_ctr_reg;
+ init_state_word[14] = l2b(iv[63 : 32]);
+ init_state_word[15] = l2b(iv[31 : 0]);
+
+ if (keylen)
begin
- msb_block_state0 = state0_reg + x0_reg;
- msb_block_state1 = state1_reg + x1_reg;
- msb_block_state2 = state2_reg + x2_reg;
- msb_block_state3 = state3_reg + x3_reg;
- msb_block_state4 = state4_reg + x4_reg;
- msb_block_state5 = state5_reg + x5_reg;
- msb_block_state6 = state6_reg + x6_reg;
- msb_block_state7 = state7_reg + x7_reg;
- msb_block_state8 = state8_reg + x8_reg;
- msb_block_state9 = state9_reg + x9_reg;
- msb_block_state10 = state10_reg + x10_reg;
- msb_block_state11 = state11_reg + x11_reg;
- msb_block_state12 = state12_reg + x12_reg;
- msb_block_state13 = state13_reg + x13_reg;
- msb_block_state14 = state14_reg + x14_reg;
- msb_block_state15 = state15_reg + x15_reg;
-
- lsb_block_state0 = {msb_block_state0[7 : 0],
- msb_block_state0[15 : 8],
- msb_block_state0[23 : 16],
- msb_block_state0[31 : 24]};
-
- lsb_block_state1 = {msb_block_state1[7 : 0],
- msb_block_state1[15 : 8],
- msb_block_state1[23 : 16],
- msb_block_state1[31 : 24]};
-
- lsb_block_state2 = {msb_block_state2[7 : 0],
- msb_block_state2[15 : 8],
- msb_block_state2[23 : 16],
- msb_block_state2[31 : 24]};
-
- lsb_block_state3 = {msb_block_state3[7 : 0],
- msb_block_state3[15 : 8],
- msb_block_state3[23 : 16],
- msb_block_state3[31 : 24]};
-
- lsb_block_state4 = {msb_block_state4[7 : 0],
- msb_block_state4[15 : 8],
- msb_block_state4[23 : 16],
- msb_block_state4[31 : 24]};
-
- lsb_block_state5 = {msb_block_state5[7 : 0],
- msb_block_state5[15 : 8],
- msb_block_state5[23 : 16],
- msb_block_state5[31 : 24]};
-
- lsb_block_state6 = {msb_block_state6[7 : 0],
- msb_block_state6[15 : 8],
- msb_block_state6[23 : 16],
- msb_block_state6[31 : 24]};
-
- lsb_block_state7 = {msb_block_state7[7 : 0],
- msb_block_state7[15 : 8],
- msb_block_state7[23 : 16],
- msb_block_state7[31 : 24]};
-
- lsb_block_state8 = {msb_block_state8[7 : 0],
- msb_block_state8[15 : 8],
- msb_block_state8[23 : 16],
- msb_block_state8[31 : 24]};
-
- lsb_block_state9 = {msb_block_state9[7 : 0],
- msb_block_state9[15 : 8],
- msb_block_state9[23 : 16],
- msb_block_state9[31 : 24]};
-
- lsb_block_state10 = {msb_block_state10[7 : 0],
- msb_block_state10[15 : 8],
- msb_block_state10[23 : 16],
- msb_block_state10[31 : 24]};
-
- lsb_block_state11 = {msb_block_state11[7 : 0],
- msb_block_state11[15 : 8],
- msb_block_state11[23 : 16],
- msb_block_state11[31 : 24]};
-
- lsb_block_state12 = {msb_block_state12[7 : 0],
- msb_block_state12[15 : 8],
- msb_block_state12[23 : 16],
- msb_block_state12[31 : 24]};
-
- lsb_block_state13 = {msb_block_state13[7 : 0],
- msb_block_state13[15 : 8],
- msb_block_state13[23 : 16],
- msb_block_state13[31 : 24]};
-
- lsb_block_state14 = {msb_block_state14[7 : 0],
- msb_block_state14[15 : 8],
- msb_block_state14[23 : 16],
- msb_block_state14[31 : 24]};
-
- lsb_block_state15 = {msb_block_state15[7 : 0],
- msb_block_state15[15 : 8],
- msb_block_state15[23 : 16],
- msb_block_state15[31 : 24]};
-
- lsb_block_state = {lsb_block_state0, lsb_block_state1,
- lsb_block_state2, lsb_block_state3,
- lsb_block_state4, lsb_block_state5,
- lsb_block_state6, lsb_block_state7,
- lsb_block_state8, lsb_block_state9,
- lsb_block_state10, lsb_block_state11,
- lsb_block_state12, lsb_block_state13,
- lsb_block_state14, lsb_block_state15};
-
- data_out_new = data_in_reg ^ lsb_block_state;
- data_out_we = 1;
- end // if (update_output)
- end // data_out_logic
-
-
- //----------------------------------------------------------------
- // sample_parameters
- // Logic (wires) that convert parameter input to appropriate
- // format for processing.
- //----------------------------------------------------------------
- always @*
- begin : sample_parameters
- key0_new = 32'h00000000;
- key1_new = 32'h00000000;
- key2_new = 32'h00000000;
- key3_new = 32'h00000000;
- key4_new = 32'h00000000;
- key5_new = 32'h00000000;
- key6_new = 32'h00000000;
- key7_new = 32'h00000000;
- iv0_new = 32'h00000000;
- iv1_new = 32'h00000000;
- rounds_new = 4'h0;
- keylen_new = 1'b0;
-
- if (sample_params)
+ // 256 bit key.
+ init_state_word[00] = SIGMA0;
+ init_state_word[01] = SIGMA1;
+ init_state_word[02] = SIGMA2;
+ init_state_word[03] = SIGMA3;
+ init_state_word[08] = key4;
+ init_state_word[09] = key5;
+ init_state_word[10] = key6;
+ init_state_word[11] = key7;
+ end
+ else
begin
- key0_new = {key[231 : 224], key[239 : 232],
- key[247 : 240], key[255 : 248]};
- key1_new = {key[199 : 192], key[207 : 200],
- key[215 : 208], key[223 : 216]};
- key2_new = {key[167 : 160], key[175 : 168],
- key[183 : 176], key[191 : 184]};
- key3_new = {key[135 : 128], key[143 : 136],
- key[151 : 144], key[159 : 152]};
- key4_new = {key[103 : 96], key[111 : 104],
- key[119 : 112], key[127 : 120]};
- key5_new = {key[71 : 64], key[79 : 72],
- key[87 : 80], key[95 : 88]};
- key6_new = {key[39 : 32], key[47 : 40],
- key[55 : 48], key[63 : 56]};
- key7_new = {key[7 : 0], key[15 : 8],
- key[23 : 16], key[31 : 24]};
-
- iv0_new = {iv[39 : 32], iv[47 : 40],
- iv[55 : 48], iv[63 : 56]};
- iv1_new = {iv[7 : 0], iv[15 : 8],
- iv[23 : 16], iv[31 : 24]};
-
- // Div by two since we count double rounds.
- rounds_new = rounds[4 : 1];
-
- keylen_new = keylen;
+ // 128 bit key.
+ init_state_word[00] = TAU0;
+ init_state_word[01] = TAU1;
+ init_state_word[02] = TAU2;
+ init_state_word[03] = TAU3;
+ init_state_word[08] = key0;
+ init_state_word[09] = key1;
+ init_state_word[10] = key2;
+ init_state_word[11] = key3;
end
end
@@ -815,260 +374,110 @@ module chacha_core(
//----------------------------------------------------------------
always @*
begin : state_logic
- reg [31 : 0] new_state_word0;
- reg [31 : 0] new_state_word1;
- reg [31 : 0] new_state_word2;
- reg [31 : 0] new_state_word3;
- reg [31 : 0] new_state_word4;
- reg [31 : 0] new_state_word5;
- reg [31 : 0] new_state_word6;
- reg [31 : 0] new_state_word7;
- reg [31 : 0] new_state_word8;
- reg [31 : 0] new_state_word9;
- reg [31 : 0] new_state_word10;
- reg [31 : 0] new_state_word11;
- reg [31 : 0] new_state_word12;
- reg [31 : 0] new_state_word13;
- reg [31 : 0] new_state_word14;
- reg [31 : 0] new_state_word15;
-
- new_state_word0 = 32'h00000000;
- new_state_word1 = 32'h00000000;
- new_state_word2 = 32'h00000000;
- new_state_word3 = 32'h00000000;
- new_state_word4 = 32'h00000000;
- new_state_word5 = 32'h00000000;
- new_state_word6 = 32'h00000000;
- new_state_word7 = 32'h00000000;
- new_state_word8 = 32'h00000000;
- new_state_word9 = 32'h00000000;
- new_state_word10 = 32'h00000000;
- new_state_word11 = 32'h00000000;
- new_state_word12 = 32'h00000000;
- new_state_word13 = 32'h00000000;
- new_state_word14 = 32'h00000000;
- new_state_word15 = 32'h00000000;
-
- x0_new = 32'h00000000;
- x1_new = 32'h00000000;
- x2_new = 32'h00000000;
- x3_new = 32'h00000000;
- x4_new = 32'h00000000;
- x5_new = 32'h00000000;
- x6_new = 32'h00000000;
- x7_new = 32'h00000000;
- x8_new = 32'h00000000;
- x9_new = 32'h00000000;
- x10_new = 32'h00000000;
- x11_new = 32'h00000000;
- x12_new = 32'h00000000;
- x13_new = 32'h00000000;
- x14_new = 32'h00000000;
- x15_new = 32'h00000000;
- x0_we = 0;
- x1_we = 0;
- x2_we = 0;
- x3_we = 0;
- x4_we = 0;
- x5_we = 0;
- x6_we = 0;
- x7_we = 0;
- x8_we = 0;
- x9_we = 0;
- x10_we = 0;
- x11_we = 0;
- x12_we = 0;
- x13_we = 0;
- x14_we = 0;
- x15_we = 0;
-
- state0_new = 32'h00000000;
- state1_new = 32'h00000000;
- state2_new = 32'h00000000;
- state3_new = 32'h00000000;
- state4_new = 32'h00000000;
- state5_new = 32'h00000000;
- state6_new = 32'h00000000;
- state7_new = 32'h00000000;
- state8_new = 32'h00000000;
- state9_new = 32'h00000000;
- state10_new = 32'h00000000;
- state11_new = 32'h00000000;
- state12_new = 32'h00000000;
- state13_new = 32'h00000000;
- state14_new = 32'h00000000;
- state15_new = 32'h00000000;
+ integer i;
+
+ for (i = 0 ; i < 16 ; i = i + 1)
+ state_new[i] = 32'h0;
state_we = 0;
+ qr0_a = 32'h0;
+ qr0_b = 32'h0;
+ qr0_c = 32'h0;
+ qr0_d = 32'h0;
+ qr1_a = 32'h0;
+ qr1_b = 32'h0;
+ qr1_c = 32'h0;
+ qr1_d = 32'h0;
+ qr2_a = 32'h0;
+ qr2_b = 32'h0;
+ qr2_c = 32'h0;
+ qr2_d = 32'h0;
+ qr3_a = 32'h0;
+ qr3_b = 32'h0;
+ qr3_c = 32'h0;
+ qr3_d = 32'h0;
+
if (init_state)
begin
- new_state_word4 = key0_reg;
- new_state_word5 = key1_reg;
- new_state_word6 = key2_reg;
- new_state_word7 = key3_reg;
-
- new_state_word12 = block0_ctr_reg;
- new_state_word13 = block1_ctr_reg;
-
- new_state_word14 = iv0_reg;
- new_state_word15 = iv1_reg;
-
- if (keylen_reg)
- begin
- // 256 bit key.
- new_state_word0 = SIGMA0;
- new_state_word1 = SIGMA1;
- new_state_word2 = SIGMA2;
- new_state_word3 = SIGMA3;
- new_state_word8 = key4_reg;
- new_state_word9 = key5_reg;
- new_state_word10 = key6_reg;
- new_state_word11 = key7_reg;
- end
- else
- begin
- // 128 bit key.
- new_state_word0 = TAU0;
- new_state_word1 = TAU1;
- new_state_word2 = TAU2;
- new_state_word3 = TAU3;
- new_state_word8 = key0_reg;
- new_state_word9 = key1_reg;
- new_state_word10 = key2_reg;
- new_state_word11 = key3_reg;
- end
-
- x0_new = new_state_word0;
- x1_new = new_state_word1;
- x2_new = new_state_word2;
- x3_new = new_state_word3;
- x4_new = new_state_word4;
- x5_new = new_state_word5;
- x6_new = new_state_word6;
- x7_new = new_state_word7;
- x8_new = new_state_word8;
- x9_new = new_state_word9;
- x10_new = new_state_word10;
- x11_new = new_state_word11;
- x12_new = new_state_word12;
- x13_new = new_state_word13;
- x14_new = new_state_word14;
- x15_new = new_state_word15;
- x0_we = 1;
- x1_we = 1;
- x2_we = 1;
- x3_we = 1;
- x4_we = 1;
- x5_we = 1;
- x6_we = 1;
- x7_we = 1;
- x8_we = 1;
- x9_we = 1;
- x10_we = 1;
- x11_we = 1;
- x12_we = 1;
- x13_we = 1;
- x14_we = 1;
- x15_we = 1;
-
- state0_new = new_state_word0;
- state1_new = new_state_word1;
- state2_new = new_state_word2;
- state3_new = new_state_word3;
- state4_new = new_state_word4;
- state5_new = new_state_word5;
- state6_new = new_state_word6;
- state7_new = new_state_word7;
- state8_new = new_state_word8;
- state9_new = new_state_word9;
- state10_new = new_state_word10;
- state11_new = new_state_word11;
- state12_new = new_state_word12;
- state13_new = new_state_word13;
- state14_new = new_state_word14;
- state15_new = new_state_word15;
- state_we = 1;
+ for (i = 0 ; i < 16 ; i = i + 1)
+ state_new[i] = init_state_word[i];
+ state_we = 1;
end // if (init_state)
- else if (update_state)
+ if (update_state)
begin
+ state_we = 1;
case (qr_ctr_reg)
- STATE_QR0:
+ QR0:
begin
- x0_new = qr0_a_prim;
- x4_new = qr0_b_prim;
- x8_new = qr0_c_prim;
- x12_new = qr0_d_prim;
- x0_we = 1;
- x4_we = 1;
- x8_we = 1;
- x12_we = 1;
-
- x1_new = qr1_a_prim;
- x5_new = qr1_b_prim;
- x9_new = qr1_c_prim;
- x13_new = qr1_d_prim;
- x1_we = 1;
- x5_we = 1;
- x9_we = 1;
- x13_we = 1;
-
- x2_new = qr2_a_prim;
- x6_new = qr2_b_prim;
- x10_new = qr2_c_prim;
- x14_new = qr2_d_prim;
- x2_we = 1;
- x6_we = 1;
- x10_we = 1;
- x14_we = 1;
-
- x3_new = qr3_a_prim;
- x7_new = qr3_b_prim;
- x11_new = qr3_c_prim;
- x15_new = qr3_d_prim;
- x3_we = 1;
- x7_we = 1;
- x11_we = 1;
- x15_we = 1;
+ qr0_a = state_reg[00];
+ qr0_b = state_reg[04];
+ qr0_c = state_reg[08];
+ qr0_d = state_reg[12];
+ qr1_a = state_reg[01];
+ qr1_b = state_reg[05];
+ qr1_c = state_reg[09];
+ qr1_d = state_reg[13];
+ qr2_a = state_reg[02];
+ qr2_b = state_reg[06];
+ qr2_c = state_reg[10];
+ qr2_d = state_reg[14];
+ qr3_a = state_reg[03];
+ qr3_b = state_reg[07];
+ qr3_c = state_reg[11];
+ qr3_d = state_reg[15];
+ state_new[00] = qr0_a_prim;
+ state_new[04] = qr0_b_prim;
+ state_new[08] = qr0_c_prim;
+ state_new[12] = qr0_d_prim;
+ state_new[01] = qr1_a_prim;
+ state_new[05] = qr1_b_prim;
+ state_new[09] = qr1_c_prim;
+ state_new[13] = qr1_d_prim;
+ state_new[02] = qr2_a_prim;
+ state_new[06] = qr2_b_prim;
+ state_new[10] = qr2_c_prim;
+ state_new[14] = qr2_d_prim;
+ state_new[03] = qr3_a_prim;
+ state_new[07] = qr3_b_prim;
+ state_new[11] = qr3_c_prim;
+ state_new[15] = qr3_d_prim;
end
- STATE_QR1:
+ QR1:
begin
- x0_new = qr0_a_prim;
- x5_new = qr0_b_prim;
- x10_new = qr0_c_prim;
- x15_new = qr0_d_prim;
- x0_we = 1;
- x5_we = 1;
- x10_we = 1;
- x15_we = 1;
-
- x1_new = qr1_a_prim;
- x6_new = qr1_b_prim;
- x11_new = qr1_c_prim;
- x12_new = qr1_d_prim;
- x1_we = 1;
- x6_we = 1;
- x11_we = 1;
- x12_we = 1;
-
- x2_new = qr2_a_prim;
- x7_new = qr2_b_prim;
- x8_new = qr2_c_prim;
- x13_new = qr2_d_prim;
- x2_we = 1;
- x7_we = 1;
- x8_we = 1;
- x13_we = 1;
-
- x3_new = qr3_a_prim;
- x4_new = qr3_b_prim;
- x9_new = qr3_c_prim;
- x14_new = qr3_d_prim;
- x3_we = 1;
- x4_we = 1;
- x9_we = 1;
- x14_we = 1;
+ qr0_a = state_reg[00];
+ qr0_b = state_reg[05];
+ qr0_c = state_reg[10];
+ qr0_d = state_reg[15];
+ qr1_a = state_reg[01];
+ qr1_b = state_reg[06];
+ qr1_c = state_reg[11];
+ qr1_d = state_reg[12];
+ qr2_a = state_reg[02];
+ qr2_b = state_reg[07];
+ qr2_c = state_reg[08];
+ qr2_d = state_reg[13];
+ qr3_a = state_reg[03];
+ qr3_b = state_reg[04];
+ qr3_c = state_reg[09];
+ qr3_d = state_reg[14];
+ state_new[00] = qr0_a_prim;
+ state_new[05] = qr0_b_prim;
+ state_new[10] = qr0_c_prim;
+ state_new[15] = qr0_d_prim;
+ state_new[01] = qr1_a_prim;
+ state_new[06] = qr1_b_prim;
+ state_new[11] = qr1_c_prim;
+ state_new[12] = qr1_d_prim;
+ state_new[02] = qr2_a_prim;
+ state_new[07] = qr2_b_prim;
+ state_new[08] = qr2_c_prim;
+ state_new[13] = qr2_d_prim;
+ state_new[03] = qr3_a_prim;
+ state_new[04] = qr3_b_prim;
+ state_new[09] = qr3_c_prim;
+ state_new[14] = qr3_d_prim;
end
endcase // case (qr_ctr_reg)
end // if (update_state)
@@ -1076,59 +485,37 @@ module chacha_core(
//----------------------------------------------------------------
- // quarterround_mux
- // Quarterround muxes that selects operands for quarterrounds.
+ // data_out_logic
+ // Final output logic that adds the initial state to the result of
+ // the state update and XORs the sum with the input block. This
+ // adds 16 adders and a final layer of XOR gates.
+ //
+ // Note that we also remap all the words into LSB format.
//----------------------------------------------------------------
always @*
- begin : quarterround_mux
- case (qr_ctr_reg)
- STATE_QR0:
- begin
- qr0_a = x0_reg;
- qr0_b = x4_reg;
- qr0_c = x8_reg;
- qr0_d = x12_reg;
-
- qr1_a = x1_reg;
- qr1_b = x5_reg;
- qr1_c = x9_reg;
- qr1_d = x13_reg;
-
- qr2_a = x2_reg;
- qr2_b = x6_reg;
- qr2_c = x10_reg;
- qr2_d = x14_reg;
-
- qr3_a = x3_reg;
- qr3_b = x7_reg;
- qr3_c = x11_reg;
- qr3_d = x15_reg;
- end
+ begin : data_out_logic
+ integer i;
+ reg [31 : 0] msb_block_state [0 : 15];
+ reg [31 : 0] lsb_block_state [0 : 15];
+ reg [511 : 0] block_state;
- STATE_QR1:
- begin
- qr0_a = x0_reg;
- qr0_b = x5_reg;
- qr0_c = x10_reg;
- qr0_d = x15_reg;
-
- qr1_a = x1_reg;
- qr1_b = x6_reg;
- qr1_c = x11_reg;
- qr1_d = x12_reg;
-
- qr2_a = x2_reg;
- qr2_b = x7_reg;
- qr2_c = x8_reg;
- qr2_d = x13_reg;
-
- qr3_a = x3_reg;
- qr3_b = x4_reg;
- qr3_c = x9_reg;
- qr3_d = x14_reg;
- end
- endcase // case (quarterround_select)
- end // quarterround_mux
+ for (i = 0 ; i < 16 ; i = i + 1)
+ begin
+ msb_block_state[i] = init_state_word[i] + state_reg[i];
+ lsb_block_state[i] = l2b(msb_block_state[i][31 : 0]);
+ end
+
+ block_state = {lsb_block_state[00], lsb_block_state[01],
+ lsb_block_state[02], lsb_block_state[03],
+ lsb_block_state[04], lsb_block_state[05],
+ lsb_block_state[06], lsb_block_state[07],
+ lsb_block_state[08], lsb_block_state[09],
+ lsb_block_state[10], lsb_block_state[11],
+ lsb_block_state[12], lsb_block_state[13],
+ lsb_block_state[14], lsb_block_state[15]};
+
+ data_out_new = data_in ^ block_state;
+ end // data_out_logic
//----------------------------------------------------------------
@@ -1186,13 +573,12 @@ module chacha_core(
//----------------------------------------------------------------
always @*
begin : block_ctr
- // Defult assignments
- block0_ctr_new = 32'h00000000;
- block1_ctr_new = 32'h00000000;
+ block0_ctr_new = 32'h0;
+ block1_ctr_new = 32'h0;
block0_ctr_we = 0;
block1_ctr_we = 0;
- if (block_ctr_rst)
+ if (block_ctr_set)
begin
block0_ctr_new = ctr[31 : 00];
block1_ctr_new = ctr[63 : 32];
@@ -1223,46 +609,33 @@ module chacha_core(
begin : chacha_ctrl_fsm
init_state = 0;
update_state = 0;
- sample_params = 0;
update_output = 0;
-
qr_ctr_inc = 0;
qr_ctr_rst = 0;
-
dr_ctr_inc = 0;
dr_ctr_rst = 0;
-
block_ctr_inc = 0;
- block_ctr_rst = 0;
-
- data_in_we = 0;
-
+ block_ctr_set = 0;
ready_new = 0;
ready_we = 0;
-
data_out_valid_new = 0;
data_out_valid_we = 0;
-
chacha_ctrl_new = CTRL_IDLE;
chacha_ctrl_we = 0;
-
case (chacha_ctrl_reg)
CTRL_IDLE:
begin
if (init)
begin
+ block_ctr_set = 1;
ready_new = 0;
ready_we = 1;
- data_in_we = 1;
- sample_params = 1;
- block_ctr_rst = 1;
chacha_ctrl_new = CTRL_INIT;
chacha_ctrl_we = 1;
end
end
-
CTRL_INIT:
begin
init_state = 1;
@@ -1272,15 +645,14 @@ module chacha_core(
chacha_ctrl_we = 1;
end
-
CTRL_ROUNDS:
begin
update_state = 1;
qr_ctr_inc = 1;
- if (qr_ctr_reg == STATE_QR1)
+ if (qr_ctr_reg == QR1)
begin
dr_ctr_inc = 1;
- if (dr_ctr_reg == (rounds_reg - 1))
+ if (dr_ctr_reg == (rounds[4 : 1] - 1))
begin
chacha_ctrl_new = CTRL_FINALIZE;
chacha_ctrl_we = 1;
@@ -1288,7 +660,6 @@ module chacha_core(
end
end
-
CTRL_FINALIZE:
begin
ready_new = 1;
@@ -1300,7 +671,6 @@ module chacha_core(
chacha_ctrl_we = 1;
end
-
CTRL_DONE:
begin
if (init)
@@ -1309,9 +679,7 @@ module chacha_core(
ready_we = 1;
data_out_valid_new = 0;
data_out_valid_we = 1;
- data_in_we = 1;
- sample_params = 1;
- block_ctr_rst = 1;
+ block_ctr_set = 1;
chacha_ctrl_new = CTRL_INIT;
chacha_ctrl_we = 1;
end
@@ -1321,14 +689,12 @@ module chacha_core(
ready_we = 1;
data_out_valid_new = 0;
data_out_valid_we = 1;
- data_in_we = 1;
block_ctr_inc = 1;
chacha_ctrl_new = CTRL_INIT;
chacha_ctrl_we = 1;
end
end
-
default:
begin