diff options
author | Joachim Strömbergson <joachim@secworks.se> | 2016-12-28 10:55:30 +0100 |
---|---|---|
committer | Joachim Strömbergson <joachim@secworks.se> | 2016-12-28 10:55:30 +0100 |
commit | f4731e83511a3b35f05e4a6222ba27af5920fcd8 (patch) | |
tree | f5efaf2985ffa4a4943cd7e0d68f2d09f1d801f4 /src/rtl | |
parent | 549b75a635817ce263c368c9b8b5b0b07f90ec21 (diff) |
(1) Cleanup of top and core code with no functional changes. The code is now much more compact. (2) Fixed how the QR modules are used in parallel to actually work in parallel. This increases performance. (3) Changed registers into arrays and cleaned up how operands and data are accessed. This decreased total design size. [cleanup]
Diffstat (limited to 'src/rtl')
-rw-r--r-- | src/rtl/chacha.v | 793 | ||||
-rw-r--r-- | src/rtl/chacha_core.v | 1176 |
2 files changed, 364 insertions, 1605 deletions
diff --git a/src/rtl/chacha.v b/src/rtl/chacha.v index 497f51d..bc891f7 100644 --- a/src/rtl/chacha.v +++ b/src/rtl/chacha.v @@ -38,15 +38,10 @@ //====================================================================== module chacha( - // Clock and reset. input wire clk, input wire reset_n, - - // Control. input wire cs, input wire we, - - // Data ports. input wire [7 : 0] address, input wire [31 : 0] write_data, output wire [31 : 0] read_data, @@ -74,156 +69,51 @@ module chacha( localparam ROUNDS_LOW_BIT = 0; localparam ADDR_KEY0 = 8'h10; - localparam ADDR_KEY1 = 8'h11; - localparam ADDR_KEY2 = 8'h12; - localparam ADDR_KEY3 = 8'h13; - localparam ADDR_KEY4 = 8'h14; - localparam ADDR_KEY5 = 8'h15; - localparam ADDR_KEY6 = 8'h16; localparam ADDR_KEY7 = 8'h17; localparam ADDR_IV0 = 8'h20; localparam ADDR_IV1 = 8'h21; localparam ADDR_DATA_IN0 = 8'h40; - localparam ADDR_DATA_IN1 = 8'h41; - localparam ADDR_DATA_IN2 = 8'h42; - localparam ADDR_DATA_IN3 = 8'h43; - localparam ADDR_DATA_IN4 = 8'h44; - localparam ADDR_DATA_IN5 = 8'h45; - localparam ADDR_DATA_IN6 = 8'h46; - localparam ADDR_DATA_IN7 = 8'h47; - localparam ADDR_DATA_IN8 = 8'h48; - localparam ADDR_DATA_IN9 = 8'h49; - localparam ADDR_DATA_IN10 = 8'h4a; - localparam ADDR_DATA_IN11 = 8'h4b; - localparam ADDR_DATA_IN12 = 8'h4c; - localparam ADDR_DATA_IN13 = 8'h4d; - localparam ADDR_DATA_IN14 = 8'h4e; localparam ADDR_DATA_IN15 = 8'h4f; localparam ADDR_DATA_OUT0 = 8'h80; - localparam ADDR_DATA_OUT1 = 8'h81; - localparam ADDR_DATA_OUT2 = 8'h82; - localparam ADDR_DATA_OUT3 = 8'h83; - localparam ADDR_DATA_OUT4 = 8'h84; - localparam ADDR_DATA_OUT5 = 8'h85; - localparam ADDR_DATA_OUT6 = 8'h86; - localparam ADDR_DATA_OUT7 = 8'h87; - localparam ADDR_DATA_OUT8 = 8'h88; - localparam ADDR_DATA_OUT9 = 8'h89; - localparam ADDR_DATA_OUT10 = 8'h8a; - localparam ADDR_DATA_OUT11 = 8'h8b; - localparam ADDR_DATA_OUT12 = 8'h8c; - localparam ADDR_DATA_OUT13 = 8'h8d; - localparam ADDR_DATA_OUT14 = 8'h8e; localparam ADDR_DATA_OUT15 = 8'h8f; - 
localparam DEFAULT_CTR_INIT = 64'h0000000000000000; + localparam CORE_NAME0 = 32'h63686163; // "chac" + localparam CORE_NAME1 = 32'h68612020; // "ha " + localparam CORE_VERSION = 32'h302e3831; // "0.81" - localparam CORE_NAME0 = 32'h63686163; // "chac" - localparam CORE_NAME1 = 32'h68612020; // "ha " - localparam CORE_VERSION = 32'h302e3830; // "0.80" + localparam DEFAULT_CTR_INIT = 64'h0; //---------------------------------------------------------------- // Registers including update variables and write enable. //---------------------------------------------------------------- - reg init_reg; - reg next_reg; - reg ctrl_we; - - reg ready_reg; - - reg keylen_reg; - reg keylen_we; - - reg [4 : 0] rounds_reg; - reg rounds_we; - - reg data_out_valid_reg; - - reg [31 : 0] key0_reg; - reg key0_we; - reg [31 : 0] key1_reg; - reg key1_we; - reg [31 : 0] key2_reg; - reg key2_we; - reg [31 : 0] key3_reg; - reg key3_we; - reg [31 : 0] key4_reg; - reg key4_we; - reg [31 : 0] key5_reg; - reg key5_we; - reg [31 : 0] key6_reg; - reg key6_we; - reg [31 : 0] key7_reg; - reg key7_we; - - reg [31 : 0] iv0_reg; - reg iv0_we; - reg [31 : 0] iv1_reg; - reg iv1_we; - - reg [31 : 0] data_in0_reg; - reg data_in0_we; - reg [31 : 0] data_in1_reg; - reg data_in1_we; - reg [31 : 0] data_in2_reg; - reg data_in2_we; - reg [31 : 0] data_in3_reg; - reg data_in3_we; - reg [31 : 0] data_in4_reg; - reg data_in4_we; - reg [31 : 0] data_in5_reg; - reg data_in5_we; - reg [31 : 0] data_in6_reg; - reg data_in6_we; - reg [31 : 0] data_in7_reg; - reg data_in7_we; - reg [31 : 0] data_in8_reg; - reg data_in8_we; - reg [31 : 0] data_in9_reg; - reg data_in9_we; - reg [31 : 0] data_in10_reg; - reg data_in10_we; - reg [31 : 0] data_in11_reg; - reg data_in11_we; - reg [31 : 0] data_in12_reg; - reg data_in12_we; - reg [31 : 0] data_in13_reg; - reg data_in13_we; - reg [31 : 0] data_in14_reg; - reg data_in14_we; - reg [31 : 0] data_in15_reg; - reg data_in15_we; - - reg [31 : 0] data_out0_reg; - reg [31 : 0] 
data_out1_reg; - reg [31 : 0] data_out2_reg; - reg [31 : 0] data_out3_reg; - reg [31 : 0] data_out4_reg; - reg [31 : 0] data_out5_reg; - reg [31 : 0] data_out6_reg; - reg [31 : 0] data_out7_reg; - reg [31 : 0] data_out8_reg; - reg [31 : 0] data_out9_reg; - reg [31 : 0] data_out10_reg; - reg [31 : 0] data_out11_reg; - reg [31 : 0] data_out12_reg; - reg [31 : 0] data_out13_reg; - reg [31 : 0] data_out14_reg; - reg [31 : 0] data_out15_reg; + reg init_reg; + reg next_reg; + reg ctrl_we; + + reg keylen_reg; + reg keylen_we; + + reg [4 : 0] rounds_reg; + reg rounds_we; + + reg [31 : 0] key_reg [0 : 7]; + reg key_we; + + reg [31 : 0] iv_reg[0 : 1]; + reg iv_we; + + reg [31 : 0] data_in_reg [0 : 15]; + reg data_in_we; //---------------------------------------------------------------- // Wires. //---------------------------------------------------------------- - wire core_init; - wire core_next; wire [255 : 0] core_key; - wire core_keylen; - wire [4 : 0] core_rounds; wire [63 : 0] core_iv; wire core_ready; wire [511 : 0] core_data_in; @@ -231,32 +121,24 @@ module chacha( wire core_data_out_valid; reg [31 : 0] tmp_read_data; - reg tmp_error; //---------------------------------------------------------------- // Concurrent connectivity for ports etc. 
//---------------------------------------------------------------- - assign core_init = init_reg; - - assign core_next = next_reg; - - assign core_keylen = keylen_reg; + assign core_key = {key_reg[0], key_reg[1], key_reg[2], key_reg[3], + key_reg[4], key_reg[5], key_reg[6], key_reg[7]}; - assign core_rounds = rounds_reg; + assign core_iv = {iv_reg[0], iv_reg[1]}; - assign core_key = {key0_reg, key1_reg, key2_reg, key3_reg, - key4_reg, key5_reg, key6_reg, key7_reg}; + assign core_data_in = {data_in_reg[00], data_in_reg[01], data_in_reg[02], data_in_reg[03], + data_in_reg[04], data_in_reg[05], data_in_reg[06], data_in_reg[07], + data_in_reg[08], data_in_reg[09], data_in_reg[10], data_in_reg[11], + data_in_reg[12], data_in_reg[13], data_in_reg[14], data_in_reg[15]}; - assign core_iv = {iv0_reg, iv1_reg}; + assign read_data = tmp_read_data; - assign core_data_in = {data_in0_reg, data_in1_reg, data_in2_reg, data_in3_reg, - data_in4_reg, data_in5_reg, data_in6_reg, data_in7_reg, - data_in8_reg, data_in9_reg, data_in10_reg, data_in11_reg, - data_in12_reg, data_in13_reg, data_in14_reg, data_in15_reg}; - - assign read_data = tmp_read_data; - assign error = tmp_error; + assign error = 1'b0; //---------------------------------------------------------------- @@ -265,20 +147,15 @@ module chacha( chacha_core core ( .clk(clk), .reset_n(reset_n), - - .init(core_init), - .next(core_next), - + .init(init_reg), + .next(next_reg), .key(core_key), - .keylen(core_keylen), + .keylen(keylen_reg), .iv(core_iv), .ctr(DEFAULT_CTR_INIT), - .rounds(core_rounds), - + .rounds(rounds_reg), .data_in(core_data_in), - .ready(core_ready), - .data_out(core_data_out), .data_out_valid(core_data_out_valid) ); @@ -286,72 +163,31 @@ module chacha( //---------------------------------------------------------------- // reg_update + // // Update functionality for all registers in the core. // All registers are positive edge triggered with asynchronous - // active low reset. + // active low reset. 
All registers have write enable. //---------------------------------------------------------------- - always @ (posedge clk or negedge reset_n) - begin + always @ (posedge clk) + begin : reg_update + integer i; if (!reset_n) begin - init_reg <= 0; - next_reg <= 0; - ready_reg <= 0; - keylen_reg <= 0; - rounds_reg <= 5'b00000; - data_out_valid_reg <= 0; - - key0_reg <= 32'h00000000; - key1_reg <= 32'h00000000; - key2_reg <= 32'h00000000; - key3_reg <= 32'h00000000; - key4_reg <= 32'h00000000; - key5_reg <= 32'h00000000; - key6_reg <= 32'h00000000; - key7_reg <= 32'h00000000; - - iv0_reg <= 32'h00000000; - iv1_reg <= 32'h00000000; - - data_in0_reg <= 32'h00000000; - data_in1_reg <= 32'h00000000; - data_in2_reg <= 32'h00000000; - data_in3_reg <= 32'h00000000; - data_in4_reg <= 32'h00000000; - data_in5_reg <= 32'h00000000; - data_in6_reg <= 32'h00000000; - data_in7_reg <= 32'h00000000; - data_in8_reg <= 32'h00000000; - data_in9_reg <= 32'h00000000; - data_in10_reg <= 32'h00000000; - data_in11_reg <= 32'h00000000; - data_in12_reg <= 32'h00000000; - data_in13_reg <= 32'h00000000; - data_in14_reg <= 32'h00000000; - data_in15_reg <= 32'h00000000; - - data_out0_reg <= 32'h00000000; - data_out1_reg <= 32'h00000000; - data_out2_reg <= 32'h00000000; - data_out3_reg <= 32'h00000000; - data_out4_reg <= 32'h00000000; - data_out5_reg <= 32'h00000000; - data_out6_reg <= 32'h00000000; - data_out7_reg <= 32'h00000000; - data_out8_reg <= 32'h00000000; - data_out9_reg <= 32'h00000000; - data_out10_reg <= 32'h00000000; - data_out11_reg <= 32'h00000000; - data_out12_reg <= 32'h00000000; - data_out13_reg <= 32'h00000000; - data_out14_reg <= 32'h00000000; - data_out15_reg <= 32'h00000000; + init_reg <= 0; + next_reg <= 0; + keylen_reg <= 0; + rounds_reg <= 5'h0; + iv_reg[0] <= 32'h0; + iv_reg[1] <= 32'h0; + + for (i = 0 ; i < 8 ; i = i + 1) + key_reg[i] <= 32'h0; + + for (i = 0 ; i < 16 ; i = i + 1) + data_in_reg[i] <= 32'h0; end else begin - ready_reg <= core_ready; - data_out_valid_reg 
<= core_data_out_valid; - if (ctrl_we) begin init_reg <= write_data[CTRL_INIT_BIT]; @@ -359,164 +195,19 @@ module chacha( end if (keylen_we) - begin - keylen_reg <= write_data[KEYLEN_BIT]; - end + keylen_reg <= write_data[KEYLEN_BIT]; if (rounds_we) - begin - rounds_reg <= write_data[ROUNDS_HIGH_BIT : ROUNDS_LOW_BIT]; - end + rounds_reg <= write_data[ROUNDS_HIGH_BIT : ROUNDS_LOW_BIT]; - if (key0_we) - begin - key0_reg <= write_data; - end + if (key_we) + key_reg[address[2 : 0]] <= write_data; - if (key1_we) - begin - key1_reg <= write_data; - end - - if (key2_we) - begin - key2_reg <= write_data; - end - - if (key3_we) - begin - key3_reg <= write_data; - end - - if (key4_we) - begin - key4_reg <= write_data; - end - - if (key5_we) - begin - key5_reg <= write_data; - end - - if (key6_we) - begin - key6_reg <= write_data; - end - - if (key7_we) - begin - key7_reg <= write_data; - end - - if (iv0_we) - begin - iv0_reg <= write_data; - end - - if (iv1_we) - begin - iv1_reg <= write_data; - end - - if (data_in0_we) - begin - data_in0_reg <= write_data; - end - - if (data_in1_we) - begin - data_in1_reg <= write_data; - end - - if (data_in2_we) - begin - data_in2_reg <= write_data; - end - - if (data_in3_we) - begin - data_in3_reg <= write_data; - end - - if (data_in4_we) - begin - data_in4_reg <= write_data; - end - - if (data_in5_we) - begin - data_in5_reg <= write_data; - end - - if (data_in6_we) - begin - data_in6_reg <= write_data; - end + if (iv_we) + iv_reg[address[0]] <= write_data; - if (data_in7_we) - begin - data_in7_reg <= write_data; - end - - if (data_in8_we) - begin - data_in8_reg <= write_data; - end - - if (data_in9_we) - begin - data_in9_reg <= write_data; - end - - if (data_in10_we) - begin - data_in10_reg <= write_data; - end - - if (data_in11_we) - begin - data_in11_reg <= write_data; - end - - if (data_in12_we) - begin - data_in12_reg <= write_data; - end - - if (data_in13_we) - begin - data_in13_reg <= write_data; - end - - if (data_in14_we) - begin 
- data_in14_reg <= write_data; - end - - if (data_in15_we) - begin - data_in15_reg <= write_data; - end - - if (core_data_out_valid) - begin - data_out0_reg <= core_data_out[511 : 480]; - data_out1_reg <= core_data_out[479 : 448]; - data_out2_reg <= core_data_out[447 : 416]; - data_out3_reg <= core_data_out[415 : 384]; - data_out4_reg <= core_data_out[383 : 352]; - data_out5_reg <= core_data_out[351 : 320]; - data_out6_reg <= core_data_out[319 : 288]; - data_out7_reg <= core_data_out[287 : 256]; - data_out8_reg <= core_data_out[255 : 224]; - data_out9_reg <= core_data_out[223 : 192]; - data_out10_reg <= core_data_out[191 : 160]; - data_out11_reg <= core_data_out[159 : 128]; - data_out12_reg <= core_data_out[127 : 96]; - data_out13_reg <= core_data_out[95 : 64]; - data_out14_reg <= core_data_out[63 : 32]; - data_out15_reg <= core_data_out[31 : 0]; - end + if (data_in_we) + data_in_reg[address[3 : 0]] <= write_data; end end // reg_update @@ -526,356 +217,58 @@ module chacha( //---------------------------------------------------------------- always @* begin : addr_decoder - ctrl_we = 0; - keylen_we = 0; - rounds_we = 0; - - key0_we = 0; - key1_we = 0; - key2_we = 0; - key3_we = 0; - key4_we = 0; - key5_we = 0; - key6_we = 0; - key7_we = 0; - - iv0_we = 0; - iv1_we = 0; - - data_in0_we = 0; - data_in1_we = 0; - data_in2_we = 0; - data_in3_we = 0; - data_in4_we = 0; - data_in5_we = 0; - data_in6_we = 0; - data_in7_we = 0; - data_in8_we = 0; - data_in9_we = 0; - data_in10_we = 0; - data_in11_we = 0; - data_in12_we = 0; - data_in13_we = 0; - data_in14_we = 0; - data_in15_we = 0; - - tmp_read_data = 32'h00000000; - tmp_error = 0; + ctrl_we = 0; + keylen_we = 0; + rounds_we = 0; + key_we = 0; + iv_we = 0; + data_in_we = 0; + tmp_read_data = 32'h0; if (cs) begin if (we) begin - case (address) - ADDR_CTRL: - begin - ctrl_we = 1; - end - - ADDR_KEYLEN: - begin - keylen_we = 1; - end - - ADDR_ROUNDS: - begin - rounds_we = 1; - end - - ADDR_KEY0: - begin - key0_we = 1; - end - - 
ADDR_KEY1: - begin - key1_we = 1; - end - - ADDR_KEY2: - begin - key2_we = 1; - end - - ADDR_KEY3: - begin - key3_we = 1; - end - - ADDR_KEY4: - begin - key4_we = 1; - end - - ADDR_KEY5: - begin - key5_we = 1; - end - - ADDR_KEY6: - begin - key6_we = 1; - end - - ADDR_KEY7: - begin - key7_we = 1; - end - - ADDR_IV0: - begin - iv0_we = 1; - end - - ADDR_IV1: - begin - iv1_we = 1; - end + if (address == ADDR_CTRL) + ctrl_we = 1; - ADDR_DATA_IN0: - begin - data_in0_we = 1; - end - - ADDR_DATA_IN1: - begin - data_in1_we = 1; - end - - ADDR_DATA_IN2: - begin - data_in2_we = 1; - end - - ADDR_DATA_IN3: - begin - data_in3_we = 1; - end + if (address == ADDR_KEYLEN) + keylen_we = 1; - ADDR_DATA_IN4: - begin - data_in4_we = 1; - end + if (address == ADDR_ROUNDS) + rounds_we = 1; - ADDR_DATA_IN5: - begin - data_in5_we = 1; - end - - ADDR_DATA_IN6: - begin - data_in6_we = 1; - end + if ((address >= ADDR_KEY0) && (address <= ADDR_KEY7)) + key_we = 1; - ADDR_DATA_IN7: - begin - data_in7_we = 1; - end - - ADDR_DATA_IN8: - begin - data_in8_we = 1; - end - - ADDR_DATA_IN9: - begin - data_in9_we = 1; - end - - ADDR_DATA_IN10: - begin - data_in10_we = 1; - end - - ADDR_DATA_IN11: - begin - data_in11_we = 1; - end - - ADDR_DATA_IN12: - begin - data_in12_we = 1; - end + if ((address >= ADDR_IV0) && (address <= ADDR_IV1)) + iv_we = 1; - ADDR_DATA_IN13: - begin - data_in13_we = 1; - end - - ADDR_DATA_IN14: - begin - data_in14_we = 1; - end - - ADDR_DATA_IN15: - begin - data_in15_we = 1; - end - - default: - begin - tmp_error = 1; - end - endcase // case (address) + if ((address >= ADDR_DATA_IN0) && (address <= ADDR_DATA_IN15)) + data_in_we = 1; end // if (we) else begin - case (address) - ADDR_CTRL: - begin - tmp_read_data = {28'h0000000, 2'b00, next_reg, init_reg}; - end - - ADDR_STATUS: - begin - tmp_read_data = {28'h0000000, 2'b00, - {data_out_valid_reg, ready_reg}}; - end + if ((address >= ADDR_KEY0) && (address <= ADDR_KEY7)) + tmp_read_data = key_reg[address[2 : 0]]; - 
ADDR_KEYLEN: - begin - tmp_read_data = {28'h0000000, 3'b000, keylen_reg}; - end + if ((address >= ADDR_DATA_OUT0) && (address <= ADDR_DATA_OUT15)) + tmp_read_data = core_data_out[(15 - (address - ADDR_DATA_OUT0)) * 32 +: 32]; - ADDR_ROUNDS: - begin - tmp_read_data = {24'h000000, 3'b000, rounds_reg}; - end - - ADDR_KEY0: - begin - tmp_read_data = key0_reg; - end - - ADDR_KEY1: - begin - tmp_read_data = key1_reg; - end - - ADDR_KEY2: - begin - tmp_read_data = key2_reg; - end - - ADDR_KEY3: - begin - tmp_read_data = key3_reg; - end - - ADDR_KEY4: - begin - tmp_read_data = key4_reg; - end - - ADDR_KEY5: - begin - tmp_read_data = key5_reg; - end - - ADDR_KEY6: - begin - tmp_read_data = key6_reg; - end - - ADDR_KEY7: - begin - tmp_read_data = key7_reg; - end - - ADDR_IV0: - begin - tmp_read_data = iv0_reg; - end - - ADDR_IV1: - begin - tmp_read_data = iv1_reg; - end - - ADDR_DATA_OUT0: - begin - tmp_read_data = data_out0_reg; - end - - ADDR_DATA_OUT1: - begin - tmp_read_data = data_out1_reg; - end - - ADDR_DATA_OUT2: - begin - tmp_read_data = data_out2_reg; - end - - ADDR_DATA_OUT3: - begin - tmp_read_data = data_out3_reg; - end - - ADDR_DATA_OUT4: - begin - tmp_read_data = data_out4_reg; - end - - ADDR_DATA_OUT5: - begin - tmp_read_data = data_out5_reg; - end - - ADDR_DATA_OUT6: - begin - tmp_read_data = data_out6_reg; - end - - ADDR_DATA_OUT7: - begin - tmp_read_data = data_out7_reg; - end - - ADDR_DATA_OUT8: - begin - tmp_read_data = data_out8_reg; - end - - ADDR_DATA_OUT9: - begin - tmp_read_data = data_out9_reg; - end - - ADDR_DATA_OUT10: - begin - tmp_read_data = data_out10_reg; - end - - ADDR_DATA_OUT11: - begin - tmp_read_data = data_out11_reg; - end - - ADDR_DATA_OUT12: - begin - tmp_read_data = data_out12_reg; - end - - ADDR_DATA_OUT13: - begin - tmp_read_data = data_out13_reg; - end - - ADDR_DATA_OUT14: - begin - tmp_read_data = data_out14_reg; - end - - ADDR_DATA_OUT15: - begin - tmp_read_data = data_out15_reg; - end + case (address) + ADDR_NAME0: 
tmp_read_data = CORE_NAME0; + ADDR_NAME1: tmp_read_data = CORE_NAME1; + ADDR_VERSION: tmp_read_data = CORE_VERSION; + ADDR_CTRL: tmp_read_data = {30'h0, next_reg, init_reg}; + ADDR_STATUS: tmp_read_data = {30'h0, core_data_out_valid, core_ready}; + ADDR_KEYLEN: tmp_read_data = {31'h0, keylen_reg}; + ADDR_ROUNDS: tmp_read_data = {27'h0, rounds_reg}; + ADDR_IV0: tmp_read_data = iv_reg[0]; + ADDR_IV1: tmp_read_data = iv_reg[1]; default: begin - tmp_error = 1; end endcase // case (address) end diff --git a/src/rtl/chacha_core.v b/src/rtl/chacha_core.v index 0e1158b..5f496a4 100644 --- a/src/rtl/chacha_core.v +++ b/src/rtl/chacha_core.v @@ -7,7 +7,7 @@ // // // Author: Joachim Strombergson -// Copyright (c) 2014, NORDUnet A/S All rights reserved. +// Copyright (c) 2011, NORDUnet A/S All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -63,202 +63,89 @@ module chacha_core( // Internal constant and parameter definitions. //---------------------------------------------------------------- // Datapath quartterround states names. 
- parameter STATE_QR0 = 1'b0; - parameter STATE_QR1 = 1'b1; + localparam QR0 = 0; + localparam QR1 = 1; - parameter NUM_ROUNDS = 4'h8; + localparam NUM_ROUNDS = 4'h8; - parameter TAU0 = 32'h61707865; - parameter TAU1 = 32'h3120646e; - parameter TAU2 = 32'h79622d36; - parameter TAU3 = 32'h6b206574; + localparam TAU0 = 32'h61707865; + localparam TAU1 = 32'h3120646e; + localparam TAU2 = 32'h79622d36; + localparam TAU3 = 32'h6b206574; - parameter SIGMA0 = 32'h61707865; - parameter SIGMA1 = 32'h3320646e; - parameter SIGMA2 = 32'h79622d32; - parameter SIGMA3 = 32'h6b206574; + localparam SIGMA0 = 32'h61707865; + localparam SIGMA1 = 32'h3320646e; + localparam SIGMA2 = 32'h79622d32; + localparam SIGMA3 = 32'h6b206574; - parameter CTRL_IDLE = 3'h0; - parameter CTRL_INIT = 3'h1; - parameter CTRL_ROUNDS = 3'h2; - parameter CTRL_FINALIZE = 3'h3; - parameter CTRL_DONE = 3'h4; + localparam CTRL_IDLE = 3'h0; + localparam CTRL_INIT = 3'h1; + localparam CTRL_ROUNDS = 3'h2; + localparam CTRL_FINALIZE = 3'h3; + localparam CTRL_DONE = 3'h4; + + + //---------------------------------------------------------------- + // l2b() + // + // Swap bytes from little to big endian byte order. + //---------------------------------------------------------------- + function [31 : 0] l2b(input [31 : 0] op); + begin + l2b = {op[7 : 0], op[15 : 8], op[23 : 16], op[31 : 24]}; + end + endfunction // b2l //---------------------------------------------------------------- // Registers including update variables and write enable. 
//---------------------------------------------------------------- - reg [31 : 0] key0_reg; - reg [31 : 0] key0_new; - reg [31 : 0] key1_reg; - reg [31 : 0] key1_new; - reg [31 : 0] key2_reg; - reg [31 : 0] key2_new; - reg [31 : 0] key3_reg; - reg [31 : 0] key3_new; - reg [31 : 0] key4_reg; - reg [31 : 0] key4_new; - reg [31 : 0] key5_reg; - reg [31 : 0] key5_new; - reg [31 : 0] key6_reg; - reg [31 : 0] key6_new; - reg [31 : 0] key7_reg; - reg [31 : 0] key7_new; - - reg keylen_reg; - reg keylen_new; - - reg [31 : 0] iv0_reg; - reg [31 : 0] iv0_new; - reg [31 : 0] iv1_reg; - reg [31 : 0] iv1_new; - - reg [31 : 0] state0_reg; - reg [31 : 0] state0_new; - reg [31 : 0] state1_reg; - reg [31 : 0] state1_new; - reg [31 : 0] state2_reg; - reg [31 : 0] state2_new; - reg [31 : 0] state3_reg; - reg [31 : 0] state3_new; - reg [31 : 0] state4_reg; - reg [31 : 0] state4_new; - reg [31 : 0] state5_reg; - reg [31 : 0] state5_new; - reg [31 : 0] state6_reg; - reg [31 : 0] state6_new; - reg [31 : 0] state7_reg; - reg [31 : 0] state7_new; - reg [31 : 0] state8_reg; - reg [31 : 0] state8_new; - reg [31 : 0] state9_reg; - reg [31 : 0] state9_new; - reg [31 : 0] state10_reg; - reg [31 : 0] state10_new; - reg [31 : 0] state11_reg; - reg [31 : 0] state11_new; - reg [31 : 0] state12_reg; - reg [31 : 0] state12_new; - reg [31 : 0] state13_reg; - reg [31 : 0] state13_new; - reg [31 : 0] state14_reg; - reg [31 : 0] state14_new; - reg [31 : 0] state15_reg; - reg [31 : 0] state15_new; - reg state_we; - - reg [31 : 0] x0_reg; - reg [31 : 0] x0_new; - reg x0_we; - - reg [31 : 0] x1_reg; - reg [31 : 0] x1_new; - reg x1_we; - - reg [31 : 0] x2_reg; - reg [31 : 0] x2_new; - reg x2_we; - - reg [31 : 0] x3_reg; - reg [31 : 0] x3_new; - reg x3_we; - - reg [31 : 0] x4_reg; - reg [31 : 0] x4_new; - reg x4_we; - - reg [31 : 0] x5_reg; - reg [31 : 0] x5_new; - reg x5_we; - - reg [31 : 0] x6_reg; - reg [31 : 0] x6_new; - reg x6_we; - - reg [31 : 0] x7_reg; - reg [31 : 0] x7_new; - reg x7_we; - - reg [31 : 
0] x8_reg; - reg [31 : 0] x8_new; - reg x8_we; - - reg [31 : 0] x9_reg; - reg [31 : 0] x9_new; - reg x9_we; - - reg [31 : 0] x10_reg; - reg [31 : 0] x10_new; - reg x10_we; - - reg [31 : 0] x11_reg; - reg [31 : 0] x11_new; - reg x11_we; - - reg [31 : 0] x12_reg; - reg [31 : 0] x12_new; - reg x12_we; - - reg [31 : 0] x13_reg; - reg [31 : 0] x13_new; - reg x13_we; - - reg [31 : 0] x14_reg; - reg [31 : 0] x14_new; - reg x14_we; - - reg [31 : 0] x15_reg; - reg [31 : 0] x15_new; - reg x15_we; - - reg [3 : 0] rounds_reg; - reg [3 : 0] rounds_new; - - reg [511 : 0] data_in_reg; - reg data_in_we; + reg [31 : 0] state_reg [0 : 15]; + reg [31 : 0] state_new [0 : 15]; + reg state_we; reg [511 : 0] data_out_reg; reg [511 : 0] data_out_new; - reg data_out_we; - - reg data_out_valid_reg; - reg data_out_valid_new; - reg data_out_valid_we; - - reg ready_reg; - reg ready_new; - reg ready_we; - - reg qr_ctr_reg; - reg qr_ctr_new; - reg qr_ctr_we; - reg qr_ctr_inc; - reg qr_ctr_rst; - - reg [3 : 0] dr_ctr_reg; - reg [3 : 0] dr_ctr_new; - reg dr_ctr_we; - reg dr_ctr_inc; - reg dr_ctr_rst; - - reg [31 : 0] block0_ctr_reg; - reg [31 : 0] block0_ctr_new; - reg block0_ctr_we; - reg [31 : 0] block1_ctr_reg; - reg [31 : 0] block1_ctr_new; - reg block1_ctr_we; - reg block_ctr_inc; - reg block_ctr_rst; - - reg [2 : 0] chacha_ctrl_reg; - reg [2 : 0] chacha_ctrl_new; - reg chacha_ctrl_we; + + reg data_out_valid_reg; + reg data_out_valid_new; + reg data_out_valid_we; + + reg qr_ctr_reg; + reg qr_ctr_new; + reg qr_ctr_we; + reg qr_ctr_inc; + reg qr_ctr_rst; + + reg [3 : 0] dr_ctr_reg; + reg [3 : 0] dr_ctr_new; + reg dr_ctr_we; + reg dr_ctr_inc; + reg dr_ctr_rst; + + reg [31 : 0] block0_ctr_reg; + reg [31 : 0] block0_ctr_new; + reg block0_ctr_we; + reg [31 : 0] block1_ctr_reg; + reg [31 : 0] block1_ctr_new; + reg block1_ctr_we; + reg block_ctr_inc; + reg block_ctr_set; + + reg ready_reg; + reg ready_new; + reg ready_we; + + reg [2 : 0] chacha_ctrl_reg; + reg [2 : 0] chacha_ctrl_new; + reg 
chacha_ctrl_we; //---------------------------------------------------------------- // Wires. //---------------------------------------------------------------- - reg sample_params; + reg [31 : 0] init_state_word [0 : 15]; + reg init_state; reg update_state; reg update_output; @@ -356,455 +243,127 @@ module chacha_core( // Concurrent connectivity for ports etc. //---------------------------------------------------------------- assign data_out = data_out_reg; - assign data_out_valid = data_out_valid_reg; - assign ready = ready_reg; - //---------------------------------------------------------------- // reg_update // // Update functionality for all registers in the core. - // All registers are positive edge triggered with asynchronous + // All registers are positive edge triggered with synchronous // active low reset. All registers have write enable. //---------------------------------------------------------------- - always @ (posedge clk or negedge reset_n) + always @ (posedge clk) begin : reg_update + integer i; + if (!reset_n) begin - key0_reg <= 32'h00000000; - key1_reg <= 32'h00000000; - key2_reg <= 32'h00000000; - key3_reg <= 32'h00000000; - key4_reg <= 32'h00000000; - key5_reg <= 32'h00000000; - key6_reg <= 32'h00000000; - key7_reg <= 32'h00000000; - iv0_reg <= 32'h00000000; - iv1_reg <= 32'h00000000; - state0_reg <= 32'h00000000; - state1_reg <= 32'h00000000; - state2_reg <= 32'h00000000; - state3_reg <= 32'h00000000; - state4_reg <= 32'h00000000; - state5_reg <= 32'h00000000; - state6_reg <= 32'h00000000; - state7_reg <= 32'h00000000; - state8_reg <= 32'h00000000; - state9_reg <= 32'h00000000; - state10_reg <= 32'h00000000; - state11_reg <= 32'h00000000; - state12_reg <= 32'h00000000; - state13_reg <= 32'h00000000; - state14_reg <= 32'h00000000; - state15_reg <= 32'h00000000; - x0_reg <= 32'h00000000; - x1_reg <= 32'h00000000; - x2_reg <= 32'h00000000; - x3_reg <= 32'h00000000; - x4_reg <= 32'h00000000; - x5_reg <= 32'h00000000; - x6_reg <= 32'h00000000; - 
x7_reg <= 32'h00000000; - x8_reg <= 32'h00000000; - x9_reg <= 32'h00000000; - x10_reg <= 32'h00000000; - x11_reg <= 32'h00000000; - x12_reg <= 32'h00000000; - x13_reg <= 32'h00000000; - x14_reg <= 32'h00000000; - x15_reg <= 32'h00000000; - data_in_reg <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; - data_out_reg <= 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; - rounds_reg <= 4'h0; - ready_reg <= 1; + for (i = 0 ; i < 16 ; i = i + 1) + state_reg[i] <= 32'h0; + + data_out_reg <= 512'h0; data_out_valid_reg <= 0; - qr_ctr_reg <= STATE_QR0; + qr_ctr_reg <= QR0; dr_ctr_reg <= 0; - block0_ctr_reg <= 32'h00000000; - block1_ctr_reg <= 32'h00000000; + block0_ctr_reg <= 32'h0; + block1_ctr_reg <= 32'h0; chacha_ctrl_reg <= CTRL_IDLE; + ready_reg <= 1; end else begin - if (sample_params) - begin - key0_reg <= key0_new; - key1_reg <= key1_new; - key2_reg <= key2_new; - key3_reg <= key3_new; - key4_reg <= key4_new; - key5_reg <= key5_new; - key6_reg <= key6_new; - key7_reg <= key7_new; - iv0_reg <= iv0_new; - iv1_reg <= iv1_new; - rounds_reg <= rounds_new; - keylen_reg <= keylen_new; - end - - if (data_in_we) - begin - data_in_reg <= data_in; - end - if (state_we) begin - state0_reg <= state0_new; - state1_reg <= state1_new; - state2_reg <= state2_new; - state3_reg <= state3_new; - state4_reg <= state4_new; - state5_reg <= state5_new; - state6_reg <= state6_new; - state7_reg <= state7_new; - state8_reg <= state8_new; - state9_reg <= state9_new; - state10_reg <= state10_new; - state11_reg <= state11_new; - state12_reg <= state12_new; - state13_reg <= state13_new; - state14_reg <= state14_new; - state15_reg <= state15_new; - end - - if (x0_we) - begin - x0_reg <= x0_new; - end - - if (x1_we) - begin - x1_reg <= x1_new; - end - - if (x2_we) - begin - x2_reg <= x2_new; - end - - if (x3_we) - begin - x3_reg 
<= x3_new; - end - - if (x4_we) - begin - x4_reg <= x4_new; - end - - if (x5_we) - begin - x5_reg <= x5_new; - end - - if (x6_we) - begin - x6_reg <= x6_new; - end - - if (x7_we) - begin - x7_reg <= x7_new; - end - - if (x8_we) - begin - x8_reg <= x8_new; - end - - if (x9_we) - begin - x9_reg <= x9_new; - end - - if (x10_we) - begin - x10_reg <= x10_new; - end - - if (x11_we) - begin - x11_reg <= x11_new; - end - - if (x12_we) - begin - x12_reg <= x12_new; + for (i = 0 ; i < 16 ; i = i + 1) + state_reg[i] <= state_new[i]; end - if (x13_we) - begin - x13_reg <= x13_new; - end - - if (x14_we) - begin - x14_reg <= x14_new; - end - - if (x15_we) - begin - x15_reg <= x15_new; - end - - if (data_out_we) - begin - data_out_reg <= data_out_new; - end - - if (ready_we) - begin - ready_reg <= ready_new; - end + if (update_output) + data_out_reg <= data_out_new; if (data_out_valid_we) - begin - data_out_valid_reg <= data_out_valid_new; - end + data_out_valid_reg <= data_out_valid_new; if (qr_ctr_we) - begin - qr_ctr_reg <= qr_ctr_new; - end + qr_ctr_reg <= qr_ctr_new; if (dr_ctr_we) - begin - dr_ctr_reg <= dr_ctr_new; - end + dr_ctr_reg <= dr_ctr_new; if (block0_ctr_we) - begin - block0_ctr_reg <= block0_ctr_new; - end + block0_ctr_reg <= block0_ctr_new; if (block1_ctr_we) - begin - block1_ctr_reg <= block1_ctr_new; - end + block1_ctr_reg <= block1_ctr_new; + + if (ready_we) + ready_reg <= ready_new; if (chacha_ctrl_we) - begin - chacha_ctrl_reg <= chacha_ctrl_new; - end + chacha_ctrl_reg <= chacha_ctrl_new; end end // reg_update //---------------------------------------------------------------- - // data_out_logic - // Final output logic that combines the result from procceing - // with the input word. This adds a final layer of XOR gates. + // init_state_logic // - // Note that we also remap all the words into LSB format. + // Calculates the initial state for a given block. 
//---------------------------------------------------------------- always @* - begin : data_out_logic - reg [31 : 0] msb_block_state0; - reg [31 : 0] msb_block_state1; - reg [31 : 0] msb_block_state2; - reg [31 : 0] msb_block_state3; - reg [31 : 0] msb_block_state4; - reg [31 : 0] msb_block_state5; - reg [31 : 0] msb_block_state6; - reg [31 : 0] msb_block_state7; - reg [31 : 0] msb_block_state8; - reg [31 : 0] msb_block_state9; - reg [31 : 0] msb_block_state10; - reg [31 : 0] msb_block_state11; - reg [31 : 0] msb_block_state12; - reg [31 : 0] msb_block_state13; - reg [31 : 0] msb_block_state14; - reg [31 : 0] msb_block_state15; - - reg [31 : 0] lsb_block_state0; - reg [31 : 0] lsb_block_state1; - reg [31 : 0] lsb_block_state2; - reg [31 : 0] lsb_block_state3; - reg [31 : 0] lsb_block_state4; - reg [31 : 0] lsb_block_state5; - reg [31 : 0] lsb_block_state6; - reg [31 : 0] lsb_block_state7; - reg [31 : 0] lsb_block_state8; - reg [31 : 0] lsb_block_state9; - reg [31 : 0] lsb_block_state10; - reg [31 : 0] lsb_block_state11; - reg [31 : 0] lsb_block_state12; - reg [31 : 0] lsb_block_state13; - reg [31 : 0] lsb_block_state14; - reg [31 : 0] lsb_block_state15; - - reg [511 : 0] lsb_block_state; - - lsb_block_state = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; - - data_out_new = 512'h00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000; - data_out_we = 0; - - if (update_output) + begin : init_state_logic + reg [31 : 0] key0; + reg [31 : 0] key1; + reg [31 : 0] key2; + reg [31 : 0] key3; + reg [31 : 0] key4; + reg [31 : 0] key5; + reg [31 : 0] key6; + reg [31 : 0] key7; + + key0 = l2b(key[255 : 224]); + key1 = l2b(key[223 : 192]); + key2 = l2b(key[191 : 160]); + key3 = l2b(key[159 : 128]); + key4 = l2b(key[127 : 96]); + key5 = l2b(key[95 : 64]); + key6 = l2b(key[63 : 32]); + key7 = l2b(key[31 : 0]); + + 
init_state_word[04] = key0; + init_state_word[05] = key1; + init_state_word[06] = key2; + init_state_word[07] = key3; + init_state_word[12] = block0_ctr_reg; + init_state_word[13] = block1_ctr_reg; + init_state_word[14] = l2b(iv[63 : 32]); + init_state_word[15] = l2b(iv[31 : 0]); + + if (keylen) begin - msb_block_state0 = state0_reg + x0_reg; - msb_block_state1 = state1_reg + x1_reg; - msb_block_state2 = state2_reg + x2_reg; - msb_block_state3 = state3_reg + x3_reg; - msb_block_state4 = state4_reg + x4_reg; - msb_block_state5 = state5_reg + x5_reg; - msb_block_state6 = state6_reg + x6_reg; - msb_block_state7 = state7_reg + x7_reg; - msb_block_state8 = state8_reg + x8_reg; - msb_block_state9 = state9_reg + x9_reg; - msb_block_state10 = state10_reg + x10_reg; - msb_block_state11 = state11_reg + x11_reg; - msb_block_state12 = state12_reg + x12_reg; - msb_block_state13 = state13_reg + x13_reg; - msb_block_state14 = state14_reg + x14_reg; - msb_block_state15 = state15_reg + x15_reg; - - lsb_block_state0 = {msb_block_state0[7 : 0], - msb_block_state0[15 : 8], - msb_block_state0[23 : 16], - msb_block_state0[31 : 24]}; - - lsb_block_state1 = {msb_block_state1[7 : 0], - msb_block_state1[15 : 8], - msb_block_state1[23 : 16], - msb_block_state1[31 : 24]}; - - lsb_block_state2 = {msb_block_state2[7 : 0], - msb_block_state2[15 : 8], - msb_block_state2[23 : 16], - msb_block_state2[31 : 24]}; - - lsb_block_state3 = {msb_block_state3[7 : 0], - msb_block_state3[15 : 8], - msb_block_state3[23 : 16], - msb_block_state3[31 : 24]}; - - lsb_block_state4 = {msb_block_state4[7 : 0], - msb_block_state4[15 : 8], - msb_block_state4[23 : 16], - msb_block_state4[31 : 24]}; - - lsb_block_state5 = {msb_block_state5[7 : 0], - msb_block_state5[15 : 8], - msb_block_state5[23 : 16], - msb_block_state5[31 : 24]}; - - lsb_block_state6 = {msb_block_state6[7 : 0], - msb_block_state6[15 : 8], - msb_block_state6[23 : 16], - msb_block_state6[31 : 24]}; - - lsb_block_state7 = {msb_block_state7[7 : 0], - 
msb_block_state7[15 : 8], - msb_block_state7[23 : 16], - msb_block_state7[31 : 24]}; - - lsb_block_state8 = {msb_block_state8[7 : 0], - msb_block_state8[15 : 8], - msb_block_state8[23 : 16], - msb_block_state8[31 : 24]}; - - lsb_block_state9 = {msb_block_state9[7 : 0], - msb_block_state9[15 : 8], - msb_block_state9[23 : 16], - msb_block_state9[31 : 24]}; - - lsb_block_state10 = {msb_block_state10[7 : 0], - msb_block_state10[15 : 8], - msb_block_state10[23 : 16], - msb_block_state10[31 : 24]}; - - lsb_block_state11 = {msb_block_state11[7 : 0], - msb_block_state11[15 : 8], - msb_block_state11[23 : 16], - msb_block_state11[31 : 24]}; - - lsb_block_state12 = {msb_block_state12[7 : 0], - msb_block_state12[15 : 8], - msb_block_state12[23 : 16], - msb_block_state12[31 : 24]}; - - lsb_block_state13 = {msb_block_state13[7 : 0], - msb_block_state13[15 : 8], - msb_block_state13[23 : 16], - msb_block_state13[31 : 24]}; - - lsb_block_state14 = {msb_block_state14[7 : 0], - msb_block_state14[15 : 8], - msb_block_state14[23 : 16], - msb_block_state14[31 : 24]}; - - lsb_block_state15 = {msb_block_state15[7 : 0], - msb_block_state15[15 : 8], - msb_block_state15[23 : 16], - msb_block_state15[31 : 24]}; - - lsb_block_state = {lsb_block_state0, lsb_block_state1, - lsb_block_state2, lsb_block_state3, - lsb_block_state4, lsb_block_state5, - lsb_block_state6, lsb_block_state7, - lsb_block_state8, lsb_block_state9, - lsb_block_state10, lsb_block_state11, - lsb_block_state12, lsb_block_state13, - lsb_block_state14, lsb_block_state15}; - - data_out_new = data_in_reg ^ lsb_block_state; - data_out_we = 1; - end // if (update_output) - end // data_out_logic - - - //---------------------------------------------------------------- - // sample_parameters - // Logic (wires) that convert parameter input to appropriate - // format for processing. 
- //---------------------------------------------------------------- - always @* - begin : sample_parameters - key0_new = 32'h00000000; - key1_new = 32'h00000000; - key2_new = 32'h00000000; - key3_new = 32'h00000000; - key4_new = 32'h00000000; - key5_new = 32'h00000000; - key6_new = 32'h00000000; - key7_new = 32'h00000000; - iv0_new = 32'h00000000; - iv1_new = 32'h00000000; - rounds_new = 4'h0; - keylen_new = 1'b0; - - if (sample_params) + // 256 bit key. + init_state_word[00] = SIGMA0; + init_state_word[01] = SIGMA1; + init_state_word[02] = SIGMA2; + init_state_word[03] = SIGMA3; + init_state_word[08] = key4; + init_state_word[09] = key5; + init_state_word[10] = key6; + init_state_word[11] = key7; + end + else begin - key0_new = {key[231 : 224], key[239 : 232], - key[247 : 240], key[255 : 248]}; - key1_new = {key[199 : 192], key[207 : 200], - key[215 : 208], key[223 : 216]}; - key2_new = {key[167 : 160], key[175 : 168], - key[183 : 176], key[191 : 184]}; - key3_new = {key[135 : 128], key[143 : 136], - key[151 : 144], key[159 : 152]}; - key4_new = {key[103 : 96], key[111 : 104], - key[119 : 112], key[127 : 120]}; - key5_new = {key[71 : 64], key[79 : 72], - key[87 : 80], key[95 : 88]}; - key6_new = {key[39 : 32], key[47 : 40], - key[55 : 48], key[63 : 56]}; - key7_new = {key[7 : 0], key[15 : 8], - key[23 : 16], key[31 : 24]}; - - iv0_new = {iv[39 : 32], iv[47 : 40], - iv[55 : 48], iv[63 : 56]}; - iv1_new = {iv[7 : 0], iv[15 : 8], - iv[23 : 16], iv[31 : 24]}; - - // Div by two since we count double rounds. - rounds_new = rounds[4 : 1]; - - keylen_new = keylen; + // 128 bit key. 
+ init_state_word[00] = TAU0; + init_state_word[01] = TAU1; + init_state_word[02] = TAU2; + init_state_word[03] = TAU3; + init_state_word[08] = key0; + init_state_word[09] = key1; + init_state_word[10] = key2; + init_state_word[11] = key3; end end @@ -815,260 +374,110 @@ module chacha_core( //---------------------------------------------------------------- always @* begin : state_logic - reg [31 : 0] new_state_word0; - reg [31 : 0] new_state_word1; - reg [31 : 0] new_state_word2; - reg [31 : 0] new_state_word3; - reg [31 : 0] new_state_word4; - reg [31 : 0] new_state_word5; - reg [31 : 0] new_state_word6; - reg [31 : 0] new_state_word7; - reg [31 : 0] new_state_word8; - reg [31 : 0] new_state_word9; - reg [31 : 0] new_state_word10; - reg [31 : 0] new_state_word11; - reg [31 : 0] new_state_word12; - reg [31 : 0] new_state_word13; - reg [31 : 0] new_state_word14; - reg [31 : 0] new_state_word15; - - new_state_word0 = 32'h00000000; - new_state_word1 = 32'h00000000; - new_state_word2 = 32'h00000000; - new_state_word3 = 32'h00000000; - new_state_word4 = 32'h00000000; - new_state_word5 = 32'h00000000; - new_state_word6 = 32'h00000000; - new_state_word7 = 32'h00000000; - new_state_word8 = 32'h00000000; - new_state_word9 = 32'h00000000; - new_state_word10 = 32'h00000000; - new_state_word11 = 32'h00000000; - new_state_word12 = 32'h00000000; - new_state_word13 = 32'h00000000; - new_state_word14 = 32'h00000000; - new_state_word15 = 32'h00000000; - - x0_new = 32'h00000000; - x1_new = 32'h00000000; - x2_new = 32'h00000000; - x3_new = 32'h00000000; - x4_new = 32'h00000000; - x5_new = 32'h00000000; - x6_new = 32'h00000000; - x7_new = 32'h00000000; - x8_new = 32'h00000000; - x9_new = 32'h00000000; - x10_new = 32'h00000000; - x11_new = 32'h00000000; - x12_new = 32'h00000000; - x13_new = 32'h00000000; - x14_new = 32'h00000000; - x15_new = 32'h00000000; - x0_we = 0; - x1_we = 0; - x2_we = 0; - x3_we = 0; - x4_we = 0; - x5_we = 0; - x6_we = 0; - x7_we = 0; - x8_we = 0; - x9_we = 0; - 
x10_we = 0; - x11_we = 0; - x12_we = 0; - x13_we = 0; - x14_we = 0; - x15_we = 0; - - state0_new = 32'h00000000; - state1_new = 32'h00000000; - state2_new = 32'h00000000; - state3_new = 32'h00000000; - state4_new = 32'h00000000; - state5_new = 32'h00000000; - state6_new = 32'h00000000; - state7_new = 32'h00000000; - state8_new = 32'h00000000; - state9_new = 32'h00000000; - state10_new = 32'h00000000; - state11_new = 32'h00000000; - state12_new = 32'h00000000; - state13_new = 32'h00000000; - state14_new = 32'h00000000; - state15_new = 32'h00000000; + integer i; + + for (i = 0 ; i < 16 ; i = i + 1) + state_new[i] = 32'h0; state_we = 0; + qr0_a = 32'h0; + qr0_b = 32'h0; + qr0_c = 32'h0; + qr0_d = 32'h0; + qr1_a = 32'h0; + qr1_b = 32'h0; + qr1_c = 32'h0; + qr1_d = 32'h0; + qr2_a = 32'h0; + qr2_b = 32'h0; + qr2_c = 32'h0; + qr2_d = 32'h0; + qr3_a = 32'h0; + qr3_b = 32'h0; + qr3_c = 32'h0; + qr3_d = 32'h0; + if (init_state) begin - new_state_word4 = key0_reg; - new_state_word5 = key1_reg; - new_state_word6 = key2_reg; - new_state_word7 = key3_reg; - - new_state_word12 = block0_ctr_reg; - new_state_word13 = block1_ctr_reg; - - new_state_word14 = iv0_reg; - new_state_word15 = iv1_reg; - - if (keylen_reg) - begin - // 256 bit key. - new_state_word0 = SIGMA0; - new_state_word1 = SIGMA1; - new_state_word2 = SIGMA2; - new_state_word3 = SIGMA3; - new_state_word8 = key4_reg; - new_state_word9 = key5_reg; - new_state_word10 = key6_reg; - new_state_word11 = key7_reg; - end - else - begin - // 128 bit key. 
- new_state_word0 = TAU0; - new_state_word1 = TAU1; - new_state_word2 = TAU2; - new_state_word3 = TAU3; - new_state_word8 = key0_reg; - new_state_word9 = key1_reg; - new_state_word10 = key2_reg; - new_state_word11 = key3_reg; - end - - x0_new = new_state_word0; - x1_new = new_state_word1; - x2_new = new_state_word2; - x3_new = new_state_word3; - x4_new = new_state_word4; - x5_new = new_state_word5; - x6_new = new_state_word6; - x7_new = new_state_word7; - x8_new = new_state_word8; - x9_new = new_state_word9; - x10_new = new_state_word10; - x11_new = new_state_word11; - x12_new = new_state_word12; - x13_new = new_state_word13; - x14_new = new_state_word14; - x15_new = new_state_word15; - x0_we = 1; - x1_we = 1; - x2_we = 1; - x3_we = 1; - x4_we = 1; - x5_we = 1; - x6_we = 1; - x7_we = 1; - x8_we = 1; - x9_we = 1; - x10_we = 1; - x11_we = 1; - x12_we = 1; - x13_we = 1; - x14_we = 1; - x15_we = 1; - - state0_new = new_state_word0; - state1_new = new_state_word1; - state2_new = new_state_word2; - state3_new = new_state_word3; - state4_new = new_state_word4; - state5_new = new_state_word5; - state6_new = new_state_word6; - state7_new = new_state_word7; - state8_new = new_state_word8; - state9_new = new_state_word9; - state10_new = new_state_word10; - state11_new = new_state_word11; - state12_new = new_state_word12; - state13_new = new_state_word13; - state14_new = new_state_word14; - state15_new = new_state_word15; - state_we = 1; + for (i = 0 ; i < 16 ; i = i + 1) + state_new[i] = init_state_word[i]; + state_we = 1; end // if (init_state) - else if (update_state) + if (update_state) begin + state_we = 1; case (qr_ctr_reg) - STATE_QR0: + QR0: begin - x0_new = qr0_a_prim; - x4_new = qr0_b_prim; - x8_new = qr0_c_prim; - x12_new = qr0_d_prim; - x0_we = 1; - x4_we = 1; - x8_we = 1; - x12_we = 1; - - x1_new = qr1_a_prim; - x5_new = qr1_b_prim; - x9_new = qr1_c_prim; - x13_new = qr1_d_prim; - x1_we = 1; - x5_we = 1; - x9_we = 1; - x13_we = 1; - - x2_new = qr2_a_prim; - x6_new 
= qr2_b_prim; - x10_new = qr2_c_prim; - x14_new = qr2_d_prim; - x2_we = 1; - x6_we = 1; - x10_we = 1; - x14_we = 1; - - x3_new = qr3_a_prim; - x7_new = qr3_b_prim; - x11_new = qr3_c_prim; - x15_new = qr3_d_prim; - x3_we = 1; - x7_we = 1; - x11_we = 1; - x15_we = 1; + qr0_a = state_reg[00]; + qr0_b = state_reg[04]; + qr0_c = state_reg[08]; + qr0_d = state_reg[12]; + qr1_a = state_reg[01]; + qr1_b = state_reg[05]; + qr1_c = state_reg[09]; + qr1_d = state_reg[13]; + qr2_a = state_reg[02]; + qr2_b = state_reg[06]; + qr2_c = state_reg[10]; + qr2_d = state_reg[14]; + qr3_a = state_reg[03]; + qr3_b = state_reg[07]; + qr3_c = state_reg[11]; + qr3_d = state_reg[15]; + state_new[00] = qr0_a_prim; + state_new[04] = qr0_b_prim; + state_new[08] = qr0_c_prim; + state_new[12] = qr0_d_prim; + state_new[01] = qr1_a_prim; + state_new[05] = qr1_b_prim; + state_new[09] = qr1_c_prim; + state_new[13] = qr1_d_prim; + state_new[02] = qr2_a_prim; + state_new[06] = qr2_b_prim; + state_new[10] = qr2_c_prim; + state_new[14] = qr2_d_prim; + state_new[03] = qr3_a_prim; + state_new[07] = qr3_b_prim; + state_new[11] = qr3_c_prim; + state_new[15] = qr3_d_prim; end - STATE_QR1: + QR1: begin - x0_new = qr0_a_prim; - x5_new = qr0_b_prim; - x10_new = qr0_c_prim; - x15_new = qr0_d_prim; - x0_we = 1; - x5_we = 1; - x10_we = 1; - x15_we = 1; - - x1_new = qr1_a_prim; - x6_new = qr1_b_prim; - x11_new = qr1_c_prim; - x12_new = qr1_d_prim; - x1_we = 1; - x6_we = 1; - x11_we = 1; - x12_we = 1; - - x2_new = qr2_a_prim; - x7_new = qr2_b_prim; - x8_new = qr2_c_prim; - x13_new = qr2_d_prim; - x2_we = 1; - x7_we = 1; - x8_we = 1; - x13_we = 1; - - x3_new = qr3_a_prim; - x4_new = qr3_b_prim; - x9_new = qr3_c_prim; - x14_new = qr3_d_prim; - x3_we = 1; - x4_we = 1; - x9_we = 1; - x14_we = 1; + qr0_a = state_reg[00]; + qr0_b = state_reg[05]; + qr0_c = state_reg[10]; + qr0_d = state_reg[15]; + qr1_a = state_reg[01]; + qr1_b = state_reg[06]; + qr1_c = state_reg[11]; + qr1_d = state_reg[12]; + qr2_a = state_reg[02]; + 
qr2_b = state_reg[07]; + qr2_c = state_reg[08]; + qr2_d = state_reg[13]; + qr3_a = state_reg[03]; + qr3_b = state_reg[04]; + qr3_c = state_reg[09]; + qr3_d = state_reg[14]; + state_new[00] = qr0_a_prim; + state_new[05] = qr0_b_prim; + state_new[10] = qr0_c_prim; + state_new[15] = qr0_d_prim; + state_new[01] = qr1_a_prim; + state_new[06] = qr1_b_prim; + state_new[11] = qr1_c_prim; + state_new[12] = qr1_d_prim; + state_new[02] = qr2_a_prim; + state_new[07] = qr2_b_prim; + state_new[08] = qr2_c_prim; + state_new[13] = qr2_d_prim; + state_new[03] = qr3_a_prim; + state_new[04] = qr3_b_prim; + state_new[09] = qr3_c_prim; + state_new[14] = qr3_d_prim; end endcase // case (quarterround_select) end // if (update_state) @@ -1076,59 +485,37 @@ module chacha_core( //---------------------------------------------------------------- - // quarterround_mux - // Quarterround muxes that selects operands for quarterrounds. + // data_out_logic + // Final output logic that combines the result from state + // update with the input block. This adds a 16 rounds and + // a final layer of XOR gates. + // + // Note that we also remap all the words into LSB format. 
//---------------------------------------------------------------- always @* - begin : quarterround_mux - case (qr_ctr_reg) - STATE_QR0: - begin - qr0_a = x0_reg; - qr0_b = x4_reg; - qr0_c = x8_reg; - qr0_d = x12_reg; - - qr1_a = x1_reg; - qr1_b = x5_reg; - qr1_c = x9_reg; - qr1_d = x13_reg; - - qr2_a = x2_reg; - qr2_b = x6_reg; - qr2_c = x10_reg; - qr2_d = x14_reg; - - qr3_a = x3_reg; - qr3_b = x7_reg; - qr3_c = x11_reg; - qr3_d = x15_reg; - end + begin : data_out_logic + integer i; + reg [31 : 0] msb_block_state [0 : 15]; + reg [31 : 0] lsb_block_state [0 : 15]; + reg [511 : 0] block_state; - STATE_QR1: - begin - qr0_a = x0_reg; - qr0_b = x5_reg; - qr0_c = x10_reg; - qr0_d = x15_reg; - - qr1_a = x1_reg; - qr1_b = x6_reg; - qr1_c = x11_reg; - qr1_d = x12_reg; - - qr2_a = x2_reg; - qr2_b = x7_reg; - qr2_c = x8_reg; - qr2_d = x13_reg; - - qr3_a = x3_reg; - qr3_b = x4_reg; - qr3_c = x9_reg; - qr3_d = x14_reg; - end - endcase // case (quarterround_select) - end // quarterround_mux + for (i = 0 ; i < 16 ; i = i + 1) + begin + msb_block_state[i] = init_state_word[i] + state_reg[i]; + lsb_block_state[i] = l2b(msb_block_state[i][31 : 0]); + end + + block_state = {lsb_block_state[00], lsb_block_state[01], + lsb_block_state[02], lsb_block_state[03], + lsb_block_state[04], lsb_block_state[05], + lsb_block_state[06], lsb_block_state[07], + lsb_block_state[08], lsb_block_state[09], + lsb_block_state[10], lsb_block_state[11], + lsb_block_state[12], lsb_block_state[13], + lsb_block_state[14], lsb_block_state[15]}; + + data_out_new = data_in ^ block_state; + end // data_out_logic //---------------------------------------------------------------- @@ -1186,13 +573,12 @@ module chacha_core( //---------------------------------------------------------------- always @* begin : block_ctr - // Defult assignments - block0_ctr_new = 32'h00000000; - block1_ctr_new = 32'h00000000; + block0_ctr_new = 32'h0; + block1_ctr_new = 32'h0; block0_ctr_we = 0; block1_ctr_we = 0; - if (block_ctr_rst) + 
if (block_ctr_set) begin block0_ctr_new = ctr[31 : 00]; block1_ctr_new = ctr[63 : 32]; @@ -1223,46 +609,33 @@ module chacha_core( begin : chacha_ctrl_fsm init_state = 0; update_state = 0; - sample_params = 0; update_output = 0; - qr_ctr_inc = 0; qr_ctr_rst = 0; - dr_ctr_inc = 0; dr_ctr_rst = 0; - block_ctr_inc = 0; - block_ctr_rst = 0; - - data_in_we = 0; - + block_ctr_set = 0; ready_new = 0; ready_we = 0; - data_out_valid_new = 0; data_out_valid_we = 0; - chacha_ctrl_new = CTRL_IDLE; chacha_ctrl_we = 0; - case (chacha_ctrl_reg) CTRL_IDLE: begin if (init) begin + block_ctr_set = 1; ready_new = 0; ready_we = 1; - data_in_we = 1; - sample_params = 1; - block_ctr_rst = 1; chacha_ctrl_new = CTRL_INIT; chacha_ctrl_we = 1; end end - CTRL_INIT: begin init_state = 1; @@ -1272,15 +645,14 @@ module chacha_core( chacha_ctrl_we = 1; end - CTRL_ROUNDS: begin update_state = 1; qr_ctr_inc = 1; - if (qr_ctr_reg == STATE_QR1) + if (qr_ctr_reg == QR1) begin dr_ctr_inc = 1; - if (dr_ctr_reg == (rounds_reg - 1)) + if (dr_ctr_reg == (rounds[4 : 1] - 1)) begin chacha_ctrl_new = CTRL_FINALIZE; chacha_ctrl_we = 1; @@ -1288,7 +660,6 @@ module chacha_core( end end - CTRL_FINALIZE: begin ready_new = 1; @@ -1300,7 +671,6 @@ module chacha_core( chacha_ctrl_we = 1; end - CTRL_DONE: begin if (init) @@ -1309,9 +679,7 @@ module chacha_core( ready_we = 1; data_out_valid_new = 0; data_out_valid_we = 1; - data_in_we = 1; - sample_params = 1; - block_ctr_rst = 1; + block_ctr_set = 1; chacha_ctrl_new = CTRL_INIT; chacha_ctrl_we = 1; end @@ -1321,14 +689,12 @@ module chacha_core( ready_we = 1; data_out_valid_new = 0; data_out_valid_we = 1; - data_in_we = 1; block_ctr_inc = 1; chacha_ctrl_new = CTRL_INIT; chacha_ctrl_we = 1; end end - default: begin |