From cd8f45d313fe760d7f71a425bdbb567afac219d1 Mon Sep 17 00:00:00 2001 From: "Pavel V. Shatov" Date: Thu, 28 May 2015 01:51:26 +0400 Subject: Initial version of GOST 34.11-2012 (aka Streebog) hash core --- streebog_hash/streebog_core_adder_s6.v | 152 +++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 streebog_hash/streebog_core_adder_s6.v (limited to 'streebog_hash/streebog_core_adder_s6.v') diff --git a/streebog_hash/streebog_core_adder_s6.v b/streebog_hash/streebog_core_adder_s6.v new file mode 100644 index 0000000..3c254eb --- /dev/null +++ b/streebog_hash/streebog_core_adder_s6.v @@ -0,0 +1,152 @@ +`timescale 1ns / 1ps + +module streebog_core_adder_s6 + ( + clk, + ena, rdy, + x, y, sum + ); + + + // + // Ports + // + input wire clk; // core clock + input wire ena; // start addition flag + output wire rdy; // addition done flag (sum is valid) + input wire [511:0] x; // item x + input wire [511:0] y; // item y + output wire [511:0] sum; // x+y + + + /* + * ISE cannot synthesize adders using fabric that are more than 256 bits wide. Items X and Y are 512-bit wide, so + * Spartan-6 DSP blocks are used to overcome this issue. Every DSP block is configured to add 32 bits at a time, + * so total of 512/32=16 DSP blocks are required to implement addition. Every DSP block is configured to expose + * carry input and output ports. Overflow at 512-bit boundary should be ignored according to the specification, + * that's why only 15 intermediate carry lines are required. + * + * +-------------------+-------------------+- -+-------------------+ + * [X] | 511 : 480 | 479 : 448 | ... | 31 : 0 | + * +------*------------+------*------------+- -+------*------------+ + * | | | + * +------|------------+------|------------+- -+------|------------+ + * [Y] | | 511 : 480 | | 479 : 448 | ... | | 31 : 0 | + * +------|-----*------+------|------------+- -+------|------------+ + * | | | | | | + * | | | | | | + * v v v v v v + * +---+-+---+ +---+-+---+ +---+-+---+ + * | A | | B | | A | | B | | A | | B | + * +---------+ +---+-+---+ +---+-+---+ + * | DSP #15 | | DSP #15 | | DSP #0 | + * |---------| |---------| |---------| + * | Carry | | Carry | | Carry | + * X --<-Out In-<--C[14]--<-Out In-<--C[13]- ... -C[ 0]--<-Out In-<-- 0 + * +---------+ +---------+ +---------+ + * | S | | S | | S | + * +---------+ +---------+ +---------+ + * | | | + * v v v + * +---------*---------+---------*---------+- -+---------*---------+ + * [Z] | 511 : 480 | 479 : 448 | ... | 31 : 0 | + * +-------------------+-------------------+- -+-------------------+ + * + */ + + + // + // Internals + // + wire [511:0] z; // concatenated outputs of adders + wire [14:0] z_carry; // carry lines + reg [511:0] sum_reg; // output register + + assign sum = sum_reg; + + + // + // Shift Register + // + + /* + * This shift register is re-loaded with "walking one" bit pattern whenever enable + * input is active and adder core is ready. The most significant bit [17] acts as a + * ready flag. Lower 16 bits [15:0] control DSP blocks (Clock Enable). Intermediate + * bit [16] is required to compensate for 1-cycle latency of DSP blocks. + * + */ + + reg [17: 0] ce_shreg = {1'b1, 1'b0, 16'h0000}; + + assign rdy = ce_shreg[17]; + + + // + // Shift Register Logic + // + always @(posedge clk) + // + if (! rdy) ce_shreg <= {ce_shreg[16:0], 1'b0}; + else if (ena) ce_shreg <= {1'b0, 1'b0, 16'h0001}; + + + // + // Output Register Logic + // + always @(posedge clk) + // + if (ce_shreg[16] == 1'b1) sum_reg <= z; + + + // + // LSB Adder + // + adder_s6 adder_s6_lsb + ( + .clk (clk), // + .ce (ce_shreg[0]), // clock enable [0] + .a (x[ 31: 0]), // + .b (y[ 31: 0]), // + .s (z[ 31: 0]), // + .c_in (1'b0), // carry input tied to 0 + .c_out (z_carry[0]) // carry[0] to next adder + ); + + + // + // MSB Adder + // + adder_s6 adder_s6_msb + ( + .clk (clk), // + .ce (ce_shreg[15]), // clock enable [15] + .a (x[511:480]), // + .b (y[511:480]), // + .s (z[511:480]), // + .c_in (z_carry[14]), // carry[14] from previous adder + .c_out () // carry output not connected + ); + + + // + // Intermediate Adders + // + genvar i; + generate for (i=1; i<=14; i=i+1) + begin: gen_adder_s6 + adder_s6 adder_s6_int + ( + .clk (clk), // + .ce (ce_shreg[i]), // clock enable [1..14] + .a (x[32*i+31:32*i]), // + .b (y[32*i+31:32*i]), // + .s (z[32*i+31:32*i]), // + .c_in (z_carry[i-1]), // carry[0..13] from previous adder + .c_out (z_carry[i]) // carry[1..14] to next adder + ); + end + endgenerate + + +endmodule -- cgit v1.2.3