summaryrefslogtreecommitdiff
path: root/streebog_hash/streebog_core_adder_s6.v
diff options
context:
space:
mode:
Diffstat (limited to 'streebog_hash/streebog_core_adder_s6.v')
-rw-r--r--streebog_hash/streebog_core_adder_s6.v152
1 files changed, 152 insertions, 0 deletions
diff --git a/streebog_hash/streebog_core_adder_s6.v b/streebog_hash/streebog_core_adder_s6.v
new file mode 100644
index 0000000..3c254eb
--- /dev/null
+++ b/streebog_hash/streebog_core_adder_s6.v
@@ -0,0 +1,152 @@
+`timescale 1ns / 1ps
+
+module streebog_core_adder_s6
+ (
+ clk,
+ ena, rdy,
+ x, y, sum
+ );
+
+
+ //
+ // Ports
+ //
+ input wire clk; // core clock
+ input wire ena; // start addition flag
+ output wire rdy; // addition done flag (sum is valid)
+ input wire [511:0] x; // item x
+ input wire [511:0] y; // item y
+ output wire [511:0] sum; // x+y
+
+
+ /*
+ * ISE cannot synthesize adders using fabric that are more than 256 bits wide. Items X and Y are 512-bit wide, so
+ * Spartan-6 DSP blocks are used to overcome this issue. Every DSP block is configured to add 32 bits at a time,
+ * so total of 512/32=16 DSP blocks are required to implement addition. Every DSP block is configured to expose
+ * carry input and output ports. Overflow at 512-bit boundary should be ignored according to the specification,
+ * that's why only 15 intermediate carry lines are required.
+ *
+ * +-------------------+-------------------+- -+-------------------+
+ * [X] | 511 : 480 | 479 : 448 | ... | 31 : 0 |
+ * +------*------------+------*------------+- -+------*------------+
+ * | | |
+ * +------|------------+------|------------+- -+------|------------+
+ * [Y] | | 511 : 480 | | 479 : 448 | ... | | 31 : 0 |
+ * +------|-----*------+------|------------+- -+------|------------+
+ * | | | | | |
+ * | | | | | |
+ * v v v v v v
+ * +---+-+---+ +---+-+---+ +---+-+---+
+ * | A | | B | | A | | B | | A | | B |
+ * +---------+ +---+-+---+ +---+-+---+
+ * | DSP #15 | | DSP #15 | | DSP #0 |
+ * |---------| |---------| |---------|
+ * | Carry | | Carry | | Carry |
+ * X --<-Out In-<--C[14]--<-Out In-<--C[13]- ... -C[ 0]--<-Out In-<-- 0
+ * +---------+ +---------+ +---------+
+ * | S | | S | | S |
+ * +---------+ +---------+ +---------+
+ * | | |
+ * v v v
+ * +---------*---------+---------*---------+- -+---------*---------+
+ * [Z] | 511 : 480 | 479 : 448 | ... | 31 : 0 |
+ * +-------------------+-------------------+- -+-------------------+
+ *
+ */
+
+
+ //
+ // Internals
+ //
+ wire [511:0] z; // concatenated outputs of adders
+ wire [14:0] z_carry; // carry lines
+ reg [511:0] sum_reg; // output register
+
+ assign sum = sum_reg;
+
+
+ //
+ // Shift Register
+ //
+
+ /*
+ * This shift register is re-loaded with "walking one" bit pattern whenever enable
+ * input is active and adder core is ready. The most significant bit [17] acts as a
+ * ready flag. Lower 16 bits [15:0] control DSP blocks (Clock Enable). Intermediate
+ * bit [16] is required to compensate for 1-cycle latency of DSP blocks.
+ *
+ */
+
+ reg [17: 0] ce_shreg = {1'b1, 1'b0, 16'h0000};
+
+ assign rdy = ce_shreg[17];
+
+
+ //
+ // Shift Register Logic
+ //
+ always @(posedge clk)
+ //
+ if (! rdy) ce_shreg <= {ce_shreg[16:0], 1'b0};
+ else if (ena) ce_shreg <= {1'b0, 1'b0, 16'h0001};
+
+
+ //
+ // Output Register Logic
+ //
+ always @(posedge clk)
+ //
+ if (ce_shreg[16] == 1'b1) sum_reg <= z;
+
+
+ //
+ // LSB Adder
+ //
+ adder_s6 adder_s6_lsb
+ (
+ .clk (clk), //
+ .ce (ce_shreg[0]), // clock enable [0]
+ .a (x[ 31: 0]), //
+ .b (y[ 31: 0]), //
+ .s (z[ 31: 0]), //
+ .c_in (1'b0), // carry input tied to 0
+ .c_out (z_carry[0]) // carry[0] to next adder
+ );
+
+
+ //
+ // MSB Adder
+ //
+ adder_s6 adder_s6_msb
+ (
+ .clk (clk), //
+ .ce (ce_shreg[15]), // clock enable [15]
+ .a (x[511:480]), //
+ .b (y[511:480]), //
+ .s (z[511:480]), //
+ .c_in (z_carry[14]), // carry[14] from previous adder
+ .c_out () // carry output not connected
+ );
+
+
+ //
+ // Intermediate Adders
+ //
+ genvar i;
+ generate for (i=1; i<=14; i=i+1)
+ begin: gen_adder_s6
+ adder_s6 adder_s6_int
+ (
+ .clk (clk), //
+ .ce (ce_shreg[i]), // clock enable [1..14]
+ .a (x[32*i+31:32*i]), //
+ .b (y[32*i+31:32*i]), //
+ .s (z[32*i+31:32*i]), //
+ .c_in (z_carry[i-1]), // carry[0..13] from previous adder
+ .c_out (z_carry[i]) // carry[1..14] to next adder
+ );
+ end
+ endgenerate
+
+
+endmodule