1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
`timescale 1ns / 1ps
module streebog_core_adder_s6
(
    input  wire         clk,    // core clock
    input  wire         ena,    // start request; only honoured while rdy is high
    output wire         rdy,    // high when idle, i.e. the last addition has finished
    input  wire [511:0] x,      // first operand
    input  wire [511:0] y,      // second operand
    output wire [511:0] sum     // registered result, x + y modulo 2^512
);

    /*
     * 512-bit adder assembled from sixteen 32-bit adder_s6 slices.
     *
     * Design note carried over from the original author: ISE cannot
     * synthesize fabric adders wider than 256 bits, so the addition is
     * mapped onto Spartan-6 DSP blocks, 32 bits per block (512/32 = 16
     * blocks). Every slice exposes a carry input and a carry output. The
     * carry out of the top slice is dropped because overflow past bit 511
     * is ignored by the specification, hence only 15 carry nets exist.
     *
     *        x[511:480]          x[479:448]                 x[31:0]
     *        y[511:480]          y[479:448]                 y[31:0]
     *           |  |                |  |                      |  |
     *           v  v                v  v                      v  v
     *       +---------+         +---------+               +---------+
     *       | A     B |         | A     B |               | A     B |
     *       | DSP #15 |         | DSP #14 |               | DSP #0  |
     *  x <--|Cout  Cin|<-C[14]--|Cout  Cin|<-C[13]- ... --|Cout  Cin|<-- 0
     *       |    S    |         |    S    |       C[0]    |    S    |
     *       +---------+         +---------+               +---------+
     *            |                   |                         |
     *            v                   v                         v
     *        z[511:480]          z[479:448]       ...       z[31:0]
     *
     * Sequencing: a single '1' walks through ce_shreg. Bit k (k = 0..15)
     * is the clock enable of slice k, bit 16 latches z into the output
     * register, bit 17 is the rdy flag.
     *
     * NOTE(review): each slice is enabled on a different cycle, so x and y
     * presumably have to be held stable until rdy returns high - confirm
     * against the adder_s6 implementation, which is not part of this file.
     */

    //
    // Slice geometry
    //
    localparam WORD_W  = 32;    // operand bits handled by one slice
    localparam N_WORDS = 16;    // number of slices (512 / 32)

    //
    // Internal nets and registers
    //
    wire [511:0] z;             // slice sum outputs, concatenated
    wire [ 14:0] z_carry;       // z_carry[k] runs from slice k to slice k+1
    reg  [511:0] sum_reg;       // output register (no declared initial value)

    assign sum = sum_reg;

    //
    // Walking-one sequencer
    //
    //   [17]    ready flag
    //   [16]    output register load strobe; the extra stage covers the
    //           one-cycle latency of the slices (per the design note)
    //   [15:0]  clock enables of slices 15..0
    //
    // The register powers up with only bit 17 set, so rdy is high before
    // the first addition has been requested.
    //
    reg [17:0] ce_shreg = 18'h20000;

    assign rdy = ce_shreg[17];

    always @(posedge clk)
    begin
        if (! rdy)
            // Busy: advance the '1' by one position; ena is not looked at.
            ce_shreg <= ce_shreg << 1;
        else if (ena)
            // Idle and start requested: clear rdy and enable slice 0.
            ce_shreg <= 18'h00001;
    end

    //
    // Output register: capture z once the top slice has been enabled
    //
    always @(posedge clk)
    begin
        if (ce_shreg[16])
            sum_reg <= z;
    end

    //
    // Slice 0 (least significant word), carry input tied low
    //
    adder_s6 adder_s6_lsb
    (
        .clk    (clk),
        .ce     (ce_shreg[0]),
        .a      (x[0 +: WORD_W]),
        .b      (y[0 +: WORD_W]),
        .s      (z[0 +: WORD_W]),
        .c_in   (1'b0),
        .c_out  (z_carry[0])
    );

    //
    // Slices 1..14: carry in from the slice below, carry out to the slice above
    //
    genvar k;
    generate for (k = 1; k <= N_WORDS - 2; k = k + 1)
        begin: gen_adder_s6
            adder_s6 adder_s6_int
            (
                .clk    (clk),
                .ce     (ce_shreg[k]),
                .a      (x[WORD_W*k +: WORD_W]),
                .b      (y[WORD_W*k +: WORD_W]),
                .s      (z[WORD_W*k +: WORD_W]),
                .c_in   (z_carry[k-1]),
                .c_out  (z_carry[k])
            );
        end
    endgenerate

    //
    // Slice 15 (most significant word), carry output left unconnected
    //
    adder_s6 adder_s6_msb
    (
        .clk    (clk),
        .ce     (ce_shreg[N_WORDS-1]),
        .a      (x[WORD_W*(N_WORDS-1) +: WORD_W]),
        .b      (y[WORD_W*(N_WORDS-1) +: WORD_W]),
        .s      (z[WORD_W*(N_WORDS-1) +: WORD_W]),
        .c_in   (z_carry[N_WORDS-2]),
        .c_out  ()
    );

endmodule
|