From c32c11f9c72cf629a30a77afec3c2b74acaa096c Mon Sep 17 00:00:00 2001
From: "Pavel V. Shatov (Meister)" <meisterpaul1@yandex.ru>
Date: Tue, 21 Jan 2020 15:23:25 +0300
Subject: New FMC arbiter. FMC bus now runs at 45 MHz, while the system clock
 is 90 MHz, so the arbiter now does primitive synchronous clock domain
 crossing.

---
 src/rtl/fmc_arbiter.v | 401 ++++++++++++++++++++++++++++----------------------
 1 file changed, 228 insertions(+), 173 deletions(-)

diff --git a/src/rtl/fmc_arbiter.v b/src/rtl/fmc_arbiter.v
index 037d640..376e85b 100644
--- a/src/rtl/fmc_arbiter.v
+++ b/src/rtl/fmc_arbiter.v
@@ -7,7 +7,7 @@
 //
 //
 // Author: Pavel Shatov
-// Copyright (c) 2015, 2018 NORDUnet A/S All rights reserved.
+// Copyright (c) 2015, 2018-2019 NORDUnet A/S All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -38,174 +38,208 @@
 //======================================================================
 
 module fmc_arbiter
-  (
-   // fmc bus
-   fmc_a, fmc_d,
-   fmc_ne1, fmc_nl, fmc_nwe, fmc_noe, fmc_nwait,
-
-   // system clock
-   sys_clk,
-
-   // user bus
-   sys_addr,
-   sys_wr_en,
-   sys_data_out,
-   sys_rd_en,
-   sys_data_in
-   );
-
-
-   //
-   // Parameters
-   //
-   parameter NUM_ADDR_BITS = 22;
-
-
-   //
-   // Ports
-   //
-   input        wire [NUM_ADDR_BITS-1:0] fmc_a;
-   inout        wire [             31:0] fmc_d;
-   input        wire                     fmc_ne1;
-   input        wire                     fmc_nl;
-   input        wire                     fmc_nwe;
-   input        wire                     fmc_noe;
-   output       wire                     fmc_nwait;
-
-   input        wire                     sys_clk;
-
-   output       wire [NUM_ADDR_BITS-1:0] sys_addr;
-   output       wire                     sys_wr_en;
-   output       wire [             31:0] sys_data_out;
-   output       wire                     sys_rd_en;
-   input        wire [             31:0] sys_data_in;
-
-
-   //
-   // Data Bus PHY
-   //
-
-   /* PHY is needed to control bi-directional data bus. */
-
-   wire [31: 0] fmc_d_ro; // value read from pins (receiver output)
-   wire [31: 0] fmc_d_di; // value drives onto pins (driver input)
-
-   fmc_d_phy #
-     (
-      .BUS_WIDTH(32)
-      )
-   d_phy
-     (
-      .buf_io(fmc_d),          // <-- connect directly to top-level bi-dir port
-      .buf_di(fmc_d_di),
-      .buf_ro(fmc_d_ro),
-      .buf_t(fmc_noe)          // <-- bus direction is controlled by STM32
-      );
+(
+    // fmc bus
+    fmc_a, fmc_d,
+    fmc_ne1, fmc_nl, fmc_nwe, fmc_noe, fmc_nwait,
+
+    // system clock, i/o clock
+    sys_clk,
+    io_clk,
+
+    // user bus
+    sys_addr,
+    sys_wr_en,
+    sys_data_out,
+    sys_rd_en,
+    sys_data_in
+);
+
+
+    //
+    // Parameters
+    //
+    parameter NUM_ADDR_BITS = 24;   // width of fmc_a and of sys_addr
+
+
+    //
+    // Ports
+    //
+    input        wire [NUM_ADDR_BITS-1:0] fmc_a;       // address, sampled on io_clk
+    inout        wire [             31:0] fmc_d;       // bi-directional data, routed through fmc_d_phy
+    input        wire                     fmc_ne1;     // high forces the FSM back to IDLE
+    input        wire                     fmc_nl;      // low (with fmc_ne1 low) latches the address
+    input        wire                     fmc_nwe;     // low when leaving IDLE selects the write path
+    input        wire                     fmc_noe;     // drives the data PHY direction input (buf_t)
+    output       wire                     fmc_nwait;   // tied to constant 0 in this module
+    // clocks; NOTE(review): commit message states io_clk = 45 MHz, sys_clk = 90 MHz -- confirm in constraints
+    input        wire                     sys_clk;     // clocks the FSM and the user-bus registers
+    input        wire                     io_clk;      // clocks the FMC-side sampling registers
+    // user bus; the registers feeding it are all clocked by sys_clk
+    output       wire [NUM_ADDR_BITS-1:0] sys_addr;
+    output       wire                     sys_wr_en;
+    output       wire [             31:0] sys_data_out;
+    output       wire                     sys_rd_en;
+    input        wire [             31:0] sys_data_in; // read data, captured in READ_LATENCY_2
+
+
+    //
+    // Data Bus PHY
+    //
+
+    /* PHY is needed to control bi-directional data bus. */
+
+    wire [31: 0] fmc_d_ro; // value read from pins (receiver output)
+    wire [31: 0] fmc_d_di; // value drives onto pins (driver input)
+
+    fmc_d_phy #
+    (
+        .BUS_WIDTH(32)
+    )
+    d_phy
+    (
+        // buf_io is wired to the top-level bi-directional port with its four
+        // byte lanes reversed: buf_io[31:24] = fmc_d[7:0], ..., buf_io[7:0] =
+        // fmc_d[31:24]; the bit order inside each byte is left unchanged
+        .buf_io({fmc_d[7:0], fmc_d[15:8], fmc_d[23:16], fmc_d[31:24]}),
+
+        .buf_di (fmc_d_di),      // driver input (from FPGA to STM32)
+        .buf_ro (fmc_d_ro),      // receiver output (from STM32 to FPGA)
+        .buf_t  (fmc_noe)        // <-- bus direction is controlled by STM32
+    );
+
+
+    //
+    // CDC Helper Signals
+    //
+    reg cdc_slow_ff = 1'b0;     // toggles on every rising edge of io_clk
+    reg cdc_fast_ff = 1'b0;     // cdc_slow_ff re-registered on sys_clk
+    // NOTE(review): assumes sys_clk and io_clk are phase-locked (90/45 MHz per the commit message) -- confirm
+    always @(posedge io_clk)  cdc_slow_ff <= ~cdc_slow_ff;
+    always @(posedge sys_clk) cdc_fast_ff <= cdc_slow_ff;
+    // initialised like the two flops above so the FSM never sees an X qualifier at simulation start
+    reg cdc_same_edges = 1'b0;
+    // registered XOR of the two toggle flops, i.e. whether they differed one sys_clk earlier
+    always @(posedge sys_clk)
+        cdc_same_edges <= cdc_slow_ff ^ cdc_fast_ff;
+
 
     //
-    // Two-Stage Synchronizer
+    // Synchronizer
     //
-    (* SHREG_EXTRACT="NO" *) (* IOB="FALSE" *) reg [23: 0] fmc_a_ff1;
-    (* SHREG_EXTRACT="NO" *)                   reg [23: 0] fmc_a_ff2;
+    (* SHREG_EXTRACT="NO" *) (* IOB="FALSE" *) reg [NUM_ADDR_BITS-1:0] fmc_a_ff1;
+    (* SHREG_EXTRACT="NO" *)                   reg [NUM_ADDR_BITS-1:0] fmc_a_ff2;
+    (* SHREG_EXTRACT="NO" *)                   reg [NUM_ADDR_BITS-1:0] fmc_a_ff3;
 
-    (* SHREG_EXTRACT="NO" *) (* IOB="TRUE" *)  reg [31: 0] fmc_d_ro_ff1;
-    (* SHREG_EXTRACT="NO" *)                   reg [31: 0] fmc_d_ro_ff2;
+    (* SHREG_EXTRACT="NO" *) (* IOB="TRUE" *)  reg [           32-1:0] fmc_d_ro_ff1;
+    (* SHREG_EXTRACT="NO" *)                   reg [           32-1:0] fmc_d_ro_ff2;
+    (* SHREG_EXTRACT="NO" *)                   reg [           32-1:0] fmc_d_ro_ff3;
 
     (* SHREG_EXTRACT="NO" *) (* IOB="FALSE" *) reg fmc_ne1_ff1;
     (* SHREG_EXTRACT="NO" *)                   reg fmc_ne1_ff2;
+    (* SHREG_EXTRACT="NO" *)                   reg fmc_ne1_ff3;
 
     (* SHREG_EXTRACT="NO" *) (* IOB="FALSE" *) reg fmc_nwe_ff1;
     (* SHREG_EXTRACT="NO" *)                   reg fmc_nwe_ff2;
+    (* SHREG_EXTRACT="NO" *)                   reg fmc_nwe_ff3;
 
     (* SHREG_EXTRACT="NO" *) (* IOB="FALSE" *) reg fmc_nl_ff1;
     (* SHREG_EXTRACT="NO" *)                   reg fmc_nl_ff2;
+    (* SHREG_EXTRACT="NO" *)                   reg fmc_nl_ff3;
+
+    wire [NUM_ADDR_BITS-1:0] fmc_a_sync_sys    = fmc_a_ff3;       // the *_sync_sys nets are
+    wire [           32-1:0] fmc_d_ro_sync_sys = fmc_d_ro_ff3;    // plain aliases of the third
+    wire                     fmc_ne1_sync_sys  = fmc_ne1_ff3;     // (sys_clk) register stage
+    wire                     fmc_nwe_sync_sys  = fmc_nwe_ff3;
+    wire                     fmc_nl_sync_sys   = fmc_nl_ff3;
+    // stages 1 and 2 are clocked by io_clk
+    always @(posedge io_clk) begin
+        // each line shifts the input into ff1 and the previous ff1 value into ff2
+        {fmc_a_ff2,    fmc_a_ff1}    <= {fmc_a_ff1,    fmc_a};
+        {fmc_d_ro_ff2, fmc_d_ro_ff1} <= {fmc_d_ro_ff1, fmc_d_ro};
+        {fmc_ne1_ff2,  fmc_ne1_ff1}  <= {fmc_ne1_ff1,  fmc_ne1};
+        {fmc_nwe_ff2,  fmc_nwe_ff1}  <= {fmc_nwe_ff1,  fmc_nwe};
+        {fmc_nl_ff2,   fmc_nl_ff1}   <= {fmc_nl_ff1,   fmc_nl};
+        // fmc_d_ro is the data PHY receiver output, the other four are module inputs
+    end
+    // stage 3 is clocked by sys_clk and takes over the io_clk-domain value of stage 2
+    always @(posedge sys_clk)
+        // load only while cdc_same_edges is set; otherwise stage 3 keeps its value
+        if (cdc_same_edges) begin
+            fmc_a_ff3    <= fmc_a_ff2;
+            fmc_d_ro_ff3 <= fmc_d_ro_ff2;
+            fmc_ne1_ff3  <= fmc_ne1_ff2;
+            fmc_nwe_ff3  <= fmc_nwe_ff2;
+            fmc_nl_ff3   <= fmc_nl_ff2;
+        end
+
+
+    //
+    // FSM
+    //
+    localparam FSM_STATE_IDLE            = 4'd00;
 
-    wire [23: 0] fmc_a_sync    = fmc_a_ff2;
-    wire [31: 0] fmc_d_ro_sync = fmc_d_ro_ff2;
-    wire         fmc_ne1_sync  = fmc_ne1_ff2;
-    wire         fmc_nwe_sync  = fmc_nwe_ff2;
-    wire         fmc_nl_sync   = fmc_nl_ff2;
-		
-	always @(posedge sys_clk) begin
-		fmc_a_ff1    <= fmc_a;
-		fmc_a_ff2    <= fmc_a_ff1;
-		
-		fmc_d_ro_ff1 <= fmc_d_ro;
-		fmc_d_ro_ff2 <= fmc_d_ro_ff1;
-		
-		fmc_ne1_ff1  <= fmc_ne1;
-		fmc_ne1_ff2  <= fmc_ne1_ff1;
-				
-		fmc_nwe_ff1  <= fmc_nwe;
-		fmc_nwe_ff2  <= fmc_nwe_ff1;
-		
-		fmc_nl_ff1   <= fmc_nl;
-		fmc_nl_ff2   <= fmc_nl_ff1;
-	end
-
-
-		//
-		// FSM
-		//
-	localparam	FSM_STATE_IDLE              = 4'd0;
-	
-	localparam	FSM_STATE_WRITE_START       = 4'd1;
-	localparam	FSM_STATE_WRITE_LATENCY_1   = 4'd2;
-	localparam	FSM_STATE_WRITE_LATENCY_2   = 4'd3;
-	localparam	FSM_STATE_WRITE_LATENCY_3   = 4'd4;
-	localparam	FSM_STATE_WRITE_LATENCY_4   = 4'd5;
-	localparam	FSM_STATE_WRITE_STOP        = 4'd6;
-	
-	localparam	FSM_STATE_READ_START        = 4'd7;
-	localparam	FSM_STATE_READ_LATENCY_1    = 4'd8;
-	localparam	FSM_STATE_READ_STOP         = 4'd9;
-
-	reg	[ 3: 0]	fsm_state = FSM_STATE_IDLE;
-	reg	[ 3: 0]	fsm_state_next;
-	
-	always @(posedge sys_clk)
-		//
-		fsm_state <= fsm_state_next;
-
-	
-		//
-		// FSM Transition Logic
-		//
-	always @*
-		//
-		if (fmc_ne1_sync)               fsm_state_next = FSM_STATE_IDLE;
-		else case (fsm_state)
-			FSM_STATE_IDLE:             fsm_state_next = !fmc_nwe_sync ? FSM_STATE_WRITE_START : FSM_STATE_READ_START;
-			FSM_STATE_WRITE_START:      fsm_state_next = FSM_STATE_WRITE_LATENCY_1;
-			FSM_STATE_WRITE_LATENCY_1:  fsm_state_next = FSM_STATE_WRITE_LATENCY_2;
-			FSM_STATE_WRITE_LATENCY_2:  fsm_state_next = FSM_STATE_WRITE_LATENCY_3;
-			FSM_STATE_WRITE_LATENCY_3:  fsm_state_next = FSM_STATE_WRITE_LATENCY_4;
-			FSM_STATE_WRITE_LATENCY_4,
-			FSM_STATE_WRITE_STOP:       fsm_state_next = FSM_STATE_WRITE_STOP;
-			FSM_STATE_READ_START:       fsm_state_next = FSM_STATE_READ_LATENCY_1;
-			FSM_STATE_READ_LATENCY_1,
-			FSM_STATE_READ_STOP:        fsm_state_next = FSM_STATE_READ_STOP;
-			default:                    fsm_state_next = FSM_STATE_IDLE;
-		endcase
+    localparam FSM_STATE_WRITE_START     = 4'd01;   // entered from IDLE when fmc_nwe (synchronised) is low
+    localparam FSM_STATE_WRITE_LATENCY_1 = 4'd02;   // LATENCY_1..5 only step to the next state
+    localparam FSM_STATE_WRITE_LATENCY_2 = 4'd03;
+    localparam FSM_STATE_WRITE_LATENCY_3 = 4'd04;
+    localparam FSM_STATE_WRITE_LATENCY_4 = 4'd05;
+    localparam FSM_STATE_WRITE_LATENCY_5 = 4'd06;
+    localparam FSM_STATE_WRITE_LATENCY_6 = 4'd07;   // sys_wr_en strobe and sys_data_out capture happen here
+    localparam FSM_STATE_WRITE_STOP      = 4'd08;   // held until fmc_ne1 (synchronised) goes high
 
+    localparam FSM_STATE_READ_START      = 4'd09;   // sys_rd_en is registered high on entry to this state
+    localparam FSM_STATE_READ_LATENCY_1  = 4'd10;
+    localparam FSM_STATE_READ_LATENCY_2  = 4'd11;   // sys_data_in is captured into the sys_clk latch
+    localparam FSM_STATE_READ_LATENCY_3  = 4'd12;   // captured word is copied to the io_clk output latch
+    localparam FSM_STATE_READ_STOP       = 4'd13;   // held until fmc_ne1 (synchronised) goes high
+    // 4-bit state register (14 encodings used), starts in IDLE
+    reg	[ 3: 0]	fsm_state = FSM_STATE_IDLE;
+    reg	[ 3: 0]	fsm_state_next;
+    // the register itself is unconditional; holding is done inside fsm_state_next
+    always @(posedge sys_clk)
+        //
+        fsm_state <= fsm_state_next;
+
+
+    //
+    // FSM Transition Logic
+    //
+    always @*
+        // hold the current state on every sys_clk cycle where cdc_same_edges is low
+        if (!cdc_same_edges)                fsm_state_next = fsm_state;
+        else begin
+            // synchronised fmc_ne1 high returns the FSM to IDLE from any state
+            if (fmc_ne1_sync_sys)           fsm_state_next = FSM_STATE_IDLE;
+            else case (fsm_state)
+                FSM_STATE_IDLE:             fsm_state_next = !fmc_nwe_sync_sys ? FSM_STATE_WRITE_START : FSM_STATE_READ_START;
+                FSM_STATE_WRITE_START:      fsm_state_next = FSM_STATE_WRITE_LATENCY_1;
+                FSM_STATE_WRITE_LATENCY_1:  fsm_state_next = FSM_STATE_WRITE_LATENCY_2;
+                FSM_STATE_WRITE_LATENCY_2:  fsm_state_next = FSM_STATE_WRITE_LATENCY_3;
+                FSM_STATE_WRITE_LATENCY_3:  fsm_state_next = FSM_STATE_WRITE_LATENCY_4;
+                FSM_STATE_WRITE_LATENCY_4:  fsm_state_next = FSM_STATE_WRITE_LATENCY_5;
+                FSM_STATE_WRITE_LATENCY_5:  fsm_state_next = FSM_STATE_WRITE_LATENCY_6;
+                FSM_STATE_WRITE_LATENCY_6,
+                FSM_STATE_WRITE_STOP:       fsm_state_next = FSM_STATE_WRITE_STOP;
+                FSM_STATE_READ_START:       fsm_state_next = FSM_STATE_READ_LATENCY_1;
+                FSM_STATE_READ_LATENCY_1:   fsm_state_next = FSM_STATE_READ_LATENCY_2;
+                FSM_STATE_READ_LATENCY_2:   fsm_state_next = FSM_STATE_READ_LATENCY_3;
+                FSM_STATE_READ_LATENCY_3,
+                FSM_STATE_READ_STOP:        fsm_state_next = FSM_STATE_READ_STOP;
+                default:                    fsm_state_next = FSM_STATE_IDLE;
+            endcase
+            // WRITE_STOP and READ_STOP loop on themselves until fmc_ne1 is seen high
+        end
+        
 
     //
     // Output Data Latch
     //
+    reg	[31:0] sys_data_in_latch_fast;
     (* IOB="TRUE" *)
-	reg	[31:0] sys_data_in_latch;
+    reg	[31:0] sys_data_in_latch_slow;
     
-	assign fmc_d_di = sys_data_in_latch;
-	
-    
-    //
-    // Address Latch
-    //
-	always @(posedge sys_clk)
-		//
-		if (!fmc_ne1_sync && !fmc_nl_sync)
-			//
-			sys_addr_reg <= fmc_a_sync;
+    assign fmc_d_di = sys_data_in_latch_slow;
 
 
     //
@@ -222,52 +256,73 @@ module fmc_arbiter
     assign sys_rd_en    = sys_rd_en_reg;
 
 
+    //
+    // Address Latch
+    //
+    always @(posedge sys_clk)
+        // capture the synchronised address while fmc_ne1 and fmc_nl are both low, on a cdc_same_edges cycle
+        if (!fmc_ne1_sync_sys && !fmc_nl_sync_sys && cdc_same_edges)
+            sys_addr_reg <= fmc_a_sync_sys;
+
+
     //
     // Write Enable Logic
     //
-	always @(posedge sys_clk)
-		//
-		case (fsm_state)
-			FSM_STATE_WRITE_LATENCY_4:  sys_wr_en_reg <= 1'b1;
-			default:                    sys_wr_en_reg <= 1'b0;
-		endcase
+    always @(posedge sys_clk)
+        // strobe is registered high only in WRITE_LATENCY_6 and only when cdc_same_edges is low at the edge
+        case (fsm_state)
+            FSM_STATE_WRITE_LATENCY_6:  sys_wr_en_reg <= ~cdc_same_edges;
+            default:                    sys_wr_en_reg <= 1'b0;
+        endcase
+
 
-    
     //
     // Read Enable Logic
     //
-	always @(posedge sys_clk)
-		//
-		case (fsm_state_next)
-			FSM_STATE_READ_START:   sys_rd_en_reg <= 1'b1;
-			default:                sys_rd_en_reg <= 1'b0;
-		endcase
+    always @(posedge sys_clk)
+        // decoded from fsm_state_next; cdc_same_edges masks the cycles where the FSM merely holds READ_START
+        case (fsm_state_next)
+            FSM_STATE_READ_START:   sys_rd_en_reg <= cdc_same_edges;
+            default:                sys_rd_en_reg <= 1'b0;
+        endcase
 
 
     //
-    // Output Data Latch
+    // Output Data Latches
     //
-	always @(posedge sys_clk)
-		//
-		case (fsm_state)
-			FSM_STATE_READ_LATENCY_1:   sys_data_in_latch <= sys_data_in;
-		endcase
+    always @(posedge sys_clk)
+        // fast latch: take sys_data_in in READ_LATENCY_2, on the cycle where cdc_same_edges is set
+        case (fsm_state)
+            FSM_STATE_READ_LATENCY_2:
+                if (cdc_same_edges)
+                    sys_data_in_latch_fast <= sys_data_in;
+        endcase
+        // slow latch: falling-edge io_clk register whose value is driven onto fmc_d_di
+    always @(negedge io_clk)
+        // NOTE(review): fsm_state is a sys_clk register sampled here on io_clk -- relies on phase-locked clocks, confirm
+        case (fsm_state)
+            FSM_STATE_READ_LATENCY_3:
+                sys_data_in_latch_slow <= sys_data_in_latch_fast;
+        endcase
+        
 
 
     //
     // Input Data Latch
     //
-	always @(posedge sys_clk)
-		//
-		case (fsm_state)
-			FSM_STATE_WRITE_LATENCY_4:  sys_data_out_reg <= fmc_d_ro_sync;
-		endcase
+    always @(posedge sys_clk)
+        // capture the synchronised FMC data under the same condition that raises sys_wr_en_reg
+        case (fsm_state)
+            FSM_STATE_WRITE_LATENCY_6:
+                if (!cdc_same_edges)
+                    sys_data_out_reg <= fmc_d_ro_sync_sys;
+        endcase
 
 
     //
     // Unused NWAIT tieoff
     //
-	assign fmc_nwait = 1'b0;
+    assign fmc_nwait = 1'b0;
 
 
 endmodule
-- 
cgit v1.2.3