Skip to content

Commit

Permalink
HyperRAM: Improve stability
Browse files Browse the repository at this point in the history
Use proper CDC
  • Loading branch information
MJoergen committed Feb 17, 2024
1 parent 54bead0 commit 8044fdc
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 99 deletions.
25 changes: 11 additions & 14 deletions M2M/common.xdc
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,23 @@
create_clock -period 10.000 -name clk [get_ports {clk_i}]

## Name Autogenerated Clocks
create_generated_clock -name qnice_clk [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT0]
create_generated_clock -name hr_clk_x1 [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT1]
create_generated_clock -name qnice_clk [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT0]
create_generated_clock -name hr_clk_x1 [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT1]
create_generated_clock -name hr_delay_refclk [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT2]
create_generated_clock -name hr_clk_x1_del [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT3]
create_generated_clock -name audio_clk [get_pins i_framework/i_clk_m2m/i_clk_audio/CLKOUT0]
create_generated_clock -name tmds_clk [get_pins i_framework/i_video_out_clock/MMCM/CLKOUT0]
create_generated_clock -name hdmi_clk [get_pins i_framework/i_video_out_clock/MMCM/CLKOUT1]
create_generated_clock -name audio_clk [get_pins i_framework/i_clk_m2m/i_clk_audio/CLKOUT0]
create_generated_clock -name tmds_clk [get_pins i_framework/i_video_out_clock/MMCM/CLKOUT0]
create_generated_clock -name hdmi_clk [get_pins i_framework/i_video_out_clock/MMCM/CLKOUT1]

# HyperRAM output clock relative to delayed clock
create_generated_clock -name hr_ck [get_ports hr_clk_p_o] \
create_generated_clock -name hr_ck [get_ports hr_clk_p_o] \
-source [get_pins i_framework/i_clk_m2m/i_clk_qnice/CLKOUT3] -multiply_by 1

# HyperRAM RWDS as a clock for the read path (hr_dq -> IDDR -> XPM FIFO)
create_clock -period 10.000 -name hr_rwds -waveform {2.5 7.5} [get_ports hr_rwds_io]

# Asynchronous clock groups
set_clock_groups -name cg_async -asynchronous \
-group [get_clocks -include_generated_clocks clk] \
-group [get_clocks hr_rwds]

set_property IOB TRUE [get_cells i_framework/i_hyperram/i_hyperram_io/b_output.hr_rwds_oe_n_reg ]
set_property IOB TRUE [get_cells i_framework/i_hyperram/i_hyperram_io/b_output.hr_dq_oe_n_reg[*] ]

# Asynchronous clocks
set_false_path -from [get_ports hr_rwds_io] -to [get_clocks hr_ck]

################################################################################
# HyperRAM timing (correct for IS66WVH8M8DBLL-100B1LI)
Expand All @@ -47,6 +41,9 @@ set tDSHmin -0.8 ; # RWDS to data invalid, min
################################################################################
# FPGA to HyperRAM (address and write data)

set_property IOB TRUE [get_cells i_framework/i_hyperram/i_hyperram_io/output_block.hr_rwds_oe_n_reg ]
set_property IOB TRUE [get_cells i_framework/i_hyperram/i_hyperram_io/output_block.hr_dq_oe_n_reg[*] ]

# setup
set_output_delay -max $HR_tIS -clock hr_ck [get_ports {hr_rwds_io hr_d_io[*]}]
set_output_delay -max $HR_tIS -clock hr_ck [get_ports {hr_rwds_io hr_d_io[*]}] -clock_fall -add_delay
Expand Down
186 changes: 101 additions & 85 deletions M2M/vhdl/controllers/hyperram/hyperram_io.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -5,62 +5,63 @@
-- Created by Michael Jørgensen in 2023 (mjoergen.github.io/HyperRAM).

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

library unisim;
use unisim.vcomponents.all;
use unisim.vcomponents.all;

library xpm;
use xpm.vcomponents.all;
use xpm.vcomponents.all;

-- This is the HyperRAM I/O connections

entity hyperram_io is
port (
clk_x1_i : in std_logic;
clk_x1_del_i : in std_logic; -- phase shifted.
delay_refclk_i : in std_logic; -- 200 MHz
rst_i : in std_logic;
clk_x1_i : in std_logic;
clk_x1_del_i : in std_logic; -- phase shifted.
delay_refclk_i : in std_logic; -- 200 MHz
rst_i : in std_logic;

-- Connect to HyperRAM controller
ctrl_rstn_i : in std_logic;
ctrl_ck_ddr_i : in std_logic_vector(1 downto 0);
ctrl_csn_i : in std_logic;
ctrl_dq_ddr_in_o : out std_logic_vector(15 downto 0);
ctrl_dq_ddr_out_i : in std_logic_vector(15 downto 0);
ctrl_dq_oe_i : in std_logic;
ctrl_dq_ie_o : out std_logic;
ctrl_rwds_ddr_out_i : in std_logic_vector(1 downto 0);
ctrl_rwds_oe_i : in std_logic;
ctrl_rwds_in_o : out std_logic;
ctrl_rstn_i : in std_logic;
ctrl_ck_ddr_i : in std_logic_vector(1 downto 0);
ctrl_csn_i : in std_logic;
ctrl_dq_ddr_in_o : out std_logic_vector(15 downto 0);
ctrl_dq_ddr_out_i : in std_logic_vector(15 downto 0);
ctrl_dq_oe_i : in std_logic;
ctrl_dq_ie_o : out std_logic;
ctrl_rwds_ddr_out_i : in std_logic_vector(1 downto 0);
ctrl_rwds_oe_i : in std_logic;
ctrl_rwds_in_o : out std_logic;

-- Connect to HyperRAM device
hr_resetn_o : out std_logic;
hr_csn_o : out std_logic;
hr_ck_o : out std_logic;
hr_rwds_in_i : in std_logic;
hr_dq_in_i : in std_logic_vector(7 downto 0);
hr_rwds_out_o : out std_logic;
hr_dq_out_o : out std_logic_vector(7 downto 0);
hr_rwds_oe_n_o : out std_logic;
hr_dq_oe_n_o : out std_logic_vector(7 downto 0)
hr_resetn_o : out std_logic;
hr_csn_o : out std_logic;
hr_ck_o : out std_logic;
hr_rwds_in_i : in std_logic;
hr_dq_in_i : in std_logic_vector(7 downto 0);
hr_rwds_out_o : out std_logic;
hr_dq_out_o : out std_logic_vector(7 downto 0);
hr_rwds_oe_n_o : out std_logic;
hr_dq_oe_n_o : out std_logic_vector(7 downto 0)
);
end entity hyperram_io;

architecture synthesis of hyperram_io is

begin

hr_csn_o <= ctrl_csn_i;
hr_resetn_o <= ctrl_rstn_i;
hr_csn_o <= ctrl_csn_i;
hr_resetn_o <= ctrl_rstn_i;


------------------------------------------------
-- OUTPUT BUFFERS
------------------------------------------------

b_output : block
output_block : block is

signal hr_dq_oe_n : std_logic_vector(7 downto 0);
signal hr_rwds_oe_n : std_logic;

Expand All @@ -69,9 +70,10 @@ begin
-- set_property IOB TRUE constraint to have effect.
attribute dont_touch : string;
attribute dont_touch of hr_dq_oe_n : signal is "true";

begin

i_oddr_clk : ODDR
oddr_clk_inst : component ODDR
generic map (
DDR_CLK_EDGE => "SAME_EDGE"
)
Expand All @@ -81,9 +83,9 @@ begin
CE => '1',
Q => hr_ck_o,
C => clk_x1_del_i
); -- i_oddr_clk
); -- oddr_clk_inst

i_oddr_rwds : ODDR
oddr_rwds_inst : component ODDR
generic map (
DDR_CLK_EDGE => "SAME_EDGE"
)
Expand All @@ -93,10 +95,10 @@ begin
CE => '1',
Q => hr_rwds_out_o,
C => clk_x1_i
); -- i_oddr_rwds
); -- oddr_rwds_inst

gen_oddr_dq : for i in 0 to 7 generate
i_oddr_dq : ODDR
oddr_dq_gen : for i in 0 to 7 generate
oddr_dq_inst : component ODDR
generic map (
DDR_CLK_EDGE => "SAME_EDGE"
)
Expand All @@ -106,23 +108,23 @@ begin
CE => '1',
Q => hr_dq_out_o(i),
C => clk_x1_i
); -- i_oddr_dq
end generate gen_oddr_dq;
); -- oddr_dq_inst
end generate oddr_dq_gen;

-- The Output Enable signals are active low, because that maps
-- directly into the TriState pin of an IOBUFT primitive.
p_output : process (clk_x1_i)
output_proc : process (clk_x1_i)
begin
if rising_edge(clk_x1_i) then
hr_dq_oe_n <= (others => not ctrl_dq_oe_i);
hr_rwds_oe_n <= not ctrl_rwds_oe_i;
end if;
end process p_output;
end process output_proc;

hr_dq_oe_n_o <= hr_dq_oe_n;
hr_rwds_oe_n_o <= hr_rwds_oe_n;

end block b_output;
end block output_block;


------------------------------------------------
Expand All @@ -135,33 +137,30 @@ begin
-- The actual delay is, according to Vivado's timing report, 2.474 ns.
------------------------------------------------

b_input : block
signal hr_dq_in : std_logic_vector(15 downto 0);
signal hr_rwds_in_delay : std_logic;
signal hr_toggle : std_logic := '0';

signal ctrl_toggle : std_logic;
signal ctrl_toggle_d : std_logic;
signal ctrl_dq_ddr_in : std_logic_vector(15 downto 0);
signal ctrl_dq_ie : std_logic;
signal ctrl_rwds_in : std_logic;

attribute ASYNC_REG : string;
attribute ASYNC_REG of ctrl_toggle : signal is "TRUE";
attribute ASYNC_REG of ctrl_toggle_d : signal is "TRUE";
attribute ASYNC_REG of ctrl_dq_ddr_in : signal is "TRUE";
attribute ASYNC_REG of ctrl_dq_ie : signal is "TRUE";
attribute ASYNC_REG of ctrl_rwds_in : signal is "TRUE";
input_block : block is

-- Synchronous to RWDS
signal rwds_dq_in : std_logic_vector(15 downto 0);
signal rwds_in_delay : std_logic;
signal rwds_toggle : std_logic := '0';

-- Synchronous to hr_clk_x1
signal ctrl_dq_ddr_in : std_logic_vector(15 downto 0);
signal ctrl_dq_ie : std_logic;
signal ctrl_rwds_in : std_logic;
signal ctrl_toggle : std_logic;
signal ctrl_toggle_d : std_logic;

begin

i_delay_ctrl : IDELAYCTRL
delay_ctrl_inst : component IDELAYCTRL
port map (
RST => rst_i,
REFCLK => delay_refclk_i,
RDY => open
); -- i_delay_ctrl
); -- delay_ctrl_inst

i_delay_rwds : IDELAYE2
delay_rwds_inst : component IDELAYE2
generic map (
IDELAY_TYPE => "FIXED",
DELAY_SRC => "IDATAIN",
Expand All @@ -183,51 +182,68 @@ begin
IDATAIN => hr_rwds_in_i,
DATAIN => '0',
LDPIPEEN => '0',
DATAOUT => hr_rwds_in_delay,
DATAOUT => rwds_in_delay,
CNTVALUEOUT => open
); -- i_delay_rwds
); -- delay_rwds_inst

gen_iddr_dq : for i in 0 to 7 generate
i_iddr_dq : IDDR
iddr_dq_gen : for i in 0 to 7 generate
iddr_dq_inst : component IDDR
generic map (
DDR_CLK_EDGE => "SAME_EDGE"
)
port map (
D => hr_dq_in_i(i),
CE => '1',
Q1 => hr_dq_in(i),
Q2 => hr_dq_in(i+8),
C => not hr_rwds_in_delay
); -- i_iddr_dq
end generate gen_iddr_dq;
Q1 => rwds_dq_in(i),
Q2 => rwds_dq_in(i+8),
C => not rwds_in_delay
); -- iddr_dq_inst
end generate iddr_dq_gen;

-- This Clock Domain Crossing block is to synchronize the input signal to the
-- clk_x1_i clock domain. It's not possible to use an ordinary async fifo, because
-- the input clock RWDS is not free-running.
p_hr : process (hr_rwds_in_delay)
begin
if falling_edge(hr_rwds_in_delay) then
hr_toggle <= not hr_toggle;
end if;
end process p_hr;

-- Clock domain crossing
p_async : process (clk_x1_i)
rwds_toggle_proc : process (rwds_in_delay)
begin
if rising_edge(clk_x1_i) then
ctrl_toggle <= hr_toggle;
ctrl_toggle_d <= ctrl_toggle;
ctrl_dq_ddr_in <= hr_dq_in;
ctrl_rwds_in <= hr_rwds_in_delay;
if falling_edge(rwds_in_delay) then
rwds_toggle <= not rwds_toggle;
end if;
end process p_async;
end process rwds_toggle_proc;

xpm_cdc_array_single_inst : component xpm_cdc_array_single
generic map (
DEST_SYNC_FF => 2,
INIT_SYNC_FF => 0,
SIM_ASSERT_CHK => 0,
SRC_INPUT_REG => 0,
WIDTH => 18
)
port map (
src_clk => '0',
src_in(15 downto 0) => rwds_dq_in,
src_in(16) => rwds_in_delay,
src_in(17) => rwds_toggle,
dest_clk => clk_x1_i,
dest_out(15 downto 0) => ctrl_dq_ddr_in,
dest_out(16) => ctrl_rwds_in,
dest_out(17) => ctrl_toggle
);

ctrl_dq_ie <= ctrl_toggle_d xor ctrl_toggle;

ctrl_dq_ddr_in_o <= ctrl_dq_ddr_in;
ctrl_dq_ie_o <= ctrl_dq_ie;
ctrl_rwds_in_o <= ctrl_rwds_in;

end block b_input;
ctrl_dq_ie_proc : process (clk_x1_i)
begin
if rising_edge(clk_x1_i) then
ctrl_toggle_d <= ctrl_toggle;
end if;
end process ctrl_dq_ie_proc;

end block input_block;

end architecture synthesis;

0 comments on commit 8044fdc

Please sign in to comment.