// -------------------------------------------------------------------------
//
//  PROJECT: PCI Express
//  COMPANY: Northwest Logic, Inc.
//
// ------------------------- CONFIDENTIAL ----------------------------------
//
//                 Copyright 2011 by Northwest Logic, Inc.
//
//  All rights reserved.  No part of this source code may be reproduced or
//  transmitted in any form or by any means, electronic or mechanical,
//  including photocopying, recording, or any information storage and
//  retrieval system, without permission in writing from Northest Logic, Inc.
//
//  Further, no use of this source code is permitted in any form or means
//  without a valid, written license agreement with Northwest Logic, Inc.
//
//  $Date: 2012-05-09 16:15:33 -0700 (Wed, 09 May 2012) $
//  $Revision: 28360 $
//
//                         Northwest Logic, Inc.
//                  1100 NW Compton Drive, Suite 100
//                      Beaverton, OR 97006, USA
//
//                       Ph.  +1 503 533 5800
//                       Fax. +1 503 533 5900
//                          www.nwlogic.com
//
// -------------------------------------------------------------------------

`timescale 1ps / 1ps



// -----------------------
// -- Module Definition --
// -----------------------

// Port list of c2s_adr_pkt.
// Direction/width notes below mirror the port declarations later in this file.
module c2s_adr_pkt (

    pcie_rst_n,         // in  : PCIe Clock and Reset (reset is tested active low)
    pcie_clk,           // in  : PCIe clock

    sdram_rst_n,        // in  : SDRAM Clock and Reset; design supports different
    sdram_clk,          // in  :   PCIe and SDRAM clock domains

    cmd_req,            // in  : Card to System DMA Engine: Command
    cmd_ready,          // out : registered; set while the command FIFO write level is below CMD_FIFO_READY_THRESH
    cmd_first_chain,    // in  : selects cmd_addr (1) or previous address + previous byte count (0) as the start address
    cmd_last_chain,     // in
    cmd_bcount,         // in  [31:0]
    cmd_addr,           // in  [63:0]; only the low DMA_DEST_ADDR_WIDTH bits are captured
    cmd_user_control,   // in  [USER_CONTROL_WIDTH-1:0]
    cmd_abort,          // in
    cmd_abort_ack,      // out : cmd_abort qualified by both FIFOs empty and the SDRAM state machine idle

    data_req,           // in  : Card to System DMA Engine: Data : Control
    data_ready,         // out : registered
    data_req_remain,    // in  [CORE_REMAIN_WIDTH-1:0]
    data_req_last_desc, // in
    data_addr,          // in  [63:0]
    data_bcount,        // in  [9:0]
    data_stop,          // out : tied to 0 unless SIMULATION is defined
    data_stop_bcount,   // out [9:0]; tied to 0 unless SIMULATION is defined

    data_en,            // in  : Card to System DMA Engine: Data : Data
    data_remain,        // in  [CORE_REMAIN_WIDTH-1:0]
    data_valid,         // in  [CORE_REMAIN_WIDTH:0]
    data_first_req,     // in
    data_last_req,      // in
    data_first_desc,    // in
    data_last_desc,     // in
    data_first_chain,   // in
    data_last_chain,    // in
    data_sop,           // out : data_first_chain delayed one pcie_clk
    data_eop,           // out : data_last_chain delayed one pcie_clk
    data_data,          // out [CORE_DATA_WIDTH-1:0]
    data_user_status,   // out [63:0]; tied to 0

    l_r_req,            // out : SDRAM Multi-Port Front-End Interface
    l_w_req,            // out
    l_req_pri,          // out [2:0]
    l_addr,             // out [SDRAM_ADDR_WIDTH-1:0]
    l_b_size,           // out [SDRAM_BSIZE_WIDTH-1:0]
    l_auto_pch,         // out
    l_busy,             // in
    l_r_valid,          // in
    l_r_valid_last,     // in
    l_d_req,            // in
    l_d_req_last,       // in
    l_datain,           // out [SDRAM_DATA_WIDTH-1:0]
    l_dm_in,            // out [SDRAM_DM_WIDTH-1:0]
    l_dataout           // in  [SDRAM_DATA_WIDTH-1:0]

);



// ----------------
// -- Parameters --
// ----------------

// NOTE: Only values which are parameters are intended to be modified from their default values

// SDRAM Multi-Port Front End Parameters
//   The three localparams below are fixed and must stay mutually consistent:
//   SDRAM_DM_WIDTH == SDRAM_DATA_WIDTH/8 and 2^SDRAM_REMAIN_WIDTH == SDRAM_DM_WIDTH
localparam  SDRAM_DATA_WIDTH        = 128;  // SDRAM controller local bus data width
localparam  SDRAM_DM_WIDTH          = 16;   // SDRAM controller local bus data mask width == SDRAM_DATA_WIDTH/8
localparam  SDRAM_REMAIN_WIDTH      = 4;    // Number of bits required to address all of the bytes in a SDRAM word; 2^SDRAM_REMAIN_WIDTH must equal SDRAM_DM_WIDTH

parameter   SDRAM_ADDR_WIDTH        = 33;   // SDRAM controller Local address width
parameter   SDRAM_BSIZE_WIDTH       = 8;    // Width of l_b_size parameter

parameter   DMA_DEST_ADDR_WIDTH     = 28;   // Number of byte address bits implemented by the SDRAM device; SDRAM depth == 2^DMA_DEST_ADDR_WIDTH; 2^28 = 256 MByte
localparam  DMA_DEST_BCOUNT_WIDTH   = 32;   // Width required to hold the largest byte count that must be supported on cmd_bcount;
                                            //   Note: sets the largest supported cmd_bcount == 2^(DMA_DEST_BCOUNT_WIDTH-1)
                                            //   Note: This value is allowed to exceed the SDRAM xfer size allowed by BURST_SIZE_WIDTH,
                                            //         because this module breaks large transfers into smaller bursts that fall within BURST_SIZE_WIDTH
                                            //   Note: 2^(DMA_DEST_BCOUNT_WIDTH-1) must be at largest 1/4 the byte depth of the Data FIFO

parameter   BYTE_ADDRESSING         = 0;    // Use Byte Addressing for SDRAM Interface if 1

// Convert byte address, count to SDRAM word size address, count
//   With BYTE_ADDRESSING == 1 the byte widths are used unchanged; otherwise the low
//   SDRAM_REMAIN_WIDTH bits are dropped (defaults: address 28-4 = 24 bits, count 32-4 = 28 bits)
localparam  DMA_SDRAM_ADDR_WIDTH    = (BYTE_ADDRESSING == 1) ? DMA_DEST_ADDR_WIDTH : (DMA_DEST_ADDR_WIDTH   - SDRAM_REMAIN_WIDTH);
localparam  DMA_SDRAM_COUNT_WIDTH   = (BYTE_ADDRESSING == 1) ? DMA_DEST_BCOUNT_WIDTH : DMA_DEST_BCOUNT_WIDTH - SDRAM_REMAIN_WIDTH;

// Command FIFO
parameter   FIFO_CADDR_WIDTH        = 4;    // Address width of command FIFO; set to larger of 4 or block RAM address width at max data width
localparam  FIFO_CDATA_WIDTH        = 1 + DMA_SDRAM_COUNT_WIDTH + DMA_DEST_ADDR_WIDTH; // {misalign flag, count, byte address}

// Data FIFO
parameter   FIFO_DADDR_WIDTH        = 7;    // Address width of data FIFO; set to larger of 7 or block RAM address width at max data width
localparam  FIFO_DDATA_WIDTH        = SDRAM_DATA_WIDTH;

// PCIe Complete Core Port Parameters
localparam  CORE_DATA_WIDTH         = 128;  // Width of input and output data
localparam  CORE_REMAIN_WIDTH       = 4;    // 2^CORE_REMAIN_WIDTH represents the number of bytes in CORE_DATA_WIDTH

// Width of the data FIFO byte count {data_fifo_rd_level, req_data_remain}.
//   The FIFO level is FIFO_DADDR_WIDTH+1 bits wide (it must be able to represent "full"),
//   so the byte count needs (FIFO_DADDR_WIDTH+1) + SDRAM_REMAIN_WIDTH bits. Using only
//   FIFO_DADDR_WIDTH + SDRAM_REMAIN_WIDTH silently drops the level MSB, making a
//   completely full FIFO look almost empty.
localparam  FIFO_BCOUNT_WIDTH       = (FIFO_DADDR_WIDTH + 1) + SDRAM_REMAIN_WIDTH;

// Set preferred burst size used when issuing SDRAM requests;
//   can still transfer less than this if that is all that is left in the command;
//   also will start bursts to align SDRAM addresses to the maximum transfer size
//   NOTE(review): the original wording referred to a "minimum burst size" and "MIN_XFER_SIZE",
//   but only MAX_XFER_SIZE_* constants are declared here - confirm the intended meaning
localparam  MAX_XFER_SIZE_BITS      = 4; // SDRAM Reads will be made at this SDRAM l_b_size (2^MAX_XFER_SIZE_BITS) when possible
                                          //   don't make any larger than 1/8 data FIFO depth
localparam  MAX_XFER_SIZE_WIDTH     = (BYTE_ADDRESSING == 1) ? MAX_XFER_SIZE_BITS + 1 + SDRAM_REMAIN_WIDTH : MAX_XFER_SIZE_BITS + 1; // Number of bits required to hold MAX_XFER_SIZE

// Set thresholds for which ready will be asserted; subtract from max level to accommodate latencies and required signaling protocol
//   With the default widths: CMD_FIFO_READY_THRESH = 16 - 5 = 11, SDRAM_DATA_READY_THRESH = 128 - 32 - 1 = 95
localparam  CMD_FIFO_READY_THRESH   = (1 << FIFO_CADDR_WIDTH) - 5;  // Subtract 5 to safeguard against command level latency
localparam  SDRAM_DATA_READY_THRESH = (1 << FIFO_DADDR_WIDTH) - (1 << (MAX_XFER_SIZE_BITS+1)) - 1; // Not ready when can't store a full transfer

// State Machine States
//   Encodings for the 3-bit "state" register of the SDRAM control state machine
localparam  IDLE                    = 3'h0; // a pending command is popped from the command FIFO only in this state
localparam  WAIT                    = 3'h1;
localparam  DRDY                    = 3'h2;
localparam  DREQ                    = 3'h3; // r_sdram_addr advances by l_b_size in this state when l_busy is low
localparam  DONE                    = 3'h4;

// Width of the cmd_user_control input port
localparam  USER_CONTROL_WIDTH      = 64;



// ----------------------
// -- Port Definitions --
// ----------------------

// PCIe clock domain clock and reset
input                                   pcie_rst_n;
input                                   pcie_clk;

// SDRAM clock domain clock and reset
input                                   sdram_rst_n;
input                                   sdram_clk;

// Command interface (sampled/driven on pcie_clk)
input                                   cmd_req;
output                                  cmd_ready;
input                                   cmd_first_chain;
input                                   cmd_last_chain;
input   [31:0]                          cmd_bcount;
input   [63:0]                          cmd_addr;
input   [USER_CONTROL_WIDTH-1:0]        cmd_user_control;
input                                   cmd_abort;
output                                  cmd_abort_ack;

// Data request/grant interface (sampled/driven on pcie_clk)
input                                   data_req;
output                                  data_ready;
input   [CORE_REMAIN_WIDTH-1:0]         data_req_remain;
input                                   data_req_last_desc;
input   [63:0]                          data_addr;
input   [9:0]                           data_bcount;
output                                  data_stop;
output  [9:0]                           data_stop_bcount;

// Data transfer interface (sampled/driven on pcie_clk)
input                                   data_en;
input   [CORE_REMAIN_WIDTH-1:0]         data_remain;
input   [CORE_REMAIN_WIDTH:0]           data_valid;
input                                   data_first_req;
input                                   data_last_req;
input                                   data_first_desc;
input                                   data_last_desc;
input                                   data_first_chain;
input                                   data_last_chain;
output                                  data_sop;
output                                  data_eop;
output  [CORE_DATA_WIDTH-1:0]           data_data;
output  [63:0]                          data_user_status;

// SDRAM multi-port front-end interface
output                                  l_r_req;
output                                  l_w_req;
output  [2:0]                           l_req_pri;
output  [SDRAM_ADDR_WIDTH-1:0]          l_addr;
output  [SDRAM_BSIZE_WIDTH-1:0]         l_b_size;
output                                  l_auto_pch;
input                                   l_busy;
input                                   l_r_valid;
input                                   l_r_valid_last;
input                                   l_d_req;
input                                   l_d_req_last;
output  [SDRAM_DATA_WIDTH-1:0]          l_datain;
output  [SDRAM_DM_WIDTH-1:0]            l_dm_in;
input   [SDRAM_DATA_WIDTH-1:0]          l_dataout;



// ----------------
// -- Port Types --
// ----------------

// Net/variable types for every port. Ports declared "reg" are assigned procedurally;
// all others are inputs or are driven by continuous assignments / instances.
wire                                    pcie_rst_n;
wire                                    pcie_clk;

wire                                    sdram_rst_n;
wire                                    sdram_clk;

wire                                    cmd_req;
reg                                     cmd_ready;          // registered on pcie_clk
wire                                    cmd_first_chain;
wire                                    cmd_last_chain;
wire    [31:0]                          cmd_bcount;
wire    [63:0]                          cmd_addr;
wire    [USER_CONTROL_WIDTH-1:0]        cmd_user_control;
wire                                    cmd_abort;
wire                                    cmd_abort_ack;

wire                                    data_req;
reg                                     data_ready;         // registered on pcie_clk
wire    [CORE_REMAIN_WIDTH-1:0]         data_req_remain;
wire                                    data_req_last_desc;
wire    [63:0]                          data_addr;
wire    [9:0]                           data_bcount;
wire                                    data_stop;
wire    [9:0]                           data_stop_bcount;

wire                                    data_en;
wire    [CORE_REMAIN_WIDTH-1:0]         data_remain;
wire    [CORE_REMAIN_WIDTH:0]           data_valid;
wire                                    data_first_req;
wire                                    data_last_req;
wire                                    data_first_desc;
wire                                    data_last_desc;
wire                                    data_first_chain;
wire                                    data_last_chain;
reg                                     data_sop;           // registered on pcie_clk
reg                                     data_eop;           // registered on pcie_clk
wire    [CORE_DATA_WIDTH-1:0]           data_data;
wire    [63:0]                          data_user_status;

wire                                    l_r_req;
wire                                    l_w_req;
wire    [2:0]                           l_req_pri;
wire    [SDRAM_ADDR_WIDTH-1:0]          l_addr;
reg     [SDRAM_BSIZE_WIDTH-1:0]         l_b_size;           // NOTE(review): driver is not in this part of the file - presumably an always block further down
wire                                    l_auto_pch;
wire                                    l_busy;
wire                                    l_r_valid;
wire                                    l_r_valid_last;
wire                                    l_d_req;
wire                                    l_d_req_last;
wire    [SDRAM_DATA_WIDTH-1:0]          l_datain;
wire    [SDRAM_DM_WIDTH-1:0]            l_dm_in;
wire    [SDRAM_DATA_WIDTH-1:0]          l_dataout;



// -------------------
// -- Local Signals --
// -------------------

// Reset Generation and Propagation
reg                                     pcie_rst_n_d1;      // first stage of pcie_rst_n release synchronizer (sdram_clk)
reg                                     pcie_rst_n_d2;      // second stage of pcie_rst_n release synchronizer (sdram_clk)
wire                                    sdram_rst_n_int;    // sdram_rst_n AND pcie_rst_n_d2
reg                                     sdram_rst_n_r;      // registered reset used by the sdram_clk domain logic and FIFO ports

// Command FIFO
wire    [DMA_DEST_BCOUNT_WIDTH-1:0]     c_offset_cmd_bcount; // r_cmd_bcount plus the start address byte offset within an SDRAM word
wire    [DMA_SDRAM_COUNT_WIDTH-1:0]     c_cmd_count;        // c_offset_cmd_bcount converted to SDRAM words, rounded up
wire    [SDRAM_REMAIN_WIDTH:0]          c_misalign_sum;     // address offset + low byte-count bits (0 counted as a full word)
wire                                    c_misalign_en;      // c_misalign_sum exceeds one SDRAM word

reg                                     cmd_fifo_wr_en;     // d_cmd_fifo_wr_en delayed one pcie_clk; command FIFO write strobe
reg     [FIFO_CDATA_WIDTH-1:0]          cmd_fifo_wr_data;   // {c_misalign_en, count, r_cmd_addr}
wire    [FIFO_CADDR_WIDTH:0]            cmd_fifo_wr_level;
reg                                     d_cmd_fifo_wr_en;   // cmd_req & cmd_ready delayed one pcie_clk
reg     [DMA_SDRAM_COUNT_WIDTH-1:0]     r_cmd_bcount;       // low bits of cmd_bcount captured when a command is accepted
reg     [DMA_DEST_ADDR_WIDTH-1:0]       r_cmd_addr;         // start byte address of the accepted command


reg                                     cmd_fifo_rd_en;     // registered (state == IDLE) & sdram_cmd_pending; command FIFO rd_ack
wire    [FIFO_CDATA_WIDTH-1:0]          cmd_fifo_rd_data;
wire                                    cmd_fifo_rd_empty;

wire                                    sdram_cmd_pending;  // ~cmd_fifo_rd_empty
reg                                     sdram_misalign;     // registered MSB of cmd_fifo_rd_data
reg     [DMA_SDRAM_COUNT_WIDTH-1:0]     sdram_count;        // registered count field of cmd_fifo_rd_data
reg     [DMA_DEST_ADDR_WIDTH-1:0]       sdram_addr;         // registered address field of cmd_fifo_rd_data

// Data FIFO
wire    [SDRAM_REMAIN_WIDTH:0]          c_remain_bcount;
reg     [SDRAM_REMAIN_WIDTH-1:0]        remain_bcount;      // cleared on an accepted command, updated on an accepted data request
wire    [9:0]                           curr_bcount;        // request byte count minus remain_bcount
reg                                     req_pend;
wire                                    round_up;           // low request byte-count bits exceed req_data_remain

reg     [9-SDRAM_REMAIN_WIDTH:0]        data_count;         // number of FIFO words to advance for the current request
wire                                    data_fifo_rd_empty;
wire                                    data_fifo_rd_adv_en;  // data_req & data_ready
wire    [FIFO_DADDR_WIDTH-2:0]          data_fifo_rd_adv_inc; // data_count

reg                                     data_fifo_wr_en;
wire    [FIFO_DDATA_WIDTH-1:0]          data_fifo_wr_data;
wire    [FIFO_DADDR_WIDTH:0]            data_fifo_wr_level;

wire                                    data_fifo_rd_en;    // ~data_fifo_rd_empty & tf_dst_rdy
wire    [FIFO_DDATA_WIDTH-1:0]          data_fifo_rd_data;
wire    [FIFO_DADDR_WIDTH:0]            data_fifo_rd_level; // connected to the data FIFO rd_adv_level output

wire    [FIFO_BCOUNT_WIDTH-1:0]         data_fifo_rd_bcount; // {data_fifo_rd_level, req_data_remain}

reg                                     tf_rst_n;           // tiny FIFO reset; registered ~cmd_abort_ack
wire    [FIFO_DDATA_WIDTH-1:0]          tf_rd_data;
wire                                    tf_dst_rdy;
wire                                    tf_rd_en;           // data_en & ~data_en_mask
wire                                    tf_out_src_rdy_unused;

reg     [SDRAM_REMAIN_WIDTH-1:0]        req_data_remain;

reg     [CORE_DATA_WIDTH-1:0]           r_data_data;        // registered, byte-realigned output data; drives data_data
reg     [SDRAM_DATA_WIDTH-1:0]          hold_data;          // last word read from the tiny FIFO
wire                                    data_en_mask;       // data_last_req & carry out of held_plus_remain

reg     [SDRAM_REMAIN_WIDTH-1:0]        hold_data_remain;   // selects the byte realignment applied to r_data_data

wire    [CORE_REMAIN_WIDTH:0]           held_plus_remain;   // hold_data_remain + data_remain, with carry

// SDRAM Control State Machine
wire    [MAX_XFER_SIZE_WIDTH-1:0]       max_size;

reg     [SDRAM_REMAIN_WIDTH-1:0]        r_sdram_addr_offset; // low address bits of the command, captured on cmd_fifo_rd_en
reg     [DMA_SDRAM_ADDR_WIDTH-1:0]      r_sdram_addr;
reg     [MAX_XFER_SIZE_WIDTH-1:0]       r_max_size;
reg     [DMA_SDRAM_COUNT_WIDTH-1:0]     cmd_remain_count;

reg                                     sdram_data_ready;
wire    [DMA_SDRAM_COUNT_WIDTH-1:0]     sdram_req_count;

reg                                     l_r_valid_misalign;
reg                                     first_l_r_valid;
reg                                     end_l_r_valid;
reg                                     r_extra_l_r_valid;

wire                                    post_cmd;

wire    [SDRAM_DATA_WIDTH-1:0]          l_dataout_in;       // l_dataout (with non-1 bits forced to 0 when SIMULATION is defined)
reg     [SDRAM_DATA_WIDTH-1:0]          r_l_dataout;
reg     [SDRAM_DATA_WIDTH-1:0]          sdram_offset_data;

reg     [FIFO_DADDR_WIDTH-MAX_XFER_SIZE_BITS:0] reads_outstanding;

// Two-stage copies of sdram_clk-side status, registered on pcie_clk for the abort acknowledge
reg                                     d_cmd_fifo_rd_empty_pclk;
reg                                     cmd_fifo_rd_empty_pclk;

reg                                     d_data_fifo_rd_empty_pclk;
reg                                     data_fifo_rd_empty_pclk;

reg                                     d_state_idle_pclk;
reg                                     state_idle_pclk;

reg     [2:0]                           state;
reg                                     state_idle;

// Unused Outputs
wire                                    cmd_wr_full_unused;
wire [FIFO_CADDR_WIDTH:0]               cmd_rd_level_unused;
wire [FIFO_CADDR_WIDTH:0]               cmd_xfer_level_unused;
wire                                    data_wr_full_unused;
wire [FIFO_DADDR_WIDTH:0]               data_rd_level_unused;

`ifdef SIMULATION
// Simulation-only abort injection controls; both start out cleared.
// NOTE(review): nothing in the visible code writes these after time 0 -
// presumably a testbench sets them through a hierarchical reference; confirm.
reg                                     abort_enable;
reg     [63:0]                          abort_address;

initial
begin
    abort_enable  = 1'b0;
    abort_address = {64{1'b0}};
end
`endif

// ---------------
// -- Equations --
// ---------------

// -----------------
// Reset Logic
//
// Propagate the PCIe reset to the SDRAM clock domain
// so that a DMA Reset from the DMA Back End will allow proper reset of
// the FIFO logic

// Two-stage shift register on sdram_clk: both stages clear asynchronously while
// pcie_rst_n is low; after release a 1 is shifted in, so pcie_rst_n_d2 goes high
// on the second sdram_clk rising edge.
always @(posedge sdram_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
        {pcie_rst_n_d2, pcie_rst_n_d1} <= 2'b00;
    else
        {pcie_rst_n_d2, pcie_rst_n_d1} <= {pcie_rst_n_d1, 1'b1};
end

// Either reset source (SDRAM reset or the synchronized PCIe reset) asserts the internal reset
assign sdram_rst_n_int = pcie_rst_n_d2 & sdram_rst_n;

// Asynchronous assert, release on the next sdram_clk rising edge
always @(posedge sdram_clk or negedge sdram_rst_n_int)
begin
    if (!sdram_rst_n_int)
    begin
        sdram_rst_n_r <= 1'b0;
    end
    else
    begin
        sdram_rst_n_r <= 1'b1;
    end
end


`ifdef SIMULATION
// In simulation, map every l_dataout bit that is not exactly 1 (0, X or Z) to 0
// so unknowns in unused bytes do not spread into the data path.
genvar i;
generate
    for (i = 0; i < SDRAM_DATA_WIDTH; i = i + 1)
    begin : remove_x_gen
        // case equality always yields a known 1'b0 / 1'b1
        assign l_dataout_in[i] = (l_dataout[i] === 1'b1);
    end
endgenerate
`else
// Outside simulation the SDRAM read data is used unchanged
assign l_dataout_in = l_dataout;
`endif



// ------------
// Unused Ports

assign data_user_status = {64{1'b0}}; // user status is not generated by this module; drive all zeros

// -------------
// Handle Aborts

// Re-register status flags on pcie_clk through two flops each
// (d_* is the first stage, the un-prefixed name is the second stage).
// All stages reset to 1, i.e. "empty" / "idle".
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        {cmd_fifo_rd_empty_pclk,  d_cmd_fifo_rd_empty_pclk}  <= 2'b11;
        {data_fifo_rd_empty_pclk, d_data_fifo_rd_empty_pclk} <= 2'b11;
        {state_idle_pclk,         d_state_idle_pclk}         <= 2'b11;
    end
    else
    begin
        {cmd_fifo_rd_empty_pclk,  d_cmd_fifo_rd_empty_pclk}  <= {d_cmd_fifo_rd_empty_pclk,  cmd_fifo_rd_empty};
        {data_fifo_rd_empty_pclk, d_data_fifo_rd_empty_pclk} <= {d_data_fifo_rd_empty_pclk, data_fifo_rd_empty};
        {state_idle_pclk,         d_state_idle_pclk}         <= {d_state_idle_pclk,         state_idle};
    end
end


// An abort is acknowledged only while it is being requested and the resynchronized
//   status shows the command FIFO empty, the data FIFO empty and the SDRAM state
//   machine idle, i.e. no operation is still outstanding
assign cmd_abort_ack = &{cmd_abort, cmd_fifo_rd_empty_pclk, data_fifo_rd_empty_pclk, state_idle_pclk};


// The DMA Back End already marks start/end of packet through data_first_chain and
//   data_last_chain; register both once so they line up with the registered data
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
        {data_sop, data_eop} <= 2'b00;
    else
        {data_sop, data_eop} <= {data_first_chain, data_last_chain};
end



// ------------
// Command FIFO

// Note:
//   cmd_short is not supported
//   cmd_control is unused

// The command FIFO carries the SDRAM starting address and length in bytes from the PCIe clock domain to the SDRAM clock domain

// Compute SDRAM count from byte count and starting address offset
//   c_offset_cmd_bcount : registered byte count plus the byte offset of the start address inside an SDRAM word
//   c_cmd_count         : that sum in SDRAM words; one extra word is added when any of the low
//                         SDRAM_REMAIN_WIDTH bits of the sum are non-zero
assign c_offset_cmd_bcount = r_cmd_bcount + r_cmd_addr[SDRAM_REMAIN_WIDTH-1:0]; // Add starting address offset to cmd_bcount
assign c_cmd_count         = c_offset_cmd_bcount[DMA_DEST_BCOUNT_WIDTH-1:SDRAM_REMAIN_WIDTH] + {{(DMA_SDRAM_COUNT_WIDTH-1){1'b0}}, (c_offset_cmd_bcount[SDRAM_REMAIN_WIDTH-1:0] != {SDRAM_REMAIN_WIDTH{1'b0}})}; // Divide by SDRAM byte width and round up

// If c_misalign_en == 1, then the SDRAM address offset + byte count will force one additional SDRAM word to be fetched than required by the byte count
//   The second operand of the sum is the byte count modulo the SDRAM word size, with a
//   result of 0 replaced by a full word (2^SDRAM_REMAIN_WIDTH); c_misalign_en is set when
//   the sum is strictly greater than one full word
assign c_misalign_sum = {1'b0, r_cmd_addr[SDRAM_REMAIN_WIDTH-1:0]} + {(r_cmd_bcount[SDRAM_REMAIN_WIDTH-1:0] == {SDRAM_REMAIN_WIDTH{1'b0}}), r_cmd_bcount[SDRAM_REMAIN_WIDTH-1:0]};
assign c_misalign_en  = (c_misalign_sum > {1'b1, {SDRAM_REMAIN_WIDTH{1'b0}}});

// Command capture and command FIFO write pipeline (pcie_clk domain)
//   clock 0 : command accepted (cmd_req & cmd_ready); byte count and start address are captured
//   clock 1 : cmd_fifo_wr_data is formed from the captured values
//   clock 2 : cmd_fifo_wr_en writes that word into the command FIFO
// NOTE(review): r_cmd_bcount is DMA_SDRAM_COUNT_WIDTH bits wide, so with BYTE_ADDRESSING == 0
//   the upper SDRAM_REMAIN_WIDTH bits of cmd_bcount are not captured - confirm this is intended.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        cmd_ready        <= 1'b0;
        d_cmd_fifo_wr_en <= 1'b0;
        cmd_fifo_wr_en   <= 1'b0;
        r_cmd_addr       <= {DMA_DEST_ADDR_WIDTH{1'b0}};
        r_cmd_bcount     <= {DMA_SDRAM_COUNT_WIDTH{1'b0}};
        cmd_fifo_wr_data <= {FIFO_CDATA_WIDTH{1'b0}};
    end
    else
    begin
        // Ready while the FIFO write level leaves headroom for commands still in the pipeline
        cmd_ready <= (cmd_fifo_wr_level < CMD_FIFO_READY_THRESH);

        // Two-stage delay of the accept strobe so it lines up with cmd_fifo_wr_data
        d_cmd_fifo_wr_en <= cmd_req & cmd_ready;
        cmd_fifo_wr_en   <= d_cmd_fifo_wr_en;

        if (cmd_req & cmd_ready)
        begin
            r_cmd_bcount <= cmd_bcount[DMA_SDRAM_COUNT_WIDTH-1:0];
            // First command of a chain takes its address from cmd_addr; every later one
            // continues where the previous command ended
            r_cmd_addr   <= cmd_first_chain ? cmd_addr[DMA_DEST_ADDR_WIDTH-1:0] : r_cmd_addr + r_cmd_bcount;
        end

        // Byte addressing forwards the byte count; otherwise the SDRAM word count is forwarded
        cmd_fifo_wr_data <= (BYTE_ADDRESSING == 1) ?
                            {c_misalign_en, r_cmd_bcount[DMA_SDRAM_COUNT_WIDTH-1:0], r_cmd_addr[DMA_DEST_ADDR_WIDTH-1:0]} :
                            {c_misalign_en, c_cmd_count[DMA_SDRAM_COUNT_WIDTH-1:0],  r_cmd_addr[DMA_DEST_ADDR_WIDTH-1:0]};
    end
end

// Instantiate Command FIFO
//   Written on pcie_clk, read on sdram_clk. ref_dc_fifo_shallow_ram is defined outside this
//   file, so the meaning of its parameters/ports is taken from their names only.
ref_dc_fifo_shallow_ram #(

    .ADDR_WIDTH         (FIFO_CADDR_WIDTH           ),
    .DATA_WIDTH         (FIFO_CDATA_WIDTH           ),
    .EN_SPECULATIVE_RD  (1'b0                       ),
    .EN_LOOK_AHEAD      (1'b1                       ),
    .DLY_WR_FOR_RD_LVL  (1'b0                       )

) c2s_cmd_fifo (

    // Write side: PCIe clock domain
    .wr_rst_n           (pcie_rst_n                 ),
    .wr_clk             (pcie_clk                   ),
    .wr_clr             (1'b0                       ),
    .wr_en              (cmd_fifo_wr_en             ),
    .wr_data            (cmd_fifo_wr_data           ),
    .wr_level           (cmd_fifo_wr_level          ), // compared against CMD_FIFO_READY_THRESH to form cmd_ready
    .wr_full            (cmd_wr_full_unused         ),

    // Read side: SDRAM clock domain; clear/flush/xfer/sync inputs are tied off
    .rd_rst_n           (sdram_rst_n_r              ),
    .rd_clk             (sdram_clk                  ),
    .rd_clr             (1'b0                       ),
    .rd_flush           (1'b0                       ),
    .rd_ack             (cmd_fifo_rd_en             ),
    .rd_xfer            (1'b0                       ),
    .rd_sync            (1'b0                       ),
    .rd_data            (cmd_fifo_rd_data           ),
    .rd_level           (cmd_rd_level_unused        ),
    .rd_xfer_level      (cmd_xfer_level_unused      ),
    .rd_empty           (cmd_fifo_rd_empty          )
);

// A command is waiting whenever the command FIFO read side is not empty
assign sdram_cmd_pending = !cmd_fifo_rd_empty;

// Register the command FIFO output word and split it into its three fields:
//   {misalign flag, count, byte address} - same layout as cmd_fifo_wr_data
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        {sdram_misalign, sdram_count, sdram_addr} <= {FIFO_CDATA_WIDTH{1'b0}};
    else
        {sdram_misalign, sdram_count, sdram_addr} <= cmd_fifo_rd_data;
end



// ---------
// Data FIFO

// The data FIFO carries data from the SDRAM clock domain to the PCIe clock domain

// Instantiate Data FIFO
//   Written on sdram_clk, read on pcie_clk. ref_dc_fifo_adv_block_ram is defined outside this
//   file, so the meaning of its parameters/ports is taken from their names only.
ref_dc_fifo_adv_block_ram #(

    .ADDR_WIDTH         (FIFO_DADDR_WIDTH           ),
    .DATA_WIDTH         (FIFO_DDATA_WIDTH           ),
    .INC_WIDTH          (FIFO_DADDR_WIDTH-1         ), // matches the width of data_fifo_rd_adv_inc
    .EN_LOOK_AHEAD      (1'b1                       ),
    .DLY_WR_FOR_RD_LVL  (1'b1                       )

) c2s_data_fifo (

    // Write side: SDRAM clock domain
    .wr_rst_n           (sdram_rst_n_r              ),
    .wr_clk             (sdram_clk                  ),
    .wr_en              (data_fifo_wr_en            ),
    .wr_data            (data_fifo_wr_data          ),
    .wr_level           (data_fifo_wr_level         ),
    .wr_full            (data_wr_full_unused        ),

    // Read side: PCIe clock domain
    .rd_rst_n           (pcie_rst_n                 ),
    .rd_clk             (pcie_clk                   ),
    .rd_en              (data_fifo_rd_en            ),
    .rd_data            (data_fifo_rd_data          ),
    .rd_level           (data_rd_level_unused       ),
    .rd_empty           (data_fifo_rd_empty         ),
    .rd_adv_en          (data_fifo_rd_adv_en        ), // pulsed on data_req & data_ready
    .rd_adv_inc         (data_fifo_rd_adv_inc       ), // data_count words per accepted request
    .rd_adv_level       (data_fifo_rd_level         )  // level used when deciding data_ready

);

// Pop the data FIFO whenever it holds data and the tiny FIFO can accept a word
assign data_fifo_rd_en = tf_dst_rdy & ~data_fifo_rd_empty;

// Tiny FIFO reset: asserted by the PCIe reset, and for the clock after
// an abort has been acknowledged
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        tf_rst_n <= 1'b0;
    end
    else
    begin
        tf_rst_n <= ~cmd_abort_ack;
    end
end


// Register RAM output for improved timing
//   Sits between the data FIFO read port and the output realignment logic.
//   ref_tiny_fifo is defined outside this file; port roles are taken from their names only.
ref_tiny_fifo #(
    .DATA_WIDTH     (FIFO_DDATA_WIDTH       )
) tiny_fifo(

    .rst_n          (tf_rst_n               ), // also pulsed low after an acknowledged abort
    .clk            (pcie_clk               ),

    // Input side: fed directly from the data FIFO read port
    .in_src_rdy     (~data_fifo_rd_empty    ),
    .in_dst_rdy     (tf_dst_rdy             ), // gates data_fifo_rd_en
    .in_data        (data_fifo_rd_data      ),

    // Output side: consumed when tf_rd_en (data_en & ~data_en_mask) is high
    .out_src_rdy    (tf_out_src_rdy_unused  ),
    .out_dst_rdy    (tf_rd_en               ),
    .out_data       (tf_rd_data             )

);

// Bytes that were read from SDRAM but not transferred by the current request:
//   (one SDRAM word - low bits of curr_bcount) + bytes already being held
assign c_remain_bcount = ({1'b1, {SDRAM_REMAIN_WIDTH{1'b0}}} - {1'b0, curr_bcount[SDRAM_REMAIN_WIDTH-1:0]})
                         + hold_data_remain[SDRAM_REMAIN_WIDTH-1:0];

// remain_bcount restarts at 0 for every accepted command and is otherwise
// refreshed on every accepted data request; an accepted command takes priority
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        remain_bcount <= {SDRAM_REMAIN_WIDTH{1'b0}};
    end
    else
    begin
        if (data_req & data_ready)
            remain_bcount <= c_remain_bcount[SDRAM_REMAIN_WIDTH-1:0];

        // Last assignment wins, so an accepted command overrides the update above
        if (cmd_req & cmd_ready)
            remain_bcount <= {SDRAM_REMAIN_WIDTH{1'b0}};
    end
end


// Data request handshake (pcie_clk domain).
//   Two variants are provided: the SIMULATION variant can stop a request early at
//   abort_address; the normal variant never stops (data_stop tied to 0).
//   Apart from the abort handling the two variants share the same structure.
`ifdef SIMULATION
wire   abort;
reg    abort_data_ready;
// abort is set when abort_enable is set and abort_address lies inside the current request,
//   i.e. data_addr <= abort_address < data_addr + data_bcount
assign abort = ~((data_addr > abort_address) || (data_addr + data_bcount <= abort_address) || (abort_enable == 1'b0));
assign data_stop        = abort_data_ready;
// When aborting, only the bytes below abort_address are reported; otherwise the full request
assign data_stop_bcount = (abort) ? (abort_address - data_addr) : data_bcount;

assign curr_bcount = data_stop_bcount - {{(10-SDRAM_REMAIN_WIDTH){1'b0}}, remain_bcount};
// An extra FIFO word is needed only when the low byte-count bits exceed req_data_remain
assign round_up = (data_stop_bcount[SDRAM_REMAIN_WIDTH-1:0] > req_data_remain[SDRAM_REMAIN_WIDTH-1:0]);

always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (pcie_rst_n == 1'b0)
    begin
        req_data_remain     <= {SDRAM_REMAIN_WIDTH{1'b0}};
        data_count          <= {(10 - SDRAM_REMAIN_WIDTH){1'b0}};
        req_pend            <= 1'b0;
        data_ready          <= 1'b0;
        abort_data_ready    <= 1'b0;
    end
    else
    begin
        // Update the running remainder only when a request is granted
        if (data_req & data_ready)
        begin
            if (data_req_last_desc)
                req_data_remain <= {SDRAM_REMAIN_WIDTH{1'b0}};
            else
                //req_data_remain <= req_data_remain + data_req_remain;
                // Need to keep track of remainder modulo SDRAM width (16)
                req_data_remain <= req_data_remain - data_stop_bcount[SDRAM_REMAIN_WIDTH-1:0];
        end

        // Number of FIFO words consumed by the request: byte count / SDRAM word size, plus 1 if round_up
        data_count <= round_up ? (data_stop_bcount[9:SDRAM_REMAIN_WIDTH] + {{(9 - SDRAM_REMAIN_WIDTH){1'b0}}, 1'b1}) : data_stop_bcount[9:SDRAM_REMAIN_WIDTH];

        // A request is pending while it is asserted and has not been granted or stopped
        if (data_req)
            req_pend <= ~(data_ready | abort_data_ready);

        // Force data_ready low for one clock after a grant so we can register data_ready
        if (data_req & data_ready)
            data_ready <= 1'b0;
        // Grant only when the FIFO byte count covers the request and no abort applies
        else if (req_pend == 1'b1 && data_fifo_rd_bcount >= { {(FIFO_BCOUNT_WIDTH-10){1'b0}},data_stop_bcount} && ~abort)
            data_ready <= 1'b1;
        else
            data_ready <= 1'b0;

        // Same one-clock pulse scheme as data_ready, raised instead of a grant when aborting
        if (data_req & abort_data_ready)
            abort_data_ready <= 1'b0;
        else if (req_pend == 1'b1 && abort)
            abort_data_ready <= 1'b1;
        else
            abort_data_ready <= 1'b0;
    end
end
`else

// Requests are never stopped early outside simulation
assign data_stop        = 1'b0;
assign data_stop_bcount = 10'b0;

assign curr_bcount = data_bcount - {{(10-SDRAM_REMAIN_WIDTH){1'b0}}, remain_bcount};

// Only if the low bits of data_bcount exceed req_data_remain would we need to round up a remainder
assign round_up = (data_bcount[SDRAM_REMAIN_WIDTH-1:0] > req_data_remain[SDRAM_REMAIN_WIDTH-1:0]);

// Register ready term for better FMax; ready when there is a minimum amount of data available
//   or when there is at least 1 command that has completed in entirety
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (pcie_rst_n == 1'b0)
    begin
        req_data_remain     <= {SDRAM_REMAIN_WIDTH{1'b0}};
        data_count          <= {(10 - SDRAM_REMAIN_WIDTH){1'b0}};
        req_pend            <= 1'b0;
        data_ready          <= 1'b0;
    end
    else
    begin
        // Update the running remainder only when a request is granted
        if (data_req & data_ready)
        begin
            if (data_req_last_desc)
                req_data_remain <= {SDRAM_REMAIN_WIDTH{1'b0}};
            else
                //req_data_remain <= req_data_remain + data_req_remain;
                // Need to keep track of remainder modulo SDRAM width (16)
                req_data_remain <= req_data_remain - data_bcount[SDRAM_REMAIN_WIDTH-1:0];
        end

        // Number of FIFO words consumed by the request: byte count / SDRAM word size, plus 1 if round_up
        data_count <= round_up ? (data_bcount[9:SDRAM_REMAIN_WIDTH] + {{(9 - SDRAM_REMAIN_WIDTH){1'b0}}, 1'b1}) : data_bcount[9:SDRAM_REMAIN_WIDTH];

        // A request is pending while it is asserted and has not been granted
        if (data_req)
            req_pend <= ~data_ready;

        // Force data_ready low for one clock after a grant so we can register data_ready
        if (data_req & data_ready)
            data_ready <= 1'b0;
        // Grant only when the FIFO byte count covers the whole request
        else if (req_pend == 1'b1 && data_fifo_rd_bcount >= { {(FIFO_BCOUNT_WIDTH-10){1'b0}},data_bcount})
            data_ready <= 1'b1;
        else
            data_ready <= 1'b0;
    end
end
`endif

// Output data comes straight from the realignment register
assign data_data            = r_data_data;

// Pop a word from the tiny FIFO on every data beat that is not masked
assign tf_rd_en             = ~data_en_mask & data_en;

// Available bytes: whole words in the FIFO followed by the running remainder
//   NOTE(review): the concatenation is (FIFO_DADDR_WIDTH+1) + SDRAM_REMAIN_WIDTH bits wide;
//   verify data_fifo_rd_bcount is declared wide enough to hold all of it
assign data_fifo_rd_bcount  = {data_fifo_rd_level, req_data_remain};

// Advance the FIFO read level by data_count words when a request is granted;
//   data_count width must always be equal to or smaller than data_fifo_rd_adv_inc
assign data_fifo_rd_adv_inc = data_count;
assign data_fifo_rd_adv_en  = data_ready & data_req;

// Keep a copy of each word taken from the tiny FIFO; its upper bytes may be
// needed to complete the next output word
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        hold_data <= {SDRAM_DATA_WIDTH{1'b0}};
    end
    else if (tf_rd_en)
    begin
        hold_data <= tf_rd_data[SDRAM_DATA_WIDTH-1:0];
    end
end

// held_plus_remain carries out when the held bytes plus data_remain reach a full word;
//   on the last beat of a request that means the held data already covers it, so the
//   tiny FIFO read is masked
assign held_plus_remain     = {1'b0, data_remain} + {1'b0, hold_data_remain};
assign data_en_mask         = held_plus_remain[CORE_REMAIN_WIDTH] & data_last_req;

// Output realignment (pcie_clk domain)
//   hold_data_remain = N selects an output word built from the lower (16-N) bytes of the
//   current tiny FIFO word (upper part of the result) and the upper N bytes of the held
//   word (lower part of the result); N = 0 passes the current word through unchanged.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        r_data_data      <= {CORE_DATA_WIDTH{1'b0}};
        hold_data_remain <= {SDRAM_REMAIN_WIDTH{1'b0}};
    end
    else
    begin
        case (hold_data_remain)
            4'd1    : r_data_data <= {tf_rd_data[119:0], hold_data[127:120]};
            4'd2    : r_data_data <= {tf_rd_data[111:0], hold_data[127:112]};
            4'd3    : r_data_data <= {tf_rd_data[103:0], hold_data[127:104]};
            4'd4    : r_data_data <= {tf_rd_data[ 95:0], hold_data[127: 96]};
            4'd5    : r_data_data <= {tf_rd_data[ 87:0], hold_data[127: 88]};
            4'd6    : r_data_data <= {tf_rd_data[ 79:0], hold_data[127: 80]};
            4'd7    : r_data_data <= {tf_rd_data[ 71:0], hold_data[127: 72]};
            4'd8    : r_data_data <= {tf_rd_data[ 63:0], hold_data[127: 64]};
            4'd9    : r_data_data <= {tf_rd_data[ 55:0], hold_data[127: 56]};
            4'd10   : r_data_data <= {tf_rd_data[ 47:0], hold_data[127: 48]};
            4'd11   : r_data_data <= {tf_rd_data[ 39:0], hold_data[127: 40]};
            4'd12   : r_data_data <= {tf_rd_data[ 31:0], hold_data[127: 32]};
            4'd13   : r_data_data <= {tf_rd_data[ 23:0], hold_data[127: 24]};
            4'd14   : r_data_data <= {tf_rd_data[ 15:0], hold_data[127: 16]};
            4'd15   : r_data_data <= {tf_rd_data[  7:0], hold_data[127:  8]};
            default : r_data_data <=  tf_rd_data[127:0];
        endcase

        // On the last beat of a request: restart the remainder at the end of a
        // descriptor, otherwise accumulate it (wraps modulo the SDRAM word size)
        if (data_en & data_last_req)
        begin
            if (data_last_desc)
                hold_data_remain <= {SDRAM_REMAIN_WIDTH{1'b0}};
            else
                hold_data_remain <= hold_data_remain + data_remain;
        end
    end
end

// ---------------------------
// SDRAM Control State Machine

// Preferred maximum burst size constant: a single 1 followed by MAX_XFER_SIZE_BITS
//   zeros; when BYTE_ADDRESSING == 1 the constant carries SDRAM_REMAIN_WIDTH
//   additional low-order zero bits
assign max_size = (BYTE_ADDRESSING != 1) ? {1'b1, {MAX_XFER_SIZE_BITS{1'b0}}}
                                         : {1'b1, {(MAX_XFER_SIZE_BITS+SDRAM_REMAIN_WIDTH){1'b0}}};

// Start a command execution (cmd_fifo_rd_en) when the state machine is in IDLE and
//   a command is pending. The enable is registered, which the minimum command to
//   command latency permits, in order to improve FMax.
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
    begin
        cmd_fifo_rd_en <= 1'b0;
    end
    else
    begin
        cmd_fifo_rd_en <= sdram_cmd_pending & (state == IDLE);
    end
end

// Per-command SDRAM request bookkeeping.
//   * cmd_fifo_rd_en loads the starting byte offset, the starting address, the size
//     limit for the first burst, and the total count for the new command
//   * every burst read request accepted in DREQ (l_busy low) afterwards advances the
//     address by l_b_size, restores the preferred burst limit, and subtracts
//     l_b_size from the remaining count
//   r_sdram_addr_offset is only written when a command starts.
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
    begin
        cmd_remain_count    <= {DMA_SDRAM_COUNT_WIDTH{1'b0}};
        r_max_size          <= {MAX_XFER_SIZE_WIDTH{1'b0}};
        r_sdram_addr        <= {DMA_SDRAM_ADDR_WIDTH{1'b0}};
        r_sdram_addr_offset <= {SDRAM_REMAIN_WIDTH{1'b0}};
    end
    else if (cmd_fifo_rd_en)
    begin
        // Save starting SDRAM byte address offset
        r_sdram_addr_offset <= sdram_addr[SDRAM_REMAIN_WIDTH-1:0];

        // Starting address; dropped to SDRAM word granularity unless byte addressing is used
        r_sdram_addr <= (BYTE_ADDRESSING == 1) ? sdram_addr : sdram_addr[DMA_DEST_ADDR_WIDTH-1:SDRAM_REMAIN_WIDTH];

        // Shorten the first request so that it ends on a max_size address boundary,
        //   which promotes greater SDRAM efficiency on the subsequent bursts
        r_max_size <= (BYTE_ADDRESSING == 1) ? max_size - {1'b0, sdram_addr[MAX_XFER_SIZE_WIDTH-2:0]} :
                                               max_size - {1'b0, sdram_addr[(SDRAM_REMAIN_WIDTH+(MAX_XFER_SIZE_WIDTH-1))-1:SDRAM_REMAIN_WIDTH]};

        // Total amount to read for this command
        cmd_remain_count <= sdram_count;
    end
    else if ((state == DREQ) & (~l_busy))
    begin
        // Burst read request accepted: move on to the next burst
        r_sdram_addr     <= r_sdram_addr[DMA_SDRAM_ADDR_WIDTH-1:0] + {{(DMA_SDRAM_ADDR_WIDTH-SDRAM_BSIZE_WIDTH){1'b0}}, l_b_size};
        r_max_size       <= max_size;
        cmd_remain_count <= cmd_remain_count - l_b_size; // Not width-linted because which is bigger changes by parameters
    end
end

// Test for enough room for a maximum size SDRAM burst.
//   The FIFO write level alone is not a sufficient check because data enables can
//   follow command acceptance by a large amount of latency, so every outstanding
//   read is budgeted as a full maximum size burst (reads_outstanding scaled by
//   2^MAX_XFER_SIZE_BITS) and added to the level before the threshold compare.
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        sdram_data_ready <= 1'b0;
    else
        sdram_data_ready <= (({reads_outstanding,{MAX_XFER_SIZE_BITS{1'b0}}} + {1'b0,data_fifo_wr_level}) < SDRAM_DATA_READY_THRESH);
end

// Count of accepted read requests whose final data word (l_r_valid_last) has not
//   been seen yet: +1 when a request is accepted (l_r_req with l_busy low), -1 on
//   l_r_valid_last, and unchanged when both happen on the same clock
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        reads_outstanding <= {(FIFO_DADDR_WIDTH-MAX_XFER_SIZE_BITS+1){1'b0}};
    else if (l_r_req == 1'b1 && l_busy == 1'b0)
    begin
        if (l_r_valid_last == 1'b0)
            reads_outstanding <= reads_outstanding + {{(FIFO_DADDR_WIDTH-MAX_XFER_SIZE_BITS){1'b0}}, 1'b1};
    end
    else if (l_r_valid_last == 1'b1)
        reads_outstanding <= reads_outstanding - {{(FIFO_DADDR_WIDTH-MAX_XFER_SIZE_BITS){1'b0}}, 1'b1};
end

// Size of the next SDRAM request: the smaller of the remaining count and the
//   current burst size limit
assign sdram_req_count = (cmd_remain_count < r_max_size) ? cmd_remain_count : r_max_size;

// Write SDRAM data into the FIFO. Data has to be aligned according to the SDRAM
//   address offset, and the last word needs masking in the case where more SDRAM
//   reads were required than are transmitted due to a non-zero aligned starting
//   SDRAM address and byte count.
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
    begin
        data_fifo_wr_en      <= 1'b0;
        r_extra_l_r_valid    <= 1'b0;

        end_l_r_valid        <= 1'b0;
        first_l_r_valid      <= 1'b0;
        l_r_valid_misalign   <= 1'b0;
    end
    else
    begin
        // At command start: save the misalign condition and arm the flag that marks
        //   the first l_r_valid of the DMA; the flag drops on that first l_r_valid
        if (cmd_fifo_rd_en)
        begin
            l_r_valid_misalign <= sdram_misalign;
            first_l_r_valid    <= 1'b1;
        end
        else if (l_r_valid)
        begin
            first_l_r_valid    <= 1'b0;
        end

        // The first l_r_valid is masked: if sdram_addr[3:0] is non-zero each PCIe data
        //   word is split over 2 SDRAM words, and both are needed to build the PCIe
        //   word in the expected format. An extra l_r_valid therefore has to be added
        //   at the end of the sequence to make up for the masked one - unless
        //   l_r_valid_misalign is set, in which case an extra l_r_valid is already
        //   coming and nothing is added.
        // end_l_r_valid flags the final l_r_valid_last: state is DONE and exactly one
        //   read is still outstanding.
        end_l_r_valid <= (state == DONE) & l_r_valid_last & (reads_outstanding == {{(FIFO_DADDR_WIDTH-MAX_XFER_SIZE_BITS){1'b0}}, 1'b1});

        // r_extra_l_r_valid is registered so that it has the same timing as the final
        //   data_fifo_wr_en
        r_extra_l_r_valid <= ~l_r_valid_misalign & end_l_r_valid;
        data_fifo_wr_en   <= (~l_r_valid_misalign & end_l_r_valid) | (~first_l_r_valid & l_r_valid);
    end
end

// post_cmd has to be valid coincident with the final data_fifo_wr_en of the DMA:
//   taken from the registered extra-valid pulse, or directly from end_l_r_valid when
//   l_r_valid_misalign is set
assign post_cmd = (l_r_valid_misalign & end_l_r_valid) | r_extra_l_r_valid;

// Hold the most recent valid SDRAM word; two clocks of SDRAM data are needed to
//   build one PCIe data word when the DMA starting card_addr != 0
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
    begin
        r_l_dataout <= {SDRAM_DATA_WIDTH{1'b0}};
    end
    else
    begin
        if (l_r_valid)
            r_l_dataout <= l_dataout_in;
    end
end

// If (r_sdram_addr_offset != 0) then SDRAM data was read that was not requested
//   to be transferred on the PCIe bus (due to misaligned PCIe and SDRAM starting
//   addresses); discard the unrequested SDRAM data and shift to the expected PCIe
//   alignment. With n = r_sdram_addr_offset, the low n bytes of the incoming word
//   (l_dataout_in) are placed above the upper (16 - n) bytes of the previously held
//   word (r_l_dataout).
// NOTE(review): labels and slices are hard-coded for a 4-bit offset and 128-bit
//   words, and there is no default arm - confirm against the parameter settings.
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        sdram_offset_data <= {SDRAM_DATA_WIDTH{1'b0}};
    else
        case (r_sdram_addr_offset[SDRAM_REMAIN_WIDTH-1:0])
            4'd0  : sdram_offset_data <=                         r_l_dataout[127:  0];
            4'd1  : sdram_offset_data <= {l_dataout_in[  7:  0], r_l_dataout[127:  8]};
            4'd2  : sdram_offset_data <= {l_dataout_in[ 15:  0], r_l_dataout[127: 16]};
            4'd3  : sdram_offset_data <= {l_dataout_in[ 23:  0], r_l_dataout[127: 24]};
            4'd4  : sdram_offset_data <= {l_dataout_in[ 31:  0], r_l_dataout[127: 32]};
            4'd5  : sdram_offset_data <= {l_dataout_in[ 39:  0], r_l_dataout[127: 40]};
            4'd6  : sdram_offset_data <= {l_dataout_in[ 47:  0], r_l_dataout[127: 48]};
            4'd7  : sdram_offset_data <= {l_dataout_in[ 55:  0], r_l_dataout[127: 56]};
            4'd8  : sdram_offset_data <= {l_dataout_in[ 63:  0], r_l_dataout[127: 64]};
            4'd9  : sdram_offset_data <= {l_dataout_in[ 71:  0], r_l_dataout[127: 72]};
            4'd10 : sdram_offset_data <= {l_dataout_in[ 79:  0], r_l_dataout[127: 80]};
            4'd11 : sdram_offset_data <= {l_dataout_in[ 87:  0], r_l_dataout[127: 88]};
            4'd12 : sdram_offset_data <= {l_dataout_in[ 95:  0], r_l_dataout[127: 96]};
            4'd13 : sdram_offset_data <= {l_dataout_in[103:  0], r_l_dataout[127:104]};
            4'd14 : sdram_offset_data <= {l_dataout_in[111:  0], r_l_dataout[127:112]};
            4'd15 : sdram_offset_data <= {l_dataout_in[119:  0], r_l_dataout[127:120]};
        endcase
end

// data_fifo_wr_data is driven directly from the registered sdram_offset_data word
assign data_fifo_wr_data = sdram_offset_data;

// SDRAM control state machine:
//   IDLE -> WAIT -> DRDY -> DREQ -> (WAIT for another burst | DONE) -> IDLE
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        state <= IDLE;
    else
    begin
        case (state)

            // Stay here until a command becomes pending
            IDLE :
            begin
                if (sdram_cmd_pending)
                    state <= WAIT;
            end

            // One clock of delay to let computationally intensive calculations complete
            WAIT :
            begin
                state <= DRDY;
            end

            // Hold until a burst of data is ready
            DRDY :
            begin
                if (sdram_data_ready)
                    state <= DREQ;
            end

            // Issue the SDRAM request; once it is accepted (l_busy low), finish when
            //   this burst covers the whole remaining count, otherwise go around again
            DREQ :
            begin
                if (~l_busy)
                    state <= (cmd_remain_count == l_b_size) ? DONE : WAIT;
            end

            // Hold until the final SDRAM word has been written into the FIFO
            DONE :
            begin
                if (post_cmd)
                    state <= IDLE;
            end

            default :
            begin
                state <= IDLE;
            end

        endcase
    end
end

// Registered idle indication; follows (state == IDLE) one clock later and resets high
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        state_idle <= 1'b1;
    else
        state_idle <= (state == IDLE);
end

// This module only reads: the write request is tied low and a read is requested
//   for as long as the state machine is in DREQ
assign l_r_req = (state == DREQ);
assign l_w_req = 1'b0;

// SDRAM core address built from r_sdram_addr, zero-extended on the left up to
//   SDRAM_ADDR_WIDTH. Without byte addressing a 0 pad bit is appended on the right,
//   because the SDRAM core address is 64-bit while our address is always 128-bit.
assign l_addr = (BYTE_ADDRESSING != 1) ? {{(SDRAM_ADDR_WIDTH-(DMA_SDRAM_ADDR_WIDTH+1)){1'b0}}, r_sdram_addr, 1'b0} :
                                         {{(SDRAM_ADDR_WIDTH-DMA_SDRAM_ADDR_WIDTH){1'b0}}, r_sdram_addr};


// Burst size for the SDRAM request: captured from sdram_req_count on the clock the
//   state machine is in DRDY with sdram_data_ready set, and held otherwise
always @(posedge sdram_clk or negedge sdram_rst_n_r)
begin
    if (!sdram_rst_n_r)
        l_b_size <= {SDRAM_BSIZE_WIDTH{1'b0}};
    else
    begin
        if ((state == DRDY) & sdram_data_ready)
            l_b_size <= sdram_req_count; // Not width-linted because which is bigger changes by parameters
    end
end

// Constant tie-offs for SDRAM interface signals that this module does not use
assign l_auto_pch = 1'b0;                     // Unused
assign l_req_pri  = 3'b000;                   // Unused

assign l_dm_in    = {SDRAM_DM_WIDTH{1'b1}};   // Unused; DM writes when low
assign l_datain   = {SDRAM_DATA_WIDTH{1'b0}}; // Unused



endmodule
