// -------------------------------------------------------------------------
//
//  PROJECT: PCI Express
//  COMPANY: Northwest Logic, Inc.
//
// ------------------------- CONFIDENTIAL ----------------------------------
//
//                 Copyright 2011 by Northwest Logic, Inc.
//
//  All rights reserved.  No part of this source code may be reproduced or
//  transmitted in any form or by any means, electronic or mechanical,
//  including photocopying, recording, or any information storage and
//  retrieval system, without permission in writing from Northwest Logic, Inc.
//
//  Further, no use of this source code is permitted in any form or means
//  without a valid, written license agreement with Northwest Logic, Inc.
//
//  $Date: 2012-04-04 11:43:51 -0700 (Wed, 04 Apr 2012) $
//  $Revision: 27533 $
//
//                         Northwest Logic, Inc.
//                  1100 NW Compton Drive, Suite 100
//                      Beaverton, OR 97006, USA
//
//                       Ph.  +1 503 533 5800
//                       Fax. +1 503 533 5900
//                          www.nwlogic.com
//
// -------------------------------------------------------------------------

`timescale 1ps / 1ps

// -----------------------
// -- Module Definition --
// -----------------------

// s2c_adr_pkt: System-to-Card (S2C) DMA target that accepts DMA data on the
//   PCIe-clocked user data interface, rotates it to the destination byte
//   offset, and writes it to the SDRAM multi-port interface. A command FIFO
//   (address + count) and a data FIFO carry the traffic from pcie_clk to
//   sdram_clk. This module only issues writes (l_r_req is tied low).
module s2c_adr_pkt (

    pcie_rst_n,         // PCIe Clock and Reset
    pcie_clk,           //

    sdram_rst_n,        // SDRAM Clock and Reset; design supports different
    sdram_clk,          //   PCIe and SDRAM clock domains

    testmode,           // When high, the internal SDRAM-domain reset follows sdram_rst_n only

    cmd_req,            // System to Card DMA Engine: User Command Interface
    cmd_ready,          //   Get user's permission to issue a non-posted request of specified size/address
    cmd_addr,           //   (only referenced by the SIMULATION abort-test logic)
    cmd_bcount,         //   (only referenced by the SIMULATION abort-test logic)
    cmd_user_control,   //   (not referenced by this module)
    cmd_abort,          //   Abort request
    cmd_abort_ack,      //   Abort acknowledged once command and data FIFOs read empty
    cmd_stop,           //   Tied low unless SIMULATION abort testing is compiled in
    cmd_stop_bcount,    //   Tied to zero unless SIMULATION abort testing is compiled in

    data_req,           // System to Card DMA Engine: User Data Interface
    data_ready,         //   Get user's permission to accept data transfers;
    data_addr,          //   provide useful transaction attributes;
    data_bcount,        //   Transfer data to user
    data_en,            //   Data beat enable
    data_error,         //   Captured into save_error on request accept
    data_remain,        //   Decoded into byte enables for each beat
    data_valid,         //   (not referenced by this module)
    data_first_req,     //   (not referenced by this module)
    data_last_req,      //   With data_en, triggers the command FIFO write
    data_first_desc,    //   (not referenced by this module)
    data_last_desc,     //   (not referenced by this module)
    data_first_chain,   //   (not referenced by this module)
    data_last_chain,    //   Loaded into the internal 'first' flag on each data_en
    data_data,          //   Write data
    data_user_control,  //   (not referenced by this module)

    l_r_req,            // System to Card Multi-Port Interface
    l_w_req,            //   Write request
    l_req_pri,          //   Tied to zero
    l_addr,             //   Write address
    l_b_size,           //   Burst size (driven from the command FIFO count)
    l_auto_pch,         //   Tied low
    l_busy,             //   Request is accepted when l_w_req is high and l_busy is low
    l_r_valid,          //   (not referenced by this module)
    l_r_valid_last,     //   (not referenced by this module)
    l_d_req,            //   Write data request; pops the data FIFO
    l_d_req_last,       //   (not referenced by this module)
    l_datain,           //   Write data to the controller
    l_dm_in,            //   Write data mask (inverted byte enables)
    l_dataout,          //   (not referenced by this module)
    l_error

);



// ----------------
// -- Parameters --
// ----------------

// NOTE: Only values which are parameters are intended to be modified from their default values

// SDRAM Multi-Port Front End Parameters
localparam  SDRAM_DATA_WIDTH        = 128;  // SDRAM controller local bus data width
localparam  SDRAM_DM_WIDTH          = 16;   // SDRAM controller local bus data mask width == SDRAM_DATA_WIDTH/8
localparam  SDRAM_REMAIN_WIDTH      = 4;    // Number of bits required to address all of the bytes in a SDRAM word; 2^SDRAM_REMAIN_WIDTH must equal SDRAM_DM_WIDTH
parameter   SDRAM_ADDR_WIDTH        = 33;   // SDRAM controller Local address width
parameter   SDRAM_BSIZE_WIDTH       = 8;    // Width of l_b_size parameter

parameter   DMA_DEST_ADDR_WIDTH     = 28;   // Number of byte address bits implemented by the SDRAM device; SDRAM depth == 2^DMA_DEST_ADDR_WIDTH; 2^28 = 256 MByte
localparam  DMA_DEST_BCOUNT_WIDTH   = 10;   // Size of data_bcount port

parameter   BYTE_ADDRESSING         = 0;    // Use Byte Addressing for SDRAM Interface if 1

// Convert byte address, count to SDRAM word size address, count
//   (in byte-addressing mode the full byte address/count widths are kept)
localparam  DMA_SDRAM_ADDR_WIDTH    = (BYTE_ADDRESSING == 1) ? DMA_DEST_ADDR_WIDTH   : (DMA_DEST_ADDR_WIDTH   - SDRAM_REMAIN_WIDTH);
localparam  DMA_SDRAM_COUNT_WIDTH   = (BYTE_ADDRESSING == 1) ? DMA_DEST_BCOUNT_WIDTH : (DMA_DEST_BCOUNT_WIDTH - SDRAM_REMAIN_WIDTH);

// Command FIFO
//   Each entry is {count, address} in SDRAM units
parameter   FIFO_CADDR_WIDTH        = 4;    // Address width of Command FIFO; minimum 4
localparam  FIFO_CDATA_WIDTH        = DMA_SDRAM_COUNT_WIDTH + DMA_SDRAM_ADDR_WIDTH;

// Data FIFO
parameter   FIFO_DADDR_WIDTH        = 9;    // Address width of Data FIFO; set to larger of 7 or block RAM address width at max data width

// Data FIFO entry is {byte enables, data} unless byte addressing is used, in which case only data is stored
localparam  FIFO_DDATA_WIDTH        = (BYTE_ADDRESSING == 1) ? SDRAM_DATA_WIDTH: (SDRAM_DATA_WIDTH + SDRAM_DM_WIDTH);

// PCIe Complete Core Port Parameters
localparam  CORE_DATA_WIDTH         = 128;  // Width of input and output data
localparam  CORE_BE_WIDTH           = 16;   // Width of input and output K
localparam  CORE_REMAIN_WIDTH       = 4;    // 2^CORE_REMAIN_WIDTH represents the number of bytes in CORE_DATA_WIDTH

// Set thresholds for which ready will be asserted; subtract from max level to accommodate latencies and required signaling protocol
//   CMD_FIFO_READY_THRESH  = 2^FIFO_CADDR_WIDTH - 5  (11 at the default width of 4)
//   DATA_FIFO_OFFSET       = 2^(10-4) = 64 SDRAM words
//   DATA_FIFO_READY_THRESH = 2^FIFO_DADDR_WIDTH - 64 (448 at the default width of 9)
//   Both thresholds go to zero or negative if the FIFO address widths are set below
//   the documented minimums, so those minimums must be respected.
localparam  CMD_FIFO_READY_THRESH   = (1 << FIFO_CADDR_WIDTH) - 5;
localparam  DATA_FIFO_OFFSET        = 1 << (DMA_DEST_BCOUNT_WIDTH - SDRAM_REMAIN_WIDTH);
localparam  DATA_FIFO_READY_THRESH  = (1 << FIFO_DADDR_WIDTH) - DATA_FIFO_OFFSET; // Not ready when less than 2 * largest count that must be supported



// ----------------------
// -- Port Definitions --
// ----------------------

// Clocks and resets (both resets are active low)
input                               pcie_rst_n;
input                               pcie_clk;

input                               sdram_rst_n;
input                               sdram_clk;

// Selects sdram_rst_n alone as the internal SDRAM-domain reset when high
input                               testmode;

// User Command Interface
input                               cmd_req;
output                              cmd_ready;
input   [63:0]                      cmd_addr;
input   [9:0]                       cmd_bcount;
input   [63:0]                      cmd_user_control;
input                               cmd_abort;
output                              cmd_abort_ack;
output                              cmd_stop;
output  [9:0]                       cmd_stop_bcount;

// User Data Interface
input                               data_req;
output                              data_ready;
input   [63:0]                      data_addr;
input   [9:0]                       data_bcount;
input                               data_en;
input                               data_error;
input   [CORE_REMAIN_WIDTH-1:0]     data_remain;
input   [CORE_REMAIN_WIDTH:0]       data_valid;
input                               data_first_req;
input                               data_last_req;
input                               data_first_desc;
input                               data_last_desc;
input                               data_first_chain;
input                               data_last_chain;
input   [CORE_DATA_WIDTH-1:0]       data_data;
input   [63:0]                      data_user_control;

// SDRAM Multi-Port Interface
output                              l_r_req;
output                              l_w_req;
output  [2:0]                       l_req_pri;
output  [SDRAM_ADDR_WIDTH-1:0]      l_addr;
output  [SDRAM_BSIZE_WIDTH-1:0]     l_b_size;
output                              l_auto_pch;
input                               l_busy;
input                               l_r_valid;
input                               l_r_valid_last;
input                               l_d_req;
input                               l_d_req_last;
output  [SDRAM_DATA_WIDTH-1:0]      l_datain;
output  [SDRAM_DM_WIDTH-1:0]        l_dm_in;
input   [SDRAM_DATA_WIDTH-1:0]      l_dataout;
output                              l_error;



// ----------------
// -- Port Types --
// ----------------

// Only data_ready and l_w_req are registered outputs (reg); every other port
//   is a net. testmode has no explicit declaration here and is therefore an
//   implicit 1-bit wire from its input declaration.

wire                                pcie_rst_n;
wire                                sdram_rst_n;
wire                                pcie_clk;
wire                                sdram_clk;

wire                                cmd_req;
wire                                cmd_ready;
wire    [63:0]                      cmd_addr;
wire    [9:0]                       cmd_bcount;
wire    [63:0]                      cmd_user_control;
wire                                cmd_abort;
wire                                cmd_abort_ack;
wire                                cmd_stop;
wire    [9:0]                       cmd_stop_bcount;

wire                                data_req;
reg                                 data_ready;
wire    [63:0]                      data_addr;
wire    [9:0]                       data_bcount;
wire                                data_en;
wire                                data_error;
wire    [CORE_REMAIN_WIDTH-1:0]     data_remain;
wire    [CORE_REMAIN_WIDTH:0]       data_valid;
wire                                data_first_req;
wire                                data_last_req;
wire                                data_first_desc;
wire                                data_last_desc;
wire                                data_first_chain;
wire                                data_last_chain;
wire    [CORE_DATA_WIDTH-1:0]       data_data;
wire    [63:0]                      data_user_control;

wire                                l_r_req;
reg                                 l_w_req;
wire    [2:0]                       l_req_pri;
wire    [SDRAM_ADDR_WIDTH-1:0]      l_addr;
wire    [SDRAM_BSIZE_WIDTH-1:0]     l_b_size;
wire                                l_auto_pch;
wire                                l_busy;
wire                                l_r_valid;
wire                                l_r_valid_last;
wire                                l_d_req;
wire                                l_d_req_last;
wire    [SDRAM_DATA_WIDTH-1:0]      l_datain;
wire    [SDRAM_DM_WIDTH-1:0]        l_dm_in;
wire    [SDRAM_DATA_WIDTH-1:0]      l_dataout;
wire                                l_error;



// -------------------
// -- Local Signals --
// -------------------

// Reset Generation and Propagation
reg                                 pcie_rst_n_d1;      // First stage of PCIe reset release synchronizer (sdram_clk)
reg                                 pcie_rst_n_d2;      // Second stage of PCIe reset release synchronizer (sdram_clk)
wire                                sdram_rst_n_int;    // Reset used by all sdram_clk logic in this module

// Command FIFO
wire    [CORE_REMAIN_WIDTH:0]       input_bytes_last_en;    // Low bits of data_bcount, with an all-zero value mapped to 2^CORE_REMAIN_WIDTH
wire    [CORE_REMAIN_WIDTH:0]       fifo_bytes_last_en;     // Starting byte offset + input_bytes_last_en
wire                                c_extra_enable_needed;  // fifo_bytes_last_en exceeds one SDRAM word

reg                                 first;                  // Selects data_addr (1) or the running address (0) as the start address

reg     [63:0]                      hold_addr_plus_hold_data_bcount;    // Registered hold_data_addr + hold_data_bcount
reg     [9:0]                       hold_data_bcount;                   // data_bcount of the last accepted request
reg     [63:0]                      hold_data_addr;                     // Start address of the last accepted request

reg                                 extra_en_needed;
reg     [DMA_SDRAM_ADDR_WIDTH-1:0]  save_addr;
reg     [DMA_SDRAM_COUNT_WIDTH-1:0] save_count;
reg                                 save_error;
reg                                 d2_cmd_fifo_wr_en;      // Command FIFO write enable pipeline: d2 -> d -> cmd_fifo_wr_en
reg                                 d_cmd_fifo_wr_en;
reg                                 cmd_fifo_wr_en;
reg     [SDRAM_REMAIN_WIDTH-1:0]    sdram_data_offset;      // Byte offset within the SDRAM word for the current request

wire    [DMA_SDRAM_COUNT_WIDTH-1:0] c_save_count;           // save_count including the extra enable, if any
reg     [FIFO_CDATA_WIDTH-1:0]      d_cmd_fifo_wr_data;
reg     [FIFO_CDATA_WIDTH-1:0]      cmd_fifo_wr_data;
wire                                cmd_fifo_ready;

wire    [FIFO_CADDR_WIDTH:0]        cmd_fifo_wr_level;

wire    [FIFO_CDATA_WIDTH-1:0]      cmd_fifo_rd_data;
wire                                cmd_fifo_rd_empty;

wire                                sdram_cmd_pending;      // Command FIFO is not empty
reg     [DMA_SDRAM_COUNT_WIDTH-1:0] sdram_count;
reg     [DMA_SDRAM_ADDR_WIDTH-1:0]  sdram_addr;
reg                                 sdram_error;            // Always written with 0 in this module

wire    [3:0]                       data_128_wr_offset;
reg                                 data_128_wr_en_extra;   // Extra write beat after the last data beat of a request

reg     [CORE_BE_WIDTH-1:0]         data_be;                // Byte enables decoded from data_remain

wire                                data_128_wr_en;
wire    [15:0]                      data_128_wr_be;
wire    [127:0]                     data_128_wr_data;
// Rotate Data by Starting Card Address
reg                                 df_wr_en;
reg                                 df_error;               // Registered save_error; not read by any logic in this module

reg                                 df_data_128_wr_en;
reg     [3:0]                       df_data_128_wr_offset;
reg     [127:0]                     df_data_128_wr_data;
reg     [15:0]                      df_data_128_wr_be;
reg                                 df_data_128_wr_en_extra;

reg     [127:0]                     r_df_data_128_wr_data;  // Previous beat's data, used to fill the low bytes after rotation
reg     [15:0]                      r_df_data_128_wr_be;    // Previous beat's byte enables

reg     [127:0]                     df_wr_data;             // Rotated data (combinational)
reg     [15:0]                      df_wr_be;               // Rotated byte enables (combinational)

reg                                 r_gnt;                  // data_req & data_ready delayed one pcie_clk

// Data FIFO
wire                                data_fifo_ready;

reg                                 data_fifo_wr_en;
reg     [FIFO_DDATA_WIDTH-1:0]      data_fifo_wr_data;
wire    [FIFO_DADDR_WIDTH:0]        data_fifo_wr_level;

wire    [FIFO_DDATA_WIDTH-1:0]      data_fifo_rd_data;

reg     [SDRAM_DATA_WIDTH-1:0]      data_rd_data;
wire    [SDRAM_DM_WIDTH-1:0]        c_data_rd_be;
reg     [SDRAM_DM_WIDTH-1:0]        data_rd_be;

// SDRAM Control State Machine
wire                                cmd_fifo_rd_en;
wire                                data_fifo_rd_en;

// FIFO outputs that are connected but not used by this module
wire                                cmd_fifo_wr_full_unused;
wire    [FIFO_CADDR_WIDTH:0]        cmd_fifo_rd_level_unused;
wire    [FIFO_CADDR_WIDTH:0]        cmd_fifo_rd_xfer_unused;
wire                                data_fifo_wr_full_unused;
wire    [FIFO_DADDR_WIDTH:0]        data_fifo_rd_level_unused;
wire    [FIFO_DADDR_WIDTH:0]        data_fifo_rd_adv_level_unused;
wire                                data_fifo_rd_empty;


`ifdef SIMULATION
// Simulation-only controls for hardware-abort testing. They are plain regs
//   with no driver in this module other than the initial block below, so a
//   testbench is expected to set them (presumably by hierarchical reference).
reg                                 abort_enable;   // Arms the HW abort trigger
reg     [63:0]                      abort_address;  // Address at which the HW abort fires
reg                                 data_throttle;  // Holds data_ready low while set

// Start with abort testing disarmed and data flowing
initial
begin
    data_throttle = 1'b0;
    abort_enable  = 1'b0;
    abort_address = {64{1'b0}};
end
`endif


// ---------------
// -- Equations --
// ---------------

// -----------------
// Reset Logic
//
// Propagate the PCIe reset into the SDRAM clock domain so that a DMA reset
// from the DMA Back End also resets the SDRAM side of the FIFO logic.
// Assertion is asynchronous; release is synchronized through two
// sdram_clk flops (a constant 1 is shifted in d1 -> d2).

always @(posedge sdram_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
        {pcie_rst_n_d2, pcie_rst_n_d1} <= 2'b00;
    else
        {pcie_rst_n_d2, pcie_rst_n_d1} <= {pcie_rst_n_d1, 1'b1};
end

// In testmode the synchronized PCIe reset is bypassed and only the
// sdram_rst_n port controls the SDRAM-domain reset
assign sdram_rst_n_int = testmode ? sdram_rst_n
                                  : (sdram_rst_n & pcie_rst_n_d2);

// -----------------
// Command Interface

`ifdef SIMULATION
// Abort testing: a command is accepted unless abort testing is armed and the
//   command's byte range [cmd_addr, cmd_addr + cmd_bcount) contains
//   abort_address. A command that is not accepted is stopped instead.
assign cmd_ready       = (abort_enable == 1'b0)                         // abort not enabled
                       | ((cmd_addr + cmd_bcount) <= abort_address)     // access is below abort_address
                       | (cmd_addr > abort_address);                    // access is above abort_address

assign cmd_stop        = ~cmd_ready;

// Stop the command at abort_address: only the low 10 bits of the
//   difference fit on the port, so subtract the low 10 bits directly
assign cmd_stop_bcount = abort_address[9:0] - cmd_addr[9:0];
`else
// The command port lets the user flow-control the issuing of DMA read
//   requests and see transactions before they arrive on the data
//   interface; the data interface will in many cases carry multiple write
//   transactions per command. The command port can also end a DMA chain
//   early by answering a cmd_req with cmd_stop instead of cmd_ready. When
//   cmd_stop is asserted, cmd_stop_bcount gives the number of additional
//   bytes the user is still willing to transfer; if that is non-zero,
//   further requests are made until that final byte count is completed.
//   Every cmd_req that was acknowledged with cmd_ready completes normally.
// The same address and byte count information is presented on the data
//   interface per data transaction, so most applications do not need the
//   command interface at all.

// This module never stops a DMA early
assign cmd_stop_bcount = 10'h0;
assign cmd_stop        = 1'b0;

// Commands are always accepted; flow control is applied on the data interface
assign cmd_ready       = 1'b1;
`endif


// -------------
// Handle Aborts

// An abort is acknowledged only while it is being requested and both the
//   command FIFO and the data FIFO report empty on their read side, i.e.
//   all queued operations have been handed to the SDRAM interface.
// NOTE(review): the two empty flags come from the FIFO read ports, which are
//   clocked by sdram_clk, and are combined here without resynchronization;
//   cmd_abort/cmd_abort_ack presumably belong to the pcie_clk domain.
//   Confirm that the consumer of cmd_abort_ack tolerates this.
assign cmd_abort_ack = &{cmd_abort, cmd_fifo_rd_empty, data_fifo_rd_empty};



// ------------
// Command FIFO

// The command FIFO passes each transfer's SDRAM starting address and length
//   from the PCIe clock domain to the SDRAM clock domain

// Decide whether the starting byte offset plus the byte count spills the
//   transfer into one more SDRAM data word, which requires an extra SDRAM
//   data enable at the end of the transaction.
//
// input_bytes_last_en: low CORE_REMAIN_WIDTH bits of data_bcount, where an
//   all-zero value is represented as 2^CORE_REMAIN_WIDTH (MSB set)
assign input_bytes_last_en   = {(data_bcount[CORE_REMAIN_WIDTH-1:0] == {CORE_REMAIN_WIDTH{1'b0}}),
                                 data_bcount[CORE_REMAIN_WIDTH-1:0]};

// fifo_bytes_last_en: starting offset + input_bytes_last_en; the offset comes
//   from data_addr when 'first' is set, otherwise from the running address
assign fifo_bytes_last_en    = first
                             ? ({1'b0, data_addr[CORE_REMAIN_WIDTH-1:0]}                       + input_bytes_last_en)
                             : ({1'b0, hold_addr_plus_hold_data_bcount[CORE_REMAIN_WIDTH-1:0]} + input_bytes_last_en);

// An extra enable is needed when the sum is strictly greater than one full word
assign c_extra_enable_needed = ({1'b1, {CORE_REMAIN_WIDTH{1'b0}}} < fifo_bytes_last_en);

// Command capture (pcie_clk domain): tracks the destination address of each
//   accepted data request, counts the words written for it, and builds the
//   {count, address} entry that is pushed into the command FIFO after the
//   request's last data beat.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (pcie_rst_n == 1'b0)
    begin
        // 'first' resets high so the first accepted request takes its address from data_addr
        first                           <= 1'b1;

        hold_addr_plus_hold_data_bcount <= 64'h0;
        hold_data_bcount                <= 10'h0;
        hold_data_addr                  <= 64'h0;

        extra_en_needed                 <= 1'b0;

        save_addr                       <= {DMA_SDRAM_ADDR_WIDTH{1'b0}};
        save_count                      <= {DMA_SDRAM_COUNT_WIDTH{1'b0}};
        save_error                      <= 1'b0;

        d2_cmd_fifo_wr_en               <= 1'b0;
        d_cmd_fifo_wr_en                <= 1'b0;
        cmd_fifo_wr_en                  <= 1'b0;

        d_cmd_fifo_wr_data              <= {FIFO_CDATA_WIDTH{1'b0}};
        cmd_fifo_wr_data                <= {FIFO_CDATA_WIDTH{1'b0}};

        sdram_data_offset               <= {SDRAM_REMAIN_WIDTH{1'b0}};
    end
    else
    begin
        // Reload 'first' from data_last_chain on every data beat
        if (data_en)
            first <= data_last_chain;

        // Registered every clock: address following the previously accepted request
        hold_addr_plus_hold_data_bcount <= hold_data_addr + {54'h0, hold_data_bcount};

        // On request accept, remember this request's byte count and start address;
        //   when not 'first' the start address continues from the previous request
        if (data_req & data_ready)
        begin
            hold_data_bcount <= data_bcount;

            if (first)
                hold_data_addr <= data_addr;
            else
                hold_data_addr <= hold_data_addr + {54'h0, hold_data_bcount};
        end

        if (data_req & data_ready)
        begin
            // Save the Card Address for the current transaction
            //   (byte address in byte-addressing mode, otherwise SDRAM word address)
            if (first)
                save_addr <= (BYTE_ADDRESSING == 1) ? data_addr[DMA_DEST_ADDR_WIDTH-1:0]: data_addr[DMA_DEST_ADDR_WIDTH-1:SDRAM_REMAIN_WIDTH];
            else
                save_addr <= (BYTE_ADDRESSING == 1) ? hold_addr_plus_hold_data_bcount[DMA_DEST_ADDR_WIDTH-1:0]: hold_addr_plus_hold_data_bcount[DMA_DEST_ADDR_WIDTH-1:SDRAM_REMAIN_WIDTH];

            // Identify if an extra enable is needed at the end of this transaction
            // Record data address offset for the remainder of the transaction
            extra_en_needed <= c_extra_enable_needed;

            // Keep track of error flag to mark bad data
            save_error <= data_error;

            sdram_data_offset <= first ? data_addr[SDRAM_REMAIN_WIDTH-1:0] : hold_addr_plus_hold_data_bcount[SDRAM_REMAIN_WIDTH-1:0];
        end

        // Save the number of words put into the FIFO for the current transaction
        //   Byte addressing: the count is the request's byte count
        //   Word addressing: the count is cleared on accept and incremented per data beat
        if (BYTE_ADDRESSING == 1)
        begin
            if (data_req & data_ready)
                save_count <= data_bcount[DMA_SDRAM_COUNT_WIDTH-1:0];
        end
        else
        begin
            if (data_req & data_ready)
                save_count <= {DMA_SDRAM_COUNT_WIDTH{1'b0}};
            else if (data_128_wr_en)
                save_count <= save_count + {{(DMA_SDRAM_COUNT_WIDTH-1){1'b0}}, 1'b1};
        end

        // Command FIFO write strobe: last data beat of the request, delayed three clocks
        d2_cmd_fifo_wr_en <= data_last_req & data_en;
        d_cmd_fifo_wr_en  <= d2_cmd_fifo_wr_en;
        cmd_fifo_wr_en    <= d_cmd_fifo_wr_en;

        // Command FIFO write data is re-registered every clock through two stages
        d_cmd_fifo_wr_data <= (BYTE_ADDRESSING == 1) ? {save_count, save_addr} : {c_save_count, save_addr}; // Store only relevant portion of address & count
        cmd_fifo_wr_data   <= d_cmd_fifo_wr_data;

    end

end

// Fold the extra enable, when present, into the word count. The extra enable
//   can coincide with the next transaction's req & ready (which clears
//   save_count), so it is added combinatorially rather than counted in the register.
assign c_save_count   = save_count + {{(DMA_SDRAM_COUNT_WIDTH-1){1'b0}}, data_128_wr_en_extra};

// Command FIFO is considered ready while its write level is below the threshold
assign cmd_fifo_ready = (CMD_FIFO_READY_THRESH > cmd_fifo_wr_level);

// Instantiate Command FIFO
//   Dual-clock FIFO: written in the pcie_clk domain with {count, address}
//   entries, read in the sdram_clk domain. The read side is reset by
//   sdram_rst_n_int. The clear/flush/transfer/sync controls are tied off.
//   NOTE(review): the meaning of EN_SPECULATIVE_RD, EN_LOOK_AHEAD and
//   DLY_WR_FOR_RD_LVL is defined by ref_dc_fifo_shallow_ram, which is not
//   visible here; the downstream logic registers rd_data every clock, which
//   presumably relies on look-ahead read data - confirm against that module.
ref_dc_fifo_shallow_ram #(

    .ADDR_WIDTH         (FIFO_CADDR_WIDTH           ),
    .DATA_WIDTH         (FIFO_CDATA_WIDTH           ),
    .EN_SPECULATIVE_RD  (1'b0                       ),
    .EN_LOOK_AHEAD      (1'b1                       ),
    .DLY_WR_FOR_RD_LVL  (1'b0                       )

) cmd_fifo (

    .wr_rst_n           (pcie_rst_n                 ),
    .wr_clk             (pcie_clk                   ),
    .wr_clr             (1'b0                       ),
    .wr_en              (cmd_fifo_wr_en             ),
    .wr_data            (cmd_fifo_wr_data           ),
    .wr_level           (cmd_fifo_wr_level          ),
    .wr_full            (cmd_fifo_wr_full_unused    ),

    .rd_rst_n           (sdram_rst_n_int            ),
    .rd_clk             (sdram_clk                  ),
    .rd_clr             (1'b0                       ),
    .rd_flush           (1'b0                       ),
    .rd_ack             (cmd_fifo_rd_en             ),
    .rd_xfer            (1'b0                       ),
    .rd_sync            (1'b0                       ),
    .rd_data            (cmd_fifo_rd_data           ),
    .rd_level           (cmd_fifo_rd_level_unused   ),
    .rd_xfer_level      (cmd_fifo_rd_xfer_unused    ),
    .rd_empty           (cmd_fifo_rd_empty          )
);

// A command is pending whenever the command FIFO is not empty
assign sdram_cmd_pending = ~cmd_fifo_rd_empty;

// Register the command FIFO read data (sdram_clk domain). The FIFO entry is
//   {count, address}, so it is unpacked straight into the two registers.
//   sdram_error is written with 0 on every clock, so no error is ever
//   reported to the SDRAM interface by this module.
always @(posedge sdram_clk or negedge sdram_rst_n_int)
begin
    if (!sdram_rst_n_int)
    begin
        {sdram_count, sdram_addr} <= {FIFO_CDATA_WIDTH{1'b0}};
        sdram_error               <= 1'b0;
    end
    else
    begin
        {sdram_count, sdram_addr} <= cmd_fifo_rd_data;
        sdram_error               <= 1'b0;
    end
end

// Rotation amount for the data path is the byte offset saved at request accept
assign data_128_wr_offset = sdram_data_offset;



// ----------------------------------------------------
// Convert PCIe Back-End Data Width to SDRAM Data Width

// Decode data_remain into byte enables: a value of N clears the N most
//   significant byte enables and leaves the remaining low ones set
always @*
begin
    case (data_remain[3:0])
        4'd0  : data_be = 16'hFFFF;
        4'd1  : data_be = 16'h7FFF;
        4'd2  : data_be = 16'h3FFF;
        4'd3  : data_be = 16'h1FFF;
        4'd4  : data_be = 16'h0FFF;
        4'd5  : data_be = 16'h07FF;
        4'd6  : data_be = 16'h03FF;
        4'd7  : data_be = 16'h01FF;
        4'd8  : data_be = 16'h00FF;
        4'd9  : data_be = 16'h007F;
        4'd10 : data_be = 16'h003F;
        4'd11 : data_be = 16'h001F;
        4'd12 : data_be = 16'h000F;
        4'd13 : data_be = 16'h0007;
        4'd14 : data_be = 16'h0003;
        4'd15 : data_be = 16'h0001;
    endcase
end

// The PCIe Back-End and SDRAM data widths are both 128 bits, so the user
//   data interface maps one-to-one onto the 128-bit write stream
assign data_128_wr_data = data_data;
assign data_128_wr_be   = data_be;
assign data_128_wr_en   = data_en;

// Generate one extra write beat on the clock after the last data beat of a
//   request whenever that request was flagged as needing it
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
        data_128_wr_en_extra <= 1'b0;
    else
        data_128_wr_en_extra <= extra_en_needed & data_last_req & data_en;
end
// ------------------------------------
// Rotate Data by Starting Card Address

// One register stage in front of the rotate muxes, for route speed.
//   df_wr_en covers both normal beats and the extra beat.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        df_data_128_wr_data     <= {128{1'b0}};
        df_data_128_wr_be       <= {16{1'b0}};
        df_data_128_wr_offset   <= {4{1'b0}};
        df_data_128_wr_en       <= 1'b0;
        df_data_128_wr_en_extra <= 1'b0;
        df_wr_en                <= 1'b0;
    end
    else
    begin
        df_data_128_wr_data     <= data_128_wr_data;
        df_data_128_wr_be       <= data_128_wr_be;
        df_data_128_wr_offset   <= data_128_wr_offset;
        df_data_128_wr_en       <= data_128_wr_en;
        df_data_128_wr_en_extra <= data_128_wr_en_extra;
        df_wr_en                <= data_128_wr_en_extra | data_128_wr_en;
    end
end

// Rotate the data left by df_data_128_wr_offset bytes: the current beat
//   supplies the upper bytes and the most significant bytes of the previous
//   beat (r_df_data_128_wr_data) fill the vacated low bytes. An offset of 0
//   passes the current beat through unchanged.
always @(*)
begin
    case (df_data_128_wr_offset[3:0])
        4'h0 : df_wr_data =  df_data_128_wr_data[127:0];
        4'h1 : df_wr_data = {df_data_128_wr_data[119:0], r_df_data_128_wr_data[127:120]};
        4'h2 : df_wr_data = {df_data_128_wr_data[111:0], r_df_data_128_wr_data[127:112]};
        4'h3 : df_wr_data = {df_data_128_wr_data[103:0], r_df_data_128_wr_data[127:104]};
        4'h4 : df_wr_data = {df_data_128_wr_data[ 95:0], r_df_data_128_wr_data[127: 96]};
        4'h5 : df_wr_data = {df_data_128_wr_data[ 87:0], r_df_data_128_wr_data[127: 88]};
        4'h6 : df_wr_data = {df_data_128_wr_data[ 79:0], r_df_data_128_wr_data[127: 80]};
        4'h7 : df_wr_data = {df_data_128_wr_data[ 71:0], r_df_data_128_wr_data[127: 72]};
        4'h8 : df_wr_data = {df_data_128_wr_data[ 63:0], r_df_data_128_wr_data[127: 64]};
        4'h9 : df_wr_data = {df_data_128_wr_data[ 55:0], r_df_data_128_wr_data[127: 56]};
        4'hA : df_wr_data = {df_data_128_wr_data[ 47:0], r_df_data_128_wr_data[127: 48]};
        4'hB : df_wr_data = {df_data_128_wr_data[ 39:0], r_df_data_128_wr_data[127: 40]};
        4'hC : df_wr_data = {df_data_128_wr_data[ 31:0], r_df_data_128_wr_data[127: 32]};
        4'hD : df_wr_data = {df_data_128_wr_data[ 23:0], r_df_data_128_wr_data[127: 24]};
        4'hE : df_wr_data = {df_data_128_wr_data[ 15:0], r_df_data_128_wr_data[127: 16]};
        4'hF : df_wr_data = {df_data_128_wr_data[  7:0], r_df_data_128_wr_data[127:  8]};
    endcase
end

// Rotate the byte enables the same way as the data. On the extra beat
//   (df_data_128_wr_en_extra) there is no new data, so the upper enables are
//   forced to zero and only the carried-over enables from the previous beat
//   remain. With an offset of 0 the current enables pass through unchanged.
always @(*)
begin
    case (df_data_128_wr_offset[3:0])
        4'h0 : df_wr_be =                                                                  df_data_128_wr_be[15:0];
        4'h1 : df_wr_be = df_data_128_wr_en_extra ? {15'h0, r_df_data_128_wr_be[15   ]} : {df_data_128_wr_be[14:0], r_df_data_128_wr_be[15   ]};
        4'h2 : df_wr_be = df_data_128_wr_en_extra ? {14'h0, r_df_data_128_wr_be[15:14]} : {df_data_128_wr_be[13:0], r_df_data_128_wr_be[15:14]};
        4'h3 : df_wr_be = df_data_128_wr_en_extra ? {13'h0, r_df_data_128_wr_be[15:13]} : {df_data_128_wr_be[12:0], r_df_data_128_wr_be[15:13]};
        4'h4 : df_wr_be = df_data_128_wr_en_extra ? {12'h0, r_df_data_128_wr_be[15:12]} : {df_data_128_wr_be[11:0], r_df_data_128_wr_be[15:12]};
        4'h5 : df_wr_be = df_data_128_wr_en_extra ? {11'h0, r_df_data_128_wr_be[15:11]} : {df_data_128_wr_be[10:0], r_df_data_128_wr_be[15:11]};
        4'h6 : df_wr_be = df_data_128_wr_en_extra ? {10'h0, r_df_data_128_wr_be[15:10]} : {df_data_128_wr_be[ 9:0], r_df_data_128_wr_be[15:10]};
        4'h7 : df_wr_be = df_data_128_wr_en_extra ? { 9'h0, r_df_data_128_wr_be[15: 9]} : {df_data_128_wr_be[ 8:0], r_df_data_128_wr_be[15: 9]};
        4'h8 : df_wr_be = df_data_128_wr_en_extra ? { 8'h0, r_df_data_128_wr_be[15: 8]} : {df_data_128_wr_be[ 7:0], r_df_data_128_wr_be[15: 8]};
        4'h9 : df_wr_be = df_data_128_wr_en_extra ? { 7'h0, r_df_data_128_wr_be[15: 7]} : {df_data_128_wr_be[ 6:0], r_df_data_128_wr_be[15: 7]};
        4'hA : df_wr_be = df_data_128_wr_en_extra ? { 6'h0, r_df_data_128_wr_be[15: 6]} : {df_data_128_wr_be[ 5:0], r_df_data_128_wr_be[15: 6]};
        4'hB : df_wr_be = df_data_128_wr_en_extra ? { 5'h0, r_df_data_128_wr_be[15: 5]} : {df_data_128_wr_be[ 4:0], r_df_data_128_wr_be[15: 5]};
        4'hC : df_wr_be = df_data_128_wr_en_extra ? { 4'h0, r_df_data_128_wr_be[15: 4]} : {df_data_128_wr_be[ 3:0], r_df_data_128_wr_be[15: 4]};
        4'hD : df_wr_be = df_data_128_wr_en_extra ? { 3'h0, r_df_data_128_wr_be[15: 3]} : {df_data_128_wr_be[ 2:0], r_df_data_128_wr_be[15: 3]};
        4'hE : df_wr_be = df_data_128_wr_en_extra ? { 2'h0, r_df_data_128_wr_be[15: 2]} : {df_data_128_wr_be[ 1:0], r_df_data_128_wr_be[15: 2]};
        4'hF : df_wr_be = df_data_128_wr_en_extra ? { 1'h0, r_df_data_128_wr_be[15: 1]} : {df_data_128_wr_be[   0], r_df_data_128_wr_be[15: 1]};
    endcase
end

// Holding registers for the previous beat, consumed by the rotate muxes.
//   r_gnt is the request-accept pulse delayed by one clock; it clears the
//   held byte enables so the first write of a request carries nothing over
//   from the previous request. The clear takes priority over a capture.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        r_gnt                 <= 1'b0;
        r_df_data_128_wr_be   <= {16{1'b0}};
        r_df_data_128_wr_data <= {128{1'b0}};
    end
    else
    begin
        r_gnt <= data_ready & data_req;

        // Capture the pipelined data on every normal (non-extra) beat
        if (df_data_128_wr_en)
            r_df_data_128_wr_data <= df_data_128_wr_data;

        // Clear on the delayed accept, otherwise capture on a normal beat
        if (r_gnt | df_data_128_wr_en)
            r_df_data_128_wr_be <= r_gnt ? 16'h0000 : df_data_128_wr_be;
    end
end


// ---------
// Data FIFO

// Data FIFO is considered ready while its write level is at or below the threshold
assign data_fifo_ready = (DATA_FIFO_READY_THRESH >= data_fifo_wr_level);

// data_ready is registered for better FMax. A new request is accepted only
//   when both FIFOs have room; under SIMULATION the data_throttle hook can
//   additionally hold it low.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
        data_ready <= 1'b0;
    else
`ifdef SIMULATION
        data_ready <= data_fifo_ready & cmd_fifo_ready & ~data_throttle;
`else
        data_ready <= data_fifo_ready & cmd_fifo_ready;
`endif
end

// Instantiate Data FIFO
//   Dual-clock FIFO: written in the pcie_clk domain with rotated data (and
//   byte enables unless byte addressing is used), read in the sdram_clk
//   domain whenever the SDRAM controller requests write data. The advance
//   read feature is tied off.
//   NOTE(review): rd_adv_inc is tied to a (FIFO_DADDR_WIDTH-1)-bit zero while
//   rd_adv_level is FIFO_DADDR_WIDTH+1 bits wide; the port width is defined
//   by ref_dc_fifo_adv_block_ram, which is not visible here - confirm that
//   the tie-off width matches (a narrower constant is zero-extended, so the
//   value is 0 either way).
ref_dc_fifo_adv_block_ram #(

    .ADDR_WIDTH         (FIFO_DADDR_WIDTH               ),
    .DATA_WIDTH         (FIFO_DDATA_WIDTH               ),
    .EN_LOOK_AHEAD      (1'b1                           ),
    .DLY_WR_FOR_RD_LVL  (1'b1                           )

) data_fifo (

    .wr_rst_n           (pcie_rst_n                     ),
    .wr_clk             (pcie_clk                       ),
    .wr_en              (data_fifo_wr_en                ),
    .wr_data            (data_fifo_wr_data              ),
    .wr_level           (data_fifo_wr_level             ),
    .wr_full            (data_fifo_wr_full_unused       ),

    .rd_rst_n           (sdram_rst_n_int                ),
    .rd_clk             (sdram_clk                      ),
    .rd_en              (data_fifo_rd_en                ),
    .rd_data            (data_fifo_rd_data              ),
    .rd_level           (data_fifo_rd_level_unused      ),
    .rd_empty           (data_fifo_rd_empty             ),
    .rd_adv_en          (1'b0                           ),
    .rd_adv_inc         ({FIFO_DADDR_WIDTH-1{1'b0}}     ),
    .rd_adv_level       (data_fifo_rd_adv_level_unused  )

);

// Register the rotated data ahead of the data FIFO write port. The stored
//   word is data only in byte-addressing mode, otherwise {byte enables, data}.
//   df_error simply follows save_error; nothing in this module reads it.
always @(posedge pcie_clk or negedge pcie_rst_n)
begin
    if (!pcie_rst_n)
    begin
        df_error          <= 1'b0;
        data_fifo_wr_data <= {FIFO_DDATA_WIDTH{1'b0}};
        data_fifo_wr_en   <= 1'b0;
    end
    else
    begin
        data_fifo_wr_en   <= df_wr_en;
        data_fifo_wr_data <= (BYTE_ADDRESSING == 1) ? df_wr_data
                                                    : {df_wr_be, df_wr_data};
        df_error          <= save_error;
    end
end

// Read-side byte enables: in byte-addressing mode the FIFO stores no byte
//   enables, so every byte is enabled; otherwise they are the bits stored
//   above the data. A generate is used so that the part-select above
//   SDRAM_DATA_WIDTH is only elaborated when the FIFO word actually has it.
generate
    if (BYTE_ADDRESSING == 1)
    begin: data_rd_ba
        assign c_data_rd_be = {SDRAM_DM_WIDTH{1'b1}};
    end
    else
    begin: data_rd_std
        assign c_data_rd_be = data_fifo_rd_data[(SDRAM_DATA_WIDTH+SDRAM_DM_WIDTH)-1 : SDRAM_DATA_WIDTH];
    end
endgenerate

// Register the data FIFO output every sdram_clk; byte enables reset to all
//   ones, data resets to zero
always @(posedge sdram_clk or negedge sdram_rst_n_int)
begin
    if (!sdram_rst_n_int)
    begin
        data_rd_be   <= {SDRAM_DM_WIDTH{1'b1}};
        data_rd_data <= {SDRAM_DATA_WIDTH{1'b0}};
    end
    else
    begin
        data_rd_be   <= c_data_rd_be;
        data_rd_data <= data_fifo_rd_data[SDRAM_DATA_WIDTH-1:0];
    end
end



// This module only writes, so the read request is never raised
assign l_r_req = 1'b0;

// A request is accepted (and the command FIFO popped) when the write request
//   is asserted and the controller is not busy
assign cmd_fifo_rd_en = l_w_req & ~l_busy;

// Write request: drop for one clock after each accepted request, otherwise
//   raise while a command is pending. Requests are never issued back-to-back
//   because the Command FIFO RAM output is registered for route speed; this
//   does not affect performance because normally data will be in bursts.
always @(posedge sdram_clk or negedge sdram_rst_n_int)
begin
    if (!sdram_rst_n_int)
        l_w_req <= 1'b0;
    else if (cmd_fifo_rd_en)
        l_w_req <= 1'b0;
    else if (sdram_cmd_pending)
        l_w_req <= 1'b1;
end

// Byte addressing: the address is zero-extended to the controller width.
// Word addressing: sdram_addr counts 128-bit words, so one zero LSB is
//   appended before zero-extending (per the original note, the SDRAM core
//   address is in 64-bit units).
assign l_addr = (BYTE_ADDRESSING == 1)
              ? {{(SDRAM_ADDR_WIDTH-DMA_SDRAM_ADDR_WIDTH){1'b0}}, sdram_addr}
              : {{(SDRAM_ADDR_WIDTH-(DMA_SDRAM_ADDR_WIDTH+1)){1'b0}}, sdram_addr, 1'b0};

// SDRAM count is already in SDRAM local interface units, so hook up directly.
//   NOTE(review): sdram_count is DMA_SDRAM_COUNT_WIDTH bits and l_b_size is
//   SDRAM_BSIZE_WIDTH bits; the assignment zero-extends or truncates
//   implicitly when the two differ - confirm the parameter combination used.
assign l_b_size = sdram_count;

assign l_error  = sdram_error;

// When d_req is asserted, data must be provided to the controller one clock
//   later; the Data FIFO is latency 1, so hook up directly
assign data_fifo_rd_en = l_d_req;
assign l_datain        = data_rd_data;
assign l_dm_in         = ~data_rd_be; // DM writes when low

assign l_auto_pch      = 1'b0; // Unused
assign l_req_pri       = 3'h0; // Unused



endmodule
