// -------------------------------------------------------------------------
//
//  PROJECT: PCI Express Core
//  COMPANY: Northwest Logic, Inc.
//
// ------------------------- CONFIDENTIAL ----------------------------------
//
//                 Copyright 2007 by Northwest Logic, Inc.
//
//  All rights reserved.  No part of this source code may be reproduced or 
//  transmitted in any form or by any means, electronic or mechanical, 
//  including photocopying, recording, or any information storage and
//  retrieval system, without permission in writing from Northwest Logic, Inc.
//
//  Further, no use of this source code is permitted in any form or means
//  without a valid, written license agreement with Northwest Logic, Inc.
//
//                         Northwest Logic, Inc.
//                  1100 NW Compton Drive, Suite 100
//                      Beaverton, OR 97006, USA
//  
//                       Ph.  +1 503 533 5800
//                       Fax. +1 503 533 5900
//                          www.nwlogic.com
//
// -------------------------------------------------------------------------

`timescale 1ps / 1ps



// -----------------------
// -- Module Definition --
// -----------------------

// Multi-port front end: eight identical local ports (p0..p7) plus one data output.
// Port order below is the module's positional interface.
module sram_mp (

    // Asynchronous active-low reset and clock
    rst_n,
    clk,

    // Local port 0: read/write request, priority, address, burst size, auto-precharge flag (inputs);
    //   busy, read-valid and write-data-request strobes (outputs); write data and data mask (inputs)
    l_r_req_p0,       
    l_w_req_p0,       
    l_req_pri_p0,     
    l_addr_p0,        
    l_b_size_p0,      
    l_auto_pch_p0,    
    l_busy_p0,        
    l_r_valid_p0,                       
    l_r_valid_last_p0,
    l_d_req_p0,       
    l_d_req_last_p0,  
    l_datain_p0,      
    l_dm_in_p0,       

    // Local port 1 (same signal set as port 0)
    l_r_req_p1,       
    l_w_req_p1,       
    l_req_pri_p1,     
    l_addr_p1,        
    l_b_size_p1,      
    l_auto_pch_p1,    
    l_busy_p1,        
    l_r_valid_p1,                       
    l_r_valid_last_p1,
    l_d_req_p1,       
    l_d_req_last_p1,  
    l_datain_p1,      
    l_dm_in_p1,       

    // Local port 2 (same signal set as port 0)
    l_r_req_p2,       
    l_w_req_p2,       
    l_req_pri_p2,     
    l_addr_p2,        
    l_b_size_p2,      
    l_auto_pch_p2,    
    l_busy_p2,        
    l_r_valid_p2,                       
    l_r_valid_last_p2,
    l_d_req_p2,       
    l_d_req_last_p2,  
    l_datain_p2,      
    l_dm_in_p2,       

    // Local port 3 (same signal set as port 0)
    l_r_req_p3,       
    l_w_req_p3,       
    l_req_pri_p3,     
    l_addr_p3,        
    l_b_size_p3,      
    l_auto_pch_p3,    
    l_busy_p3,        
    l_r_valid_p3,                       
    l_r_valid_last_p3,
    l_d_req_p3,       
    l_d_req_last_p3,  
    l_datain_p3,      
    l_dm_in_p3,       

    // Local port 4 (same signal set as port 0)
    l_r_req_p4,       
    l_w_req_p4,       
    l_req_pri_p4,     
    l_addr_p4,        
    l_b_size_p4,      
    l_auto_pch_p4,    
    l_busy_p4,        
    l_r_valid_p4,                       
    l_r_valid_last_p4,
    l_d_req_p4,       
    l_d_req_last_p4,  
    l_datain_p4,      
    l_dm_in_p4,       

    // Local port 5 (same signal set as port 0)
    l_r_req_p5,       
    l_w_req_p5,       
    l_req_pri_p5,     
    l_addr_p5,        
    l_b_size_p5,      
    l_auto_pch_p5,    
    l_busy_p5,        
    l_r_valid_p5,                       
    l_r_valid_last_p5,
    l_d_req_p5,       
    l_d_req_last_p5,  
    l_datain_p5,      
    l_dm_in_p5,       

    // Local port 6 (same signal set as port 0)
    l_r_req_p6,       
    l_w_req_p6,       
    l_req_pri_p6,     
    l_addr_p6,        
    l_b_size_p6,      
    l_auto_pch_p6,    
    l_busy_p6,        
    l_r_valid_p6,                       
    l_r_valid_last_p6,
    l_d_req_p6,       
    l_d_req_last_p6,  
    l_datain_p6,      
    l_dm_in_p6,       

    // Local port 7 (same signal set as port 0)
    l_r_req_p7,       
    l_w_req_p7,       
    l_req_pri_p7,     
    l_addr_p7,        
    l_b_size_p7,      
    l_auto_pch_p7,    
    l_busy_p7,        
    l_r_valid_p7,                       
    l_r_valid_last_p7,
    l_d_req_p7,       
    l_d_req_last_p7,  
    l_datain_p7,      
    l_dm_in_p7,       

    // Single DSIZE-wide data output for the whole module
    l_dataout

);



// ----------------
// -- Parameters --
// ----------------

// DMA_DEST_ADDR_WIDTH sizes the internal SRAM address registers (addr, sram_rd_addr, sram_wr_addr)
parameter   DMA_DEST_ADDR_WIDTH     = 9;    // Address width to implement for SRAM DMA memory (size == 2^(DMA_DEST_ADDR_WIDTH+4) bytes)

parameter   DSIZE                   = 128;  // Local side data width
parameter   ADDR_WIDTH              = 33;   // Local address width
localparam  BURST_SIZE_WIDTH        = 8;    // Width of the l_b_size_pX ports

// State Machine States (one-hot encoded, 3 bits)
localparam  IDLE                    = 3'b001; // Idle      
localparam  DLAY                    = 3'b010; // Pipeline for speed
localparam  BUSY                    = 3'b100; // Wait for previous grant to finish



// ----------------------
// -- Port Definitions --
// ----------------------

// Global reset and clock
input                               rst_n, clk;

// Per-port request and control inputs (one of each per port, p0..p7)
input                               l_r_req_p0, l_r_req_p1, l_r_req_p2, l_r_req_p3,
                                    l_r_req_p4, l_r_req_p5, l_r_req_p6, l_r_req_p7;
input                               l_w_req_p0, l_w_req_p1, l_w_req_p2, l_w_req_p3,
                                    l_w_req_p4, l_w_req_p5, l_w_req_p6, l_w_req_p7;
input   [2:0]                       l_req_pri_p0, l_req_pri_p1, l_req_pri_p2, l_req_pri_p3,
                                    l_req_pri_p4, l_req_pri_p5, l_req_pri_p6, l_req_pri_p7;
input   [ADDR_WIDTH-1:0]            l_addr_p0, l_addr_p1, l_addr_p2, l_addr_p3,
                                    l_addr_p4, l_addr_p5, l_addr_p6, l_addr_p7;
input   [BURST_SIZE_WIDTH-1:0]      l_b_size_p0, l_b_size_p1, l_b_size_p2, l_b_size_p3,
                                    l_b_size_p4, l_b_size_p5, l_b_size_p6, l_b_size_p7;
input                               l_auto_pch_p0, l_auto_pch_p1, l_auto_pch_p2, l_auto_pch_p3,
                                    l_auto_pch_p4, l_auto_pch_p5, l_auto_pch_p6, l_auto_pch_p7;

// Per-port handshake and strobe outputs
output                              l_busy_p0, l_busy_p1, l_busy_p2, l_busy_p3,
                                    l_busy_p4, l_busy_p5, l_busy_p6, l_busy_p7;
output                              l_r_valid_p0, l_r_valid_p1, l_r_valid_p2, l_r_valid_p3,
                                    l_r_valid_p4, l_r_valid_p5, l_r_valid_p6, l_r_valid_p7;
output                              l_r_valid_last_p0, l_r_valid_last_p1, l_r_valid_last_p2, l_r_valid_last_p3,
                                    l_r_valid_last_p4, l_r_valid_last_p5, l_r_valid_last_p6, l_r_valid_last_p7;
output                              l_d_req_p0, l_d_req_p1, l_d_req_p2, l_d_req_p3,
                                    l_d_req_p4, l_d_req_p5, l_d_req_p6, l_d_req_p7;
output                              l_d_req_last_p0, l_d_req_last_p1, l_d_req_last_p2, l_d_req_last_p3,
                                    l_d_req_last_p4, l_d_req_last_p5, l_d_req_last_p6, l_d_req_last_p7;

// Per-port write data and data mask inputs
input   [DSIZE-1:0]                 l_datain_p0, l_datain_p1, l_datain_p2, l_datain_p3,
                                    l_datain_p4, l_datain_p5, l_datain_p6, l_datain_p7;
input   [DSIZE/8-1:0]               l_dm_in_p0, l_dm_in_p1, l_dm_in_p2, l_dm_in_p3,
                                    l_dm_in_p4, l_dm_in_p5, l_dm_in_p6, l_dm_in_p7;

// Data output (one bus for the whole module)
output  [DSIZE-1:0]                 l_dataout;



// ----------------
// -- Port Types --
// ----------------

// Inputs are nets; every output is driven from a procedural block and is therefore a reg.
wire                                rst_n, clk;

// Per-port request and control inputs
wire                                l_r_req_p0, l_r_req_p1, l_r_req_p2, l_r_req_p3,
                                    l_r_req_p4, l_r_req_p5, l_r_req_p6, l_r_req_p7;
wire                                l_w_req_p0, l_w_req_p1, l_w_req_p2, l_w_req_p3,
                                    l_w_req_p4, l_w_req_p5, l_w_req_p6, l_w_req_p7;
wire    [2:0]                       l_req_pri_p0, l_req_pri_p1, l_req_pri_p2, l_req_pri_p3,
                                    l_req_pri_p4, l_req_pri_p5, l_req_pri_p6, l_req_pri_p7;
wire    [ADDR_WIDTH-1:0]            l_addr_p0, l_addr_p1, l_addr_p2, l_addr_p3,
                                    l_addr_p4, l_addr_p5, l_addr_p6, l_addr_p7;
wire    [BURST_SIZE_WIDTH-1:0]      l_b_size_p0, l_b_size_p1, l_b_size_p2, l_b_size_p3,
                                    l_b_size_p4, l_b_size_p5, l_b_size_p6, l_b_size_p7;
wire                                l_auto_pch_p0, l_auto_pch_p1, l_auto_pch_p2, l_auto_pch_p3,
                                    l_auto_pch_p4, l_auto_pch_p5, l_auto_pch_p6, l_auto_pch_p7;

// Per-port registered outputs
reg                                 l_busy_p0, l_busy_p1, l_busy_p2, l_busy_p3,
                                    l_busy_p4, l_busy_p5, l_busy_p6, l_busy_p7;
reg                                 l_r_valid_p0, l_r_valid_p1, l_r_valid_p2, l_r_valid_p3,
                                    l_r_valid_p4, l_r_valid_p5, l_r_valid_p6, l_r_valid_p7;
reg                                 l_r_valid_last_p0, l_r_valid_last_p1, l_r_valid_last_p2, l_r_valid_last_p3,
                                    l_r_valid_last_p4, l_r_valid_last_p5, l_r_valid_last_p6, l_r_valid_last_p7;
reg                                 l_d_req_p0, l_d_req_p1, l_d_req_p2, l_d_req_p3,
                                    l_d_req_p4, l_d_req_p5, l_d_req_p6, l_d_req_p7;
reg                                 l_d_req_last_p0, l_d_req_last_p1, l_d_req_last_p2, l_d_req_last_p3,
                                    l_d_req_last_p4, l_d_req_last_p5, l_d_req_last_p6, l_d_req_last_p7;

// Per-port write data and data mask inputs
wire    [DSIZE-1:0]                 l_datain_p0, l_datain_p1, l_datain_p2, l_datain_p3,
                                    l_datain_p4, l_datain_p5, l_datain_p6, l_datain_p7;
wire    [DSIZE/8-1:0]               l_dm_in_p0, l_dm_in_p1, l_dm_in_p2, l_dm_in_p3,
                                    l_dm_in_p4, l_dm_in_p5, l_dm_in_p6, l_dm_in_p7;

// Data output
reg     [DSIZE-1:0]                 l_dataout;



// -------------------
// -- Local Signals --
// -------------------

// Arbitration
wire    [7:0]                       req;                // Bit N == read or write request pending on port N
reg     [2:0]                       c_gnt_sel;          // Combinational arbitration winner
reg     [2:0]                       state;              // One-hot: IDLE / DLAY / BUSY
reg     [2:0]                       gnt_sel, prior;     // Registered winner; highest-priority port for the next arbitration
reg     [7:0]                       gnt;                // One-hot decode of gnt_sel

// Request attributes captured from the granted port in state DLAY
reg                                 wr_rd_n;            // 1 == write, 0 == read
reg     [DMA_DEST_ADDR_WIDTH-1:0]   addr;
reg     [BURST_SIZE_WIDTH-1:0]      b_size;

// Data phase: copies of the grant taken when the transfer starts, plus the word counter
reg     [2:0]                       data_gnt_sel, r_data_gnt_sel;   // r_data_gnt_sel is data_gnt_sel delayed one clock
reg     [7:0]                       data_gnt;
reg                                 data_wr_rd_n;
reg     [BURST_SIZE_WIDTH-1:0]      data_ctr;           // Loaded with b_size, counts down to zero
reg                                 data_ctr_leq_1, data_ctr_eq_1;

// SRAM addressing and control; write address is the read address delayed by two registers
reg     [DMA_DEST_ADDR_WIDTH-1:0]   sram_rd_addr, d_sram_wr_addr, sram_wr_addr;
reg                                 d_sram_wr_en, sram_access;

// Write-path values selected from the port indicated by r_data_gnt_sel
reg     [DSIZE/8-1:0]               be_in;              // Bitwise inverse of the selected port's l_dm_in
reg     [DSIZE-1:0]                 data_in;

// First-stage read valid strobes, one pair per port
reg                                 d_l_r_valid_p0, d_l_r_valid_p1, d_l_r_valid_p2, d_l_r_valid_p3,
                                    d_l_r_valid_p4, d_l_r_valid_p5, d_l_r_valid_p6, d_l_r_valid_p7;
reg                                 d_l_r_valid_last_p0, d_l_r_valid_last_p1, d_l_r_valid_last_p2, d_l_r_valid_last_p3,
                                    d_l_r_valid_last_p4, d_l_r_valid_last_p5, d_l_r_valid_last_p6, d_l_r_valid_last_p7;

// SRAM-side data buses (DSIZE wide)
reg     [DSIZE-1:0]                 sram_wr_data;
wire    [DSIZE-1:0]                 sram_rd_data;

genvar                              i;



// ---------------
// -- Equations --
// ---------------

// Collapse the per-port read/write request lines into one request vector;
//   req[N] is set when port N has a read or a write request pending
assign req = { (l_r_req_p7 | l_w_req_p7),
               (l_r_req_p6 | l_w_req_p6),
               (l_r_req_p5 | l_w_req_p5),
               (l_r_req_p4 | l_w_req_p4),
               (l_r_req_p3 | l_w_req_p3),
               (l_r_req_p2 | l_w_req_p2),
               (l_r_req_p1 | l_w_req_p1),
               (l_r_req_p0 | l_w_req_p0) };

// Round-robin arbiter.  Port "prior" has the highest priority and priority falls off in
//   increasing port order (wrapping 7 -> 0), so the port just before "prior" is lowest.
//   When no request is pending the chain falls through to that lowest-priority port;
//   consumers qualify the result with req[] and the state machine.
always @(*)
begin
    case (prior)
        3'd0:
            c_gnt_sel = req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                                 3'd7;
        3'd1:
            c_gnt_sel = req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                                 3'd0;
        3'd2:
            c_gnt_sel = req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                                 3'd1;
        3'd3:
            c_gnt_sel = req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                                 3'd2;
        3'd4:
            c_gnt_sel = req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                                 3'd3;
        3'd5:
            c_gnt_sel = req[5] ? 3'd5 :
                        req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                                 3'd4;
        3'd6:
            c_gnt_sel = req[6] ? 3'd6 :
                        req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                                 3'd5;
        3'd7:
            c_gnt_sel = req[7] ? 3'd7 :
                        req[0] ? 3'd0 :
                        req[1] ? 3'd1 :
                        req[2] ? 3'd2 :
                        req[3] ? 3'd3 :
                        req[4] ? 3'd4 :
                        req[5] ? 3'd5 :
                                 3'd6;
    endcase
end

// Request sequencing state machine (tracks overlapped requests).  It is pipelined on the
//   assumption that bursts are normally 3 words or longer; shorter bursts still work but
//   cost throughput.
//     IDLE : arbitrate; leave as soon as any port requests
//     DLAY : one clock in which the granted port's information is muxed in
//     BUSY : wait for the transfer in progress to reach its end, then hand over
//   Any other encoding recovers to IDLE.
always @(posedge clk or negedge rst_n)
begin
    if (rst_n == 1'b0)
        state <= IDLE;
    else if (state == IDLE)
    begin
        // Stay in IDLE until at least one request is pending
        if (req != 8'h0)
            state <= DLAY;
    end
    else if (state == DLAY)
        state <= BUSY;
    else if (state == BUSY)
    begin
        // Hold until the data counter reports one word or less remaining
        if (data_ctr_leq_1)
            state <= IDLE;
    end
    else
        state <= IDLE;
end

// Grant bookkeeping, request capture and data-phase control.
//   All registers here are cleared by the asynchronous active-low reset and otherwise
//   update on the rising edge of clk.  A new data phase starts in any cycle where
//   (state == BUSY) & data_ctr_leq_1; that condition loads the data_* copies, the word
//   counter, the SRAM read address and the per-port write data request strobes.
always @(posedge clk or negedge rst_n)
begin
    if (rst_n == 1'b0)
    begin
        gnt_sel         <= 3'h0;
        prior           <= 3'h0;
        gnt             <= 8'h0;

        // Every port reports busy out of reset
        l_busy_p0       <= 1'b1;
        l_busy_p1       <= 1'b1;
        l_busy_p2       <= 1'b1;
        l_busy_p3       <= 1'b1;
        l_busy_p4       <= 1'b1;
        l_busy_p5       <= 1'b1;
        l_busy_p6       <= 1'b1;
        l_busy_p7       <= 1'b1;

        wr_rd_n         <= 1'b0;
        addr            <= {DMA_DEST_ADDR_WIDTH{1'b0}};
        b_size          <= {BURST_SIZE_WIDTH{1'b0}};

        data_gnt_sel    <= 3'h0;
        data_gnt        <= 8'h0;
        data_wr_rd_n    <= 1'b0;
        data_ctr        <= {BURST_SIZE_WIDTH{1'b0}}; 
        // Counter is zero out of reset, so "<= 1" is true and "== 1" is false
        data_ctr_leq_1  <= 1'b1;
        data_ctr_eq_1   <= 1'b0;

        r_data_gnt_sel  <= 3'h0;

        sram_rd_addr    <= {DMA_DEST_ADDR_WIDTH{1'b0}};

        d_sram_wr_addr  <= {DMA_DEST_ADDR_WIDTH{1'b0}};
        sram_wr_addr    <= {DMA_DEST_ADDR_WIDTH{1'b0}};

        d_sram_wr_en    <= 1'b0;

        sram_access     <= 1'b0;

        l_d_req_p0      <= 1'b0;
        l_d_req_p1      <= 1'b0;
        l_d_req_p2      <= 1'b0;
        l_d_req_p3      <= 1'b0;
        l_d_req_p4      <= 1'b0;
        l_d_req_p5      <= 1'b0;
        l_d_req_p6      <= 1'b0;
        l_d_req_p7      <= 1'b0;

        l_d_req_last_p0 <= 1'b0;
        l_d_req_last_p1 <= 1'b0;
        l_d_req_last_p2 <= 1'b0;
        l_d_req_last_p3 <= 1'b0;
        l_d_req_last_p4 <= 1'b0;
        l_d_req_last_p5 <= 1'b0;
        l_d_req_last_p6 <= 1'b0;
        l_d_req_last_p7 <= 1'b0;
    end
    else
    begin
        // Hold port number which won a grant; only sampled while in IDLE, so it is
        //   stable through DLAY and BUSY
        if (state == IDLE)
            gnt_sel <= c_gnt_sel;

        // Assign the port following the granted port the highest priority
        //   for the next arbitration (3-bit add, so port 7 wraps to port 0)
        prior <= gnt_sel + 3'h1;

        // One-hot decode of gnt_sel, registered (one clock behind gnt_sel)
        // Valid only in state BUSY
        case (gnt_sel)
            3'd0 : gnt <= 8'h01;
            3'd1 : gnt <= 8'h02;
            3'd2 : gnt <= 8'h04;
            3'd3 : gnt <= 8'h08;
            3'd4 : gnt <= 8'h10;
            3'd5 : gnt <= 8'h20;
            3'd6 : gnt <= 8'h40;
            3'd7 : gnt <= 8'h80;
        endcase

        // Per-port busy: driven low for the clock following an IDLE cycle in which that port
        //   was requesting and was selected by the arbiter; high at all other times
        l_busy_p0 <= ~((c_gnt_sel == 3'h0) & req[0] & (state == IDLE));
        l_busy_p1 <= ~((c_gnt_sel == 3'h1) & req[1] & (state == IDLE));
        l_busy_p2 <= ~((c_gnt_sel == 3'h2) & req[2] & (state == IDLE));
        l_busy_p3 <= ~((c_gnt_sel == 3'h3) & req[3] & (state == IDLE));
        l_busy_p4 <= ~((c_gnt_sel == 3'h4) & req[4] & (state == IDLE));
        l_busy_p5 <= ~((c_gnt_sel == 3'h5) & req[5] & (state == IDLE));
        l_busy_p6 <= ~((c_gnt_sel == 3'h6) & req[6] & (state == IDLE));
        l_busy_p7 <= ~((c_gnt_sel == 3'h7) & req[7] & (state == IDLE));

        // Record type of granted request; 1==write, 0==read
        if (state == DLAY)
        begin
            // If a port has both write and read requests, service the writes with priority;
            //   this is not a "fair" arbitration between write and read requests from
            //   the same port, but the typical application for this module will only
            //   have write OR read requests and should not have both
            case (gnt_sel)
                3'd0 : wr_rd_n <= l_w_req_p0;
                3'd1 : wr_rd_n <= l_w_req_p1;
                3'd2 : wr_rd_n <= l_w_req_p2;
                3'd3 : wr_rd_n <= l_w_req_p3;
                3'd4 : wr_rd_n <= l_w_req_p4;
                3'd5 : wr_rd_n <= l_w_req_p5;
                3'd6 : wr_rd_n <= l_w_req_p6;
                3'd7 : wr_rd_n <= l_w_req_p7;
            endcase
        end

        // Capture the granted port's start address; only bits [DMA_DEST_ADDR_WIDTH:1] of the
        //   local address are used (bit 0 and the upper bits are dropped here)
        if (state == DLAY)
        begin
            case (gnt_sel)
                3'd0 : addr <= l_addr_p0[DMA_DEST_ADDR_WIDTH:1];
                3'd1 : addr <= l_addr_p1[DMA_DEST_ADDR_WIDTH:1];
                3'd2 : addr <= l_addr_p2[DMA_DEST_ADDR_WIDTH:1];
                3'd3 : addr <= l_addr_p3[DMA_DEST_ADDR_WIDTH:1];
                3'd4 : addr <= l_addr_p4[DMA_DEST_ADDR_WIDTH:1];
                3'd5 : addr <= l_addr_p5[DMA_DEST_ADDR_WIDTH:1];
                3'd6 : addr <= l_addr_p6[DMA_DEST_ADDR_WIDTH:1];
                3'd7 : addr <= l_addr_p7[DMA_DEST_ADDR_WIDTH:1];
            endcase
        end

        // Capture the granted port's burst size
        if (state == DLAY)
        begin
            case (gnt_sel)
                3'd0 : b_size <= l_b_size_p0;
                3'd1 : b_size <= l_b_size_p1;
                3'd2 : b_size <= l_b_size_p2;
                3'd3 : b_size <= l_b_size_p3;
                3'd4 : b_size <= l_b_size_p4;
                3'd5 : b_size <= l_b_size_p5;
                3'd6 : b_size <= l_b_size_p6;
                3'd7 : b_size <= l_b_size_p7;
            endcase
        end

        // Implement the grant: copy the pending grant into the data-phase registers
        //   when the previous transfer has one word or less remaining
        if ((state == BUSY) & data_ctr_leq_1)
        begin
            data_gnt_sel <= gnt_sel;
            data_gnt     <= gnt;
            data_wr_rd_n <= wr_rd_n;
        end
        
        // Word counter: loaded with the burst size at the start of a transfer, then
        //   counts down by one per clock and stops at zero
        if ((state == BUSY) & data_ctr_leq_1)
            data_ctr <= b_size;
        else if (data_ctr != {BURST_SIZE_WIDTH{1'b0}})
            data_ctr <= data_ctr - {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1};

        // Registered "data_ctr <= 1" flag, computed one clock ahead from the value
        //   data_ctr is about to take
        if ((state == BUSY) & data_ctr_leq_1)
            data_ctr_leq_1 <= (b_size <= {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1});     // Loading new count <= 1
        else
            data_ctr_leq_1 <= (data_ctr <= {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10});  // Going to 1 or less

        // Registered "data_ctr == 1" flag, computed one clock ahead in the same way
        if ((state == BUSY) & data_ctr_leq_1)
            data_ctr_eq_1 <= (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1});      // Loading new count == 1
        else if (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10})
            data_ctr_eq_1 <= 1'b1;  // 2 going to 1
        else
            data_ctr_eq_1 <= 1'b0;

        // Delay to same latency as data_in
        r_data_gnt_sel <= data_gnt_sel;

        // SRAM read address: loaded with the captured start address when a transfer
        //   starts, then incremented on every clock in which sram_access is set
        // Compute look ahead register for (state == BUSY) and data_ctr == 1 condition 
        if ((state == BUSY) & data_ctr_leq_1)
            sram_rd_addr <= addr;
        else if (sram_access)
            sram_rd_addr <= sram_rd_addr + {{(DMA_DEST_ADDR_WIDTH-1){1'b0}}, 1'b1};

        // Using read-modify write, so write address always follows read address by 1 clock;
        //   add extra clock for better timing
        d_sram_wr_addr <= sram_rd_addr;
        sram_wr_addr   <= d_sram_wr_addr;

        // Write enable (first pipeline stage): an SRAM access belonging to a write transfer
        d_sram_wr_en <= data_wr_rd_n & sram_access;

        // l_b_size_pX cannot be zero since l_b_size == 0 is not allowed; pipeline sram_access accordingly
        //   Set when a transfer starts; cleared when the counter is at 1 unless a new
        //   transfer starts in that same clock (the set branch has priority)
        if ((state == BUSY) & data_ctr_leq_1)
            sram_access <= 1'b1;
        else if (data_ctr == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})
            sram_access <= 1'b0;

        // Individual port l_d_req: set when a write transfer starts for the granted port,
        //   cleared when data_ctr_leq_1 is set and no new write starts for that port
        if (gnt[0] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p0 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p0 <= 1'b0;
        if (gnt[1] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p1 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p1 <= 1'b0;
        if (gnt[2] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p2 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p2 <= 1'b0;
        if (gnt[3] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p3 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p3 <= 1'b0;
        if (gnt[4] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p4 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p4 <= 1'b0;
        if (gnt[5] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p5 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p5 <= 1'b0;
        if (gnt[6] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p6 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p6 <= 1'b0;
        if (gnt[7] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n) l_d_req_p7 <= 1'b1; else if (data_ctr_leq_1) l_d_req_p7 <= 1'b0;

        // Individual port l_d_req_last: one-clock strobe raised either when a single-word
        //   write starts (b_size == 1) or when the running write for that port has its
        //   counter at 2, i.e. the strobe is registered for the clock where the counter is 1
        l_d_req_last_p0 <= (gnt[0] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[0] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p1 <= (gnt[1] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[1] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p2 <= (gnt[2] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[2] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p3 <= (gnt[3] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[3] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p4 <= (gnt[4] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[4] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p5 <= (gnt[5] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[5] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p6 <= (gnt[6] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[6] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
        l_d_req_last_p7 <= (gnt[7] & (state == BUSY) & data_ctr_leq_1 & wr_rd_n & (b_size == {{(BURST_SIZE_WIDTH-1){1'b0}}, 1'b1})) | (data_gnt[7] & data_wr_rd_n & (data_ctr == {{(BURST_SIZE_WIDTH-2){1'b0}}, 2'b10}));
    end
end

// Write-path input mux: pick the data mask and write data of the port selected by
//   r_data_gnt_sel.  The mask is inverted on the way in, so be_in is the bitwise
//   complement of the selected l_dm_in_pX.
always @(*)
begin
    case (r_data_gnt_sel)
        3'd0 : begin be_in = ~l_dm_in_p0; data_in = l_datain_p0; end
        3'd1 : begin be_in = ~l_dm_in_p1; data_in = l_datain_p1; end
        3'd2 : begin be_in = ~l_dm_in_p2; data_in = l_datain_p2; end
        3'd3 : begin be_in = ~l_dm_in_p3; data_in = l_datain_p3; end
        3'd4 : begin be_in = ~l_dm_in_p4; data_in = l_datain_p4; end
        3'd5 : begin be_in = ~l_dm_in_p5; data_in = l_datain_p5; end
        3'd6 : begin be_in = ~l_dm_in_p6; data_in = l_datain_p6; end
        3'd7 : begin be_in = ~l_dm_in_p7; data_in = l_datain_p7; end
    endcase
end

// Read data valid strobes; 2 clock delay from sram_rd_address.
//   Stage 1 (d_l_r_valid*_pN): decoded from data_gnt[N] while data_wr_rd_n
//                              is low (read direction).
//   Stage 2 (l_r_valid*_pN)  : stage 1 re-registered one clock later.
// Assignments are grouped per port; all are non-blocking, so their textual
// order has no effect on the result.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
    begin
        // Port 0
        d_l_r_valid_p0      <= 1'b0;
        d_l_r_valid_last_p0 <= 1'b0;
        l_r_valid_p0        <= 1'b0;
        l_r_valid_last_p0   <= 1'b0;

        // Port 1
        d_l_r_valid_p1      <= 1'b0;
        d_l_r_valid_last_p1 <= 1'b0;
        l_r_valid_p1        <= 1'b0;
        l_r_valid_last_p1   <= 1'b0;

        // Port 2
        d_l_r_valid_p2      <= 1'b0;
        d_l_r_valid_last_p2 <= 1'b0;
        l_r_valid_p2        <= 1'b0;
        l_r_valid_last_p2   <= 1'b0;

        // Port 3
        d_l_r_valid_p3      <= 1'b0;
        d_l_r_valid_last_p3 <= 1'b0;
        l_r_valid_p3        <= 1'b0;
        l_r_valid_last_p3   <= 1'b0;

        // Port 4
        d_l_r_valid_p4      <= 1'b0;
        d_l_r_valid_last_p4 <= 1'b0;
        l_r_valid_p4        <= 1'b0;
        l_r_valid_last_p4   <= 1'b0;

        // Port 5
        d_l_r_valid_p5      <= 1'b0;
        d_l_r_valid_last_p5 <= 1'b0;
        l_r_valid_p5        <= 1'b0;
        l_r_valid_last_p5   <= 1'b0;

        // Port 6
        d_l_r_valid_p6      <= 1'b0;
        d_l_r_valid_last_p6 <= 1'b0;
        l_r_valid_p6        <= 1'b0;
        l_r_valid_last_p6   <= 1'b0;

        // Port 7
        d_l_r_valid_p7      <= 1'b0;
        d_l_r_valid_last_p7 <= 1'b0;
        l_r_valid_p7        <= 1'b0;
        l_r_valid_last_p7   <= 1'b0;
    end
    else
    begin
        // Port 0
        d_l_r_valid_p0      <= ~data_wr_rd_n & data_gnt[0] & sram_access;
        d_l_r_valid_last_p0 <= ~data_wr_rd_n & data_gnt[0] & data_ctr_eq_1;
        l_r_valid_p0        <= d_l_r_valid_p0;
        l_r_valid_last_p0   <= d_l_r_valid_last_p0;

        // Port 1
        d_l_r_valid_p1      <= ~data_wr_rd_n & data_gnt[1] & sram_access;
        d_l_r_valid_last_p1 <= ~data_wr_rd_n & data_gnt[1] & data_ctr_eq_1;
        l_r_valid_p1        <= d_l_r_valid_p1;
        l_r_valid_last_p1   <= d_l_r_valid_last_p1;

        // Port 2
        d_l_r_valid_p2      <= ~data_wr_rd_n & data_gnt[2] & sram_access;
        d_l_r_valid_last_p2 <= ~data_wr_rd_n & data_gnt[2] & data_ctr_eq_1;
        l_r_valid_p2        <= d_l_r_valid_p2;
        l_r_valid_last_p2   <= d_l_r_valid_last_p2;

        // Port 3
        d_l_r_valid_p3      <= ~data_wr_rd_n & data_gnt[3] & sram_access;
        d_l_r_valid_last_p3 <= ~data_wr_rd_n & data_gnt[3] & data_ctr_eq_1;
        l_r_valid_p3        <= d_l_r_valid_p3;
        l_r_valid_last_p3   <= d_l_r_valid_last_p3;

        // Port 4
        d_l_r_valid_p4      <= ~data_wr_rd_n & data_gnt[4] & sram_access;
        d_l_r_valid_last_p4 <= ~data_wr_rd_n & data_gnt[4] & data_ctr_eq_1;
        l_r_valid_p4        <= d_l_r_valid_p4;
        l_r_valid_last_p4   <= d_l_r_valid_last_p4;

        // Port 5
        d_l_r_valid_p5      <= ~data_wr_rd_n & data_gnt[5] & sram_access;
        d_l_r_valid_last_p5 <= ~data_wr_rd_n & data_gnt[5] & data_ctr_eq_1;
        l_r_valid_p5        <= d_l_r_valid_p5;
        l_r_valid_last_p5   <= d_l_r_valid_last_p5;

        // Port 6
        d_l_r_valid_p6      <= ~data_wr_rd_n & data_gnt[6] & sram_access;
        d_l_r_valid_last_p6 <= ~data_wr_rd_n & data_gnt[6] & data_ctr_eq_1;
        l_r_valid_p6        <= d_l_r_valid_p6;
        l_r_valid_last_p6   <= d_l_r_valid_last_p6;

        // Port 7
        d_l_r_valid_p7      <= ~data_wr_rd_n & data_gnt[7] & sram_access;
        d_l_r_valid_last_p7 <= ~data_wr_rd_n & data_gnt[7] & data_ctr_eq_1;
        l_r_valid_p7        <= d_l_r_valid_p7;
        l_r_valid_last_p7   <= d_l_r_valid_last_p7;
    end
end

// Read-data output register: cleared by the asynchronous active-low reset,
// otherwise loaded from sram_rd_data on every rising clock edge.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
    begin
        l_dataout <= {DSIZE{1'b0}};
    end
    else
    begin
        l_dataout <= sram_rd_data;
    end
end

// Assign each byte its own RAM instance: one registered write-enable bit
// per byte lane of the DSIZE-wide data path.
reg     [(DSIZE/8)-1:0] gen_sram_wr_en;

// Write-data register: cleared by the asynchronous active-low reset,
// otherwise captures the selected port data (data_in) every clock.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
    begin
        sram_wr_data <= {DSIZE{1'b0}};
    end
    else
    begin
        sram_wr_data <= data_in;
    end
end

// Instantiate DMA Destination Memory
// One 8-bit wide ref_inferred_block_ram is generated per byte lane, each
// with its own registered write enable, so individual bytes can be written.
generate
    for (i = 0; i < (DSIZE/8); i = i + 1)
    begin : gen_sram_wr_data

        // Lane write enable: the global write strobe gated by this lane's
        // byte enable, registered for one clock.
        always @(posedge clk or negedge rst_n)
        begin
            if (!rst_n)
                gen_sram_wr_en[i] <= 1'b0;
            else
                gen_sram_wr_en[i] <= be_in[i] & d_sram_wr_en;
        end

        // Byte-lane memory; lane i covers data bits [8*i+7 : 8*i]
        ref_inferred_block_ram #(
            .DATA_WIDTH         (8                      ),
            .ADDR_WIDTH         (DMA_DEST_ADDR_WIDTH    )
        ) dma_target_memory (
            // Write port
            .wr_clk             (clk                    ),
            .wr_en              (gen_sram_wr_en[i]      ),
            .wr_addr            (sram_wr_addr           ),
            .wr_data            (sram_wr_data[(i*8) +: 8]),

            // Read port
            .rd_clk             (clk                    ),
            .rd_addr            (sram_rd_addr           ),
            .rd_data            (sram_rd_data[(i*8) +: 8])
        );
    end
endgenerate



endmodule
