/************************************************************************
 * Copyright 2006-2010 Silicon Software GmbH
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */
#include <linux/err.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "menable4.h"
#include "menable.h"
#include "menable_ioctl.h"
#include "uiq.h"

#include "linux_version.h"

/**
 * men4_reset_vlink - reset link between bridge FPGA and upper FPGA
 * @men: board to reset
 * @upper: when nonzero, additionally run the reset sequence on the
 *         registers behind ME4_FULLOFFSET
 *
 * returns: 0 (there is currently no failure path)
 */
static int
men4_reset_vlink(struct siso_menable *men, const int upper)
{
	uint32_t cplid;
	int n;

	/*
	 * Values are written several times in a row so that they
	 * make it through the register pipeline.
	 */
	iowrite32(2, men->runtime_base + ME4_IFCONTROL);
	for (n = 0; n < 2; n++)
		iowrite32(0, men->runtime_base + ME4_IFCONTROL);
	for (n = 0; n < 5; n++)
		iowrite32(1, men->runtime_base + ME4_IFCONTROL);

	if (upper) {
		/* only the upper 16 bit of FPGACONTROL are carried over */
		cplid = ioread32(men->runtime_base + ME4_FPGACONTROL) &
				0xffff0000;

		iowrite32(cplid | 0x2,
			men->runtime_base + ME4_FULLOFFSET + ME4_IFCONTROL);
		for (n = 4; n > 0; n--)
			iowrite32(cplid,
				men->runtime_base + ME4_FULLOFFSET +
				ME4_IFCONTROL);

		/* mirror the PCIe config registers into the upper range */
		n = ME4_PCIECONFIG0;
		while (n < ME4_PCIECONFIGMAX) {
			uint32_t cfg = ioread32(men->runtime_base + n);

			iowrite32(cfg, men->runtime_base + ME4_FULLOFFSET + n);
			n += 4;
		}

		iowrite32(cplid | 0x1,
			men->runtime_base + ME4_FULLOFFSET + ME4_IFCONTROL);
	}

	return 0;
}

/**
 * men4_ioctl - handle the ioctl commands implemented in this file
 * @men: board the ioctl was issued for
 * @cmd: ioctl command
 * @size: argument size that was passed along with the command
 * @arg: ioctl argument; a user space pointer for IOCTL_BOARD_INFO,
 *       the requested mode (0 or 1) for IOCTL_PP_CONTROL
 *
 * returns: 0 on success, negative error code otherwise. Commands not
 *          handled here yield -ENOIOCTLCMD.
 */
static int
men4_ioctl(struct siso_menable *men, const unsigned int cmd,
		const unsigned int size, unsigned long arg)
{
	switch (cmd) {
	case IOCTL_BOARD_INFO: {
		unsigned int a[4];
		int i;

		if (size != sizeof(a)) {
			warn_wrong_iosize(men, cmd, sizeof(a));
			return -EINVAL;
		}

		/* hand out the first four 32 bit registers of the board */
		for (i = 0; i < ARRAY_SIZE(a); i++)
			a[i] = ioread32(men->runtime_base + 4 * i);
		if (copy_to_user((void __user *) arg,
				a, sizeof(a)))
			return -EFAULT;
		return 0;
	}
	case IOCTL_PP_CONTROL: {
		int ret;
		unsigned long flags;

		if (size != 0) {
			warn_wrong_iosize(men, cmd, 0);
			return -EINVAL;
		}

		/* design_changing serialises concurrent mode changes */
		spin_lock_irqsave(&men->designlock, flags);
		if (men->design_changing) {
			spin_unlock_irqrestore(&men->designlock, flags);
			return -EBUSY;
		}

		men->design_changing = true;
		spin_unlock_irqrestore(&men->designlock, flags);

		/*
		 * From here on every path has to leave through the end of
		 * this case so that design_changing is cleared again.
		 */
		switch (arg) {
		case 0:
			/*
			 * NOTE(review): headlock is taken here but not
			 * released in this function - presumably
			 * men_alloc_dma() drops it; confirm against its
			 * definition.
			 */
			spin_lock(&men->headlock);
			if (men->d4->upper_config)
				iowrite32(0, men->runtime_base + ME4_FULLOFFSET +
						ME4_IFCONTROL);
			iowrite32(0, men->runtime_base + ME4_IFCONTROL);
			men->d4->upper_config = false;

			ret = men_alloc_dma(men, men->d4->lodma);
			break;
		case 1:
			/* DCM reset */
			iowrite32(0x10, men->runtime_base + ME4_IFCONTROL);
			udelay(10);
			iowrite32(0, men->runtime_base + ME4_IFCONTROL);
			iowrite32(0, men->runtime_base + ME4_IFCONTROL);
			msleep(5);
			if (ioread32(men->runtime_base + ME4_IFCONTROL) & 0x10) {
				/*
				 * Must not return directly: that would leave
				 * design_changing set forever and make every
				 * later request fail with -EBUSY.
				 */
				ret = -EBUSY;
				break;
			}

			ret = men4_reset_vlink(men, 1);
			if (ret == 0) {
				men->d4->upper_config = true;

				ret = men_add_dmas(men);
			}
			break;
		default:
			ret = -EINVAL;
			break;
		}

		spin_lock_irqsave(&men->designlock, flags);
		men->design_changing = false;
		spin_unlock_irqrestore(&men->designlock, flags);
		return ret;
	}
	/* known commands that are rejected here */
	case IOCTL_RESSOURCE_CONTROL:
	case IOCTL_GET_EEPROM_DATA:
	case IOCTL_DESIGN_SETTINGS:
		return -EINVAL;
	default:
		return -ENOIOCTLCMD;
	}
}

static void
me4_free_sgl(struct siso_menable *men, struct menable_dmabuf *sb)
{
	struct men_dma_chain *res = sb->dmat;
	dma_addr_t dma = sb->dma;

	while (res) {
		struct men_dma_chain *n;
		dma_addr_t ndma;

		n = res->next;
#if BITS_PER_LONG > 32
		ndma = le64_to_cpu(res->pcie->next) & 0xfffffffffffffffcULL;
#else
		ndma = ((uint32_t)le64_to_cpu(res->pcie->next)) & 0xfffffffc;
#endif
		if (dma == ndma)
			break;
		pci_pool_free(men->pool, res->pcie, dma);
		kfree(res);
		res = n;
		dma = ndma;
	}
}

/**
 * me4_queue_sb - hand one buffer to a DMA channel
 * @db: channel that gets the buffer
 * @sb: buffer to queue
 *
 * Writes the buffer length divided by 4 to ME4_DMAMAXLEN and afterwards
 * the bus address of the buffer's first descriptor to ME4_DMAADDR. Both
 * writes are followed by a write barrier, so the length is issued before
 * the address.
 */
static void
me4_queue_sb(struct menable_dmachan *db, struct menable_dmabuf *sb)
{
	w64(sb->buf_length / 4, db->iobase + ME4_DMAMAXLEN);
	wmb();
	w64(sb->dma, db->iobase + ME4_DMAADDR);
	wmb();
}

/**
 * me4_irq - interrupt handler of the board
 * @irq: interrupt number (unused)
 * @dev_id: the struct siso_menable the handler was registered with
 *
 * Reads ME4_IRQSTATUS, processes the DMA channels whose status bit is
 * set and then forwards the bits in 0x3fff0000 to the matching uiqs.
 *
 * returns: IRQ_NONE if the status register reads 0, IRQ_HANDLED in all
 *          other cases
 */
static irqreturn_t
me4_irq(int irq, void *dev_id)
{
	uint32_t sr, st;
	struct siso_menable *men = dev_id;
	int dma;

	/* do not touch the registers while the PCI channel is offline */
	if (pci_channel_offline(men->pdev))
		return IRQ_HANDLED;

	sr = ioread32(men->runtime_base + ME4_IRQSTATUS);

	/* nothing pending on this board */
	if (unlikely(sr == 0))
		return IRQ_NONE;

	/* all bits set: disable and acknowledge everything, then bail out */
	if (unlikely(sr == 0xffffffff)) {
		dev_warn(&men->dev, "IRQ status register read returned -1\n");
		iowrite32(0, men->runtime_base + ME4_IRQENABLE);
		iowrite32(0xffffffff, men->runtime_base + ME4_IRQACK);
		return IRQ_HANDLED;
	}

	/*
	 * Status bits outside of irq_wanted: rewrite the enable register
	 * with the wanted mask and ignore the unwanted bits.
	 */
	spin_lock(&men->d4->irqmask_lock);
	if (unlikely(sr & ~men->d4->irq_wanted)) {
		iowrite32(men->d4->irq_wanted, men->runtime_base + ME4_IRQENABLE);
		sr &= men->d4->irq_wanted;
		if (!sr) {
			spin_unlock(&men->d4->irqmask_lock);
			return IRQ_HANDLED;
		}
	}
	spin_unlock(&men->d4->irqmask_lock);

	/* the lowest lodma bits are the DMA channels of the lower part */
	st = (sr & ((1 << men->d4->lodma) - 1));
	if (sr & 0x40000000) {
		uint32_t tmp;

		/* bit 30 is only expected while the upper part is configured */
		if (unlikely(!men->d4->upper_config)) {
			WARN_ON(1);
		} else {
			tmp = ioread32(men->runtime_base + ME4_IRQSTATUS +
					ME4_FULLOFFSET);

			iowrite32(0x40000000, men->runtime_base + ME4_IRQACK);
			/*
			 * the low 8 status bits of the upper part are placed
			 * right above the lower DMA channel bits
			 */
			if (tmp != -1)
				st |= ((tmp & 0xff) << men->d4->lodma);
		}
	}

	/* st now has one bit per DMA channel that needs service */
	for (dma = 0; dma < men->num_dma; dma++) {
		if (st & (0x1 << dma)) {
			struct menable_dmachan *db = men->dmas[dma];
			void __iomem *dmabase = db->iobase;
			void __iomem *lenaddr = dmabase + ME4_DMALENGTH;
			void __iomem *tagaddr = dmabase + ME4_DMATAG;
			uint32_t ic, delta;
			int i;

			/* lock order: chanlock first, then listlock */
			spin_lock(&db->chanlock);
			iowrite32(1 << db->ackbit, db->irqack);
			ic = ioread32(dmabase + ME4_DMACOUNT);
			spin_lock(&db->listlock);
			if (unlikely(db->active == NULL)) {
				/*
				 * No active buffer list: read and discard the
				 * length and tag registers once per counted
				 * transfer and account the transfer as lost.
				 */
				for (i = ic - db->imgcnt; i > 0; i--) {
					uint32_t tmp = ioread32(lenaddr);
					tmp = ioread32(tagaddr);
					db->lost++;
				}
				spin_unlock(&db->listlock);
				spin_unlock(&db->chanlock);
				continue;
			}

			/* number of transfers finished since the last count */
			delta = ic - db->imgcnt;
			for (i = delta; i > 0; i--) {
				/*
				 * NOTE(review): men_move_hot() is defined
				 * elsewhere; presumably it returns the buffer
				 * that just completed - confirm.
				 */
				struct menable_dmabuf *sb = men_move_hot(db);
				uint32_t len = ioread32(lenaddr);
				uint32_t tag = ioread32(tagaddr);

				if (unlikely(sb != NULL)) {
					sb->dma_length = len;
					sb->dma_tag = tag;
				}
			}

			/* wake a waiter once enough good images have arrived */
			if (db->cpl && (db->goodcnt >= db->cplimg))
				complete(db->cpl);

			if (likely(db->transfer_todo > 0)) {
				/*
				 * queue at most what fits into the FIFO and
				 * what is still left to transfer
				 */
				unsigned int sbcnt = min(
						ME4_DMA_FIFO_DEPTH - db->hot,
						db->transfer_todo - db->hot);

				if (delta)
					me_queue_dma(db, sbcnt);
				spin_unlock(&db->listlock);
				mod_timer(&db->timer, jiffies + db->timeout);
			} else {
				/*
				 * Nothing left to transfer: set running to 3
				 * and schedule dwork.
				 * NOTE(review): the meaning of state 3 is
				 * defined elsewhere.
				 */
				spin_unlock(&db->listlock);
				db->running = 3;
				schedule_work(&db->dwork);
			}
			spin_unlock(&db->chanlock);
		}
	}

	/* remaining sources: one uiq per bit from ME4_IRQQ_LOW upwards */
	st = (sr & 0x3fff0000);
	if (st != 0) {
		uint32_t bit;
		for (bit = ME4_IRQQ_LOW; (bit <= ME4_IRQQ_HIGH) && st; bit++) {
			if (st & (1 << bit)) {
				uiq_irq(men->uiqs[bit - ME4_IRQQ_LOW]);
				st ^= (1 << bit);
			}
		}
	}

	return IRQ_HANDLED;
}

/**
 * men4_abort_dma - abort the transfers of a DMA channel
 * @men: board the channel belongs to (unused)
 * @dc: channel to abort
 *
 * Writes 2 and then 0 to the ME4_DMACTRL register of the channel, each
 * write followed by a write barrier.
 */
static void
men4_abort_dma(struct siso_menable *men, struct menable_dmachan *dc)
{
	iowrite32(2, dc->iobase + ME4_DMACTRL);
	wmb();
	iowrite32(0, dc->iobase + ME4_DMACTRL);
	wmb();
}

/**
 * men4_stop_dma - stop a DMA channel and mask its interrupt
 * @men: board the channel belongs to
 * @dc: channel to stop
 *
 * Clears the channel's bit in its interrupt enable register, writes 0
 * to ME4_DMACTRL and, for channels below lodma, removes the bit from
 * the irq_wanted mask.
 */
static void
men4_stop_dma(struct siso_menable *men, struct menable_dmachan *dc)
{
	unsigned long flags;
	uint32_t enabled;

	/* mask the interrupt of this channel first */
	enabled = ioread32(dc->irqenable);
	enabled &= ~(1 << dc->enablebit);
	iowrite32(enabled, dc->irqenable);

	/* then switch the engine off */
	iowrite32(0, dc->iobase + ME4_DMACTRL);
	wmb();

	/* irq_wanted only tracks the channels below lodma */
	spin_lock_irqsave(&men->d4->irqmask_lock, flags);
	if (dc->number < men->d4->lodma)
		men->d4->irq_wanted &= ~(1 << dc->enablebit);
	spin_unlock_irqrestore(&men->d4->irqmask_lock, flags);
}

static int
me4_create_userbuf(struct siso_menable *men, struct menable_dmabuf *db)
{
	struct men_dma_chain *cur;
	int i;

	db->dmat->pcie = pci_pool_alloc(men->pool, GFP_USER, &db->dma);
	if (!db->dmat->pcie)
		goto fail_pcie;
	memset(db->dmat->pcie, 0, sizeof(*db->dmat->pcie));

	cur = db->dmat;

	for (i = 0; i < db->nents; i++) {
		int idx = i % ARRAY_SIZE(cur->pcie->addr);

		cur->pcie->addr[idx] =
				cpu_to_le64(sg_dma_address(db->sg + i) + 0x1);

		if ((idx == ARRAY_SIZE(cur->pcie->addr) - 1) &&
						(i + 1 < db->nents)) {
			dma_addr_t next;

			cur->next = kzalloc(sizeof(*cur->next), GFP_USER);
			if (!cur->next)
				goto fail;

			cur->next->pcie = pci_pool_alloc(men->pool,
					GFP_USER, &next);
			if (!cur->next->pcie) {
				kfree(cur->next);
				cur->next = NULL;
				goto fail;
			}
			cur->pcie->next = cpu_to_le64(next + 0x2);
			cur = cur->next;
			memset(cur->pcie, 0, sizeof(*cur->pcie));
		}
	}
	cur->pcie->next = men->d4->dummybuf.dmat->pcie->next;

	return 0;
fail:
	me4_free_sgl(men, db);
	return -ENOMEM;
fail_pcie:
	kfree(db->dmat);
	return -ENOMEM;
}

/**
 * men4_create_dummybuf - set up the dummy buffer of a board
 * @men: board to create the dummy buffer for
 *
 * Allocates one chain node, one descriptor from men->pool and one
 * 4096 byte coherent page. All address slots of the descriptor refer to
 * that page and the descriptor's next pointer refers to the descriptor
 * itself.
 *
 * returns: 0 on success, -ENOMEM if an allocation failed
 */
static int
men4_create_dummybuf(struct siso_menable *men)
{
	struct menable_dmabuf *dummy = &men->d4->dummybuf;
	struct men_dma_chain *head;
	dma_addr_t pagedma;
	int slot;

	dummy->index = -1;

	dummy->dmat = kzalloc(sizeof(*dummy->dmat), GFP_KERNEL);
	if (dummy->dmat == NULL)
		return -ENOMEM;

	dummy->dmat->pcie = pci_pool_alloc(men->pool, GFP_USER, &dummy->dma);
	if (dummy->dmat->pcie == NULL)
		goto free_head;
	memset(dummy->dmat->pcie, 0, sizeof(*dummy->dmat->pcie));

	men->d4->dummypage = pci_alloc_consistent(men->pdev, 4096, &pagedma);
	if (!men->d4->dummypage)
		goto free_desc;

	head = dummy->dmat;

	/* every slot targets the same page */
	for (slot = 0; slot < ARRAY_SIZE(head->pcie->addr); slot++)
		head->pcie->addr[slot] = cpu_to_le64(pagedma + 0x1);

	/* the descriptor links back to itself */
	head->pcie->next = cpu_to_le64(dummy->dma + 0x2);

	return 0;

free_desc:
	pci_pool_free(men->pool, dummy->dmat->pcie, dummy->dma);
free_head:
	kfree(dummy->dmat);
	return -ENOMEM;
}

/**
 * men4_destroy_dummybuf - release the dummy buffer of a board
 * @men: board whose dummy buffer is released
 *
 * Frees the coherent page, the descriptor and the chain node of
 * men->d4->dummybuf. The bus address of the page is recovered from the
 * first address slot of the descriptor, which holds it with 1 added.
 */
static void
men4_destroy_dummybuf(struct siso_menable *men)
{
	uint64_t slot0 = le64_to_cpu(men->d4->dummybuf.dmat->pcie->addr[0]) - 1;
	dma_addr_t pagedma;

#if BITS_PER_LONG > 32
	pagedma = slot0;
#else
	pagedma = (uint32_t) (slot0 & 0xffffffff);
#endif

	pci_free_consistent(men->pdev, 4096, men->d4->dummypage, pagedma);

	pci_pool_free(men->pool, men->d4->dummybuf.dmat->pcie,
					men->d4->dummybuf.dma);

	kfree(men->d4->dummybuf.dmat);
}

/**
 * me4_exit - release what me4_probe() set up in this file
 * @men: board that goes away
 *
 * Removes the sysfs entries, destroys the dummy buffer and frees the
 * uiq pointer array and the d4 data. The dummy buffer has to go before
 * men->d4 is freed because it is embedded in it.
 *
 * NOTE(review): the individual uiqs and men->pool are not released
 * here - presumably the caller takes care of them; confirm.
 */
static void
me4_exit(struct siso_menable *men)
{
	me4_remove_sysfs(men);
	men4_destroy_dummybuf(men);
	kfree(men->uiqs);
	kfree(men->d4);
}

/**
 * me4_query_dma - read the number of DMA channels from the hardware
 * @men: board to query
 *
 * Stores the count read from the bridge in men->d4->lodma. The upper
 * FPGA is only asked if upper_config is set. A register that reads as
 * 0xffffffff is treated as a failed read and counted as 0 channels.
 *
 * returns: sum of both counts; 0 if the bridge could not be read
 */
static unsigned int
me4_query_dma(struct siso_menable *men)
{
	uint32_t lo;
	uint32_t hi = 0;

	lo = ioread32(men->runtime_base + ME4_NUMDMA);
	if (unlikely(lo == 0xffffffff)) {
		dev_warn(&men->dev, "Reading DMACNT from bridge failed\n");
		men->d4->lodma = 0;
		return 0;
	}
	men->d4->lodma = lo;

	if (men->d4->upper_config) {
		hi = ioread32(men->runtime_base + ME4_FULLOFFS + ME4_NUMDMA);
		if (unlikely(hi == 0xffffffff)) {
			dev_warn(&men->dev,
				"Reading DMACNT from upper FPGA failed\n");
			hi = 0;
		}
	}

	dev_dbg(&men->dev, "%i DMA channels detected (lo %i hi %i)\n",
			hi + lo, lo, hi);

	return hi + lo;
}

/**
 * men4_startdma - start transfers on a DMA channel
 * @dmac: channel to start
 *
 * returns: 0 on success, -EACCES if the ME4_DMATYPE register of the
 *          channel does not have the bit of the requested direction set
 */
static int
men4_startdma(struct menable_dmachan *dmac)
{
	unsigned long flags;
	uint32_t reg;
	uint32_t dirbit;

	/* run the abort sequence before setting the channel up */
	men4_abort_dma(dmac->parent, dmac);

	if (dmac->direction == PCI_DMA_TODEVICE)
		dirbit = 2;
	else
		dirbit = 1;

	reg = ioread32(dmac->iobase + ME4_DMATYPE);
	if ((reg & dirbit) == 0)
		return -EACCES;
	iowrite32(dirbit, dmac->iobase + ME4_DMATYPE);

	/* acknowledge whatever is still pending for this channel */
	iowrite32(1 << dmac->ackbit, dmac->irqack);

	/* remember the counter value transfers are measured against */
	dmac->imgcnt = ioread32(dmac->iobase + ME4_DMACOUNT);

	me_queue_dma(dmac, min(dmac->transfer_todo, ME4_DMA_FIFO_DEPTH));

	/* unmask the channel; irq_wanted only tracks channels below lodma */
	spin_lock_irqsave(&dmac->parent->d4->irqmask_lock, flags);
	if (dmac->number < dmac->parent->d4->lodma)
		dmac->parent->d4->irq_wanted |= (1 << dmac->enablebit);
	reg = ioread32(dmac->irqenable);
	iowrite32(reg | (1 << dmac->enablebit), dmac->irqenable);
	spin_unlock_irqrestore(&dmac->parent->d4->irqmask_lock, flags);

	/* every write is followed by a read of the same register */
	iowrite32(1, dmac->iobase + ME4_DMAACTIVE);
	ioread32(dmac->iobase + ME4_DMAACTIVE);

	iowrite32(0, dmac->iobase + ME4_DMAACTIVE);
	ioread32(dmac->iobase + ME4_DMAACTIVE);

	iowrite32(1, dmac->iobase + ME4_DMACTRL);
	ioread32(dmac->iobase + ME4_DMACTRL);

	return 0;
}

static void
men4_dmabase(struct siso_menable *men, struct menable_dmachan *dc)
{
	void __iomem *addrbase;

	if (dc->number < men->d4->lodma) {
		dc->ackbit = dc->number;
		addrbase = men->runtime_base;
	} else {
		dc->ackbit = dc->number - men->d4->lodma;
		addrbase = men->runtime_base + ME4_FULLOFFS;
	}

	dc->iobase =  addrbase + ME4_DMAOFFS + ME4_DMASZ * dc->ackbit;
	dc->irqack = addrbase + ME4_IRQACK;
	dc->irqenable = addrbase + ME4_IRQENABLE;
	dc->enablebit = dc->ackbit;
}

/**
 * men4_stopirq - disable the interrupts of the board
 * @men: board to silence
 *
 * Clears ME4_IRQENABLE, writes 0 to the IFCONTROL registers (the upper
 * one only while upper_config is set, which is cleared as well) and
 * marks every uiq as not running.
 */
static void
men4_stopirq(struct siso_menable *men)
{
	unsigned int q = 0;

	iowrite32(0, men->runtime_base + ME4_IRQENABLE);

	if (men->d4->upper_config) {
		iowrite32(0, men->runtime_base + ME4_FULLOFFSET + ME4_IFCONTROL);
		men->d4->upper_config = 0;
	}
	iowrite32(0, men->runtime_base + ME4_IFCONTROL);

	while (q < men->num_uiq) {
		men->uiqs[q]->running = false;
		q++;
	}
}

/**
 * men4_startirq - enable the interrupts of the board
 * @men: board to enable interrupts on
 *
 * Resets irq_wanted to ME4_IRQMASK, acknowledges all interrupt bits and
 * then writes the mask to ME4_IRQENABLE.
 */
static void
men4_startirq(struct siso_menable *men)
{
	uint32_t wanted = ME4_IRQMASK;

	men->d4->irq_wanted = wanted;

	/* acknowledge everything before unmasking */
	iowrite32(0xffffffff, men->runtime_base + ME4_IRQACK);
	iowrite32(wanted, men->runtime_base + ME4_IRQENABLE);
}

/**
 * men4_reset_core - reset state machines near the PCIe core
 * @men: board to reset
 *
 * Resets the state machines and the logic that is directly attached to
 * the PCIe core. Afterwards the link reset is run for the lower part
 * only.
 */
static void
men4_reset_core(struct siso_menable *men)
{
	int n;

	/* 2x 0xa, 4x 0x8, 6x 0 - written repeatedly on purpose */
	for (n = 0; n < 2; n++)
		iowrite32(0xa, men->runtime_base + ME4_IFCONTROL);
	for (n = 0; n < 4; n++)
		iowrite32(0x8, men->runtime_base + ME4_IFCONTROL);
	for (n = 0; n < 6; n++)
		iowrite32(0, men->runtime_base + ME4_IFCONTROL);

	men4_reset_vlink(men, 0);
}

/**
 * me4_dummybuf - get the dummy buffer for a DMA channel
 * @dc: channel the dummy buffer is requested for
 *
 * returns: the dummy buffer of the board that owns @dc; it is shared by
 *          all channels of that board
 */
static struct menable_dmabuf *
me4_dummybuf(struct menable_dmachan *dc)
{
	return &dc->parent->d4->dummybuf;
}

/* lockdep class assigned to the irqmask_lock of every board in me4_probe() */
static struct lock_class_key me4_irqmask_lock;

int
me4_probe(struct siso_menable *men)
{
	int ret = -ENOMEM;
	unsigned char i;
	unsigned int dmat;
	unsigned int uiqoffs;

	men->d4 = kzalloc(sizeof(*men->d4), GFP_KERNEL);
	if (men->d4 == NULL)
		goto fail;

	spin_lock_init(&men->d4->irqmask_lock);
	lockdep_set_class(&men->d4->irqmask_lock, &me4_irqmask_lock);

	men4_reset_core(men);
	men4_stopirq(men);

	if (pci_set_dma_mask(men->pdev, DMA_BIT_MASK(64))) {
		dev_err(&men->dev, "No suitable DMA available.\n");
		goto fail_mask;
	}
	pci_set_consistent_dma_mask(men->pdev, DMA_BIT_MASK(64));
	men->pool = pci_pool_create("me4_sgl", men->pdev,
			sizeof(struct me4_sgl), 128, 4096);
	if (!men->pool) {
		dev_err(&men->dev, "can not allocate DMA pool\n");
		goto fail_pool;
	}

	ret = men4_create_dummybuf(men);
	if (ret) {
		dev_err(&men->dev, "can not allocate dummy buffer\n");
		goto fail_dummy;
	}

	men->create_buf = me4_create_userbuf;
	men->free_buf = me4_free_sgl;
	men->startdma = men4_startdma;
	men->abortdma = men4_abort_dma;
	men->stopdma = men4_stop_dma;
	men->stopirq = men4_stopirq;
	men->startirq = men4_startirq;
	men->ioctl = men4_ioctl;
	men->exit = me4_exit;
	men->query_dma = me4_query_dma;
	men->dmabase = men4_dmabase;
	men->queue_sb = me4_queue_sb;
	men->dummybuf = me4_dummybuf;

	men->num_uiq = ioread32(men->runtime_base + ME4_UIQCNT);
	uiqoffs = ioread32(men->runtime_base + ME4_FIRSTUIQ);

	if ((men->num_uiq == 0) && (uiqoffs == 0)) {
		men->num_uiq = ME4_IRQQ_HIGH - ME4_IRQQ_LOW + 1;
		uiqoffs = ME4_IRQQUEUE;
	}

	if (men->num_uiq != 0) {
		dmat = ioread32(men->runtime_base + ME4_IRQTYPE);

		men->uiqs = kcalloc(men->num_uiq, sizeof(*men->uiqs), GFP_KERNEL);

		for (i = 0; i < men->num_uiq; i++) {
			struct menable_uiq *uiq;

			int t = dmat & (1 << (i + ME4_IRQQ_LOW));

			uiq = men_uiq_init(i, men->runtime_base + uiqoffs + 8 * i,
					men, t, t ? 8 : 0);

			if (IS_ERR(uiq)) {
				ret = PTR_ERR(uiq);
				goto fail_uiq;
			}

			uiq->irqack = men->runtime_base + ME4_IRQACK;
			uiq->ackbit = i + ME4_IRQQ_LOW;

			men->uiqs[i] = uiq;
		}
	}

#if 0
	ret = pci_enable_msi(men->pdev);
	if (ret)
		dev_info(&men->dev, "can't enable MSI\n");
#endif

	ret = me4_create_sysfs(men);
	if (ret)
		goto fail_sysfs;

	men->desname = men->d4->design_name;
	men->deslen = sizeof(men->d4->design_name);
	ret = devm_request_irq(&men->pdev->dev, men->pdev->irq, me4_irq,
				IRQF_SHARED, DRIVER_NAME, men);
	if (ret) {
		dev_err(&men->dev, "can't request interrupt\n");
		goto fail_irq;
	}

	return 0;
fail_irq:
	me4_remove_sysfs(men);
fail_sysfs:
	i = men->num_uiq;
fail_uiq:
	while (i > 0) {
		men_uiq_remove(men->uiqs[i]);
		i--;
	}
	kfree(men->uiqs);
	men4_destroy_dummybuf(men);
fail_dummy:
	pci_pool_destroy(men->pool);
fail_pool:
fail_mask:
	kfree(men->d4);
fail:
	return ret;
}
