Linux DMA：使用 DMAengine 进行分散-聚集（scatter-gather）传输

Question

Linux DMA：使用 DMAengine 进行分散-聚集（scatter-gather）传输

kly*_*one 4 c linux linux-device-driver linux-kernel dma

我尝试在自定义内核驱动程序中使用 DMAengine API 来执行分散-聚集（scatter-gather）操作。我有一块连续的内存区域作为源，希望通过散布表（scatterlist）结构把其中的数据复制到若干个分散的缓冲区中。DMA 控制器是支持 DMAengine API 的 PL330 控制器（请参阅 PL330 DMA 控制器）。

我的测试代码如下：

在我的驱动程序头文件 ( test_driver.h) 中：

#ifndef __TEST_DRIVER_H__
#define __TEST_DRIVER_H__

#include <linux/platform_device.h>
#include <linux/device.h>

#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/of_dma.h>

/* Number of scatterlist entries, and of destination buffers, used by the test. */
#define SG_ENTRIES 3
/* Size in bytes of each destination buffer. */
#define BUF_SIZE 16
/* Address programmed as the DMA source (dma_config.src_addr). */
#define DEV_BUF 0x10000000

/* One destination buffer of the scatter-gather transfer. */
struct dma_block {
    void *data;     /* start of the buffer */
    int size;       /* length of the buffer in bytes */
};

/* Per-device DMA state shared by the helpers in dma_functions.c. */
struct dma_private_info {
    /* Scatterlist table describing the destination buffers. */
    struct sg_table sgt;

    /* Array of nblocks destination buffers referenced by sgt. */
    struct dma_block *blocks;
    int nblocks;

    /* Non-zero between a successful start and dma_stop(). */
    int dma_started;

    /* DMAengine channel, its slave configuration and the in-flight request. */
    struct dma_chan *dma_chan;
    struct dma_slave_config dma_config;
    struct dma_async_tx_descriptor *dma_desc;
    dma_cookie_t cookie;
};

/* Driver-wide device object: the bound platform device plus its DMA state. */
struct test_platform_device {
    struct platform_device *pdev;       /* platform device this state belongs to */
    struct dma_private_info dma_priv;   /* DMA bookkeeping, see _get_dmapip() */
};

/* Pointer to the struct device embedded in tdev's platform device. */
#define _get_devp(tdev) (&((tdev)->pdev->dev))
/* Pointer to the DMA private state embedded in tdev. */
#define _get_dmapip(tdev) (&((tdev)->dma_priv))

int dma_stop(struct test_platform_device * tdev);
int dma_start(struct test_platform_device * tdev);
int dma_start_block(struct test_platform_device * tdev);
int dma_init(struct test_platform_device * tdev);
int dma_exit(struct test_platform_device * tdev);

#endif

Run Code Online (Sandbox Code Playgroud)

在我包含 dma 函数 ( dma_functions.c) 的源代码中：

#include <linux/slab.h>

#include "test_driver.h"

/* Physical base address of the RAM window mapped by dma_sg_check(). */
#define BARE_RAM_BASE 0x10000000
/* Length in bytes of the RAM window mapped by dma_sg_check(). */
#define BARE_RAM_SIZE 0x10000000

struct ram_bare {
    uint32_t * __iomem map;

    uint32_t base;
    uint32_t size;
};

static void dma_sg_check(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    uint32_t * buf;
    unsigned int bufsize;
    int nwords;
    int nbytes_word = sizeof(uint32_t);
    int nblocks;
    struct ram_bare ramb;
    uint32_t * p;
    int i;
    int j;

    ramb.map = ioremap(BARE_RAM_BASE,BARE_RAM_SIZE);
    ramb.base = BARE_RAM_BASE;
    ramb.size = BARE_RAM_SIZE;

    dev_info(dev,"nblocks: %d \n",dma_priv->nblocks);

    p = ramb.map;

    nblocks = dma_priv->nblocks;

    for( i = 0 ; i < nblocks ; i++ ) {

        buf = (uint32_t *) dma_priv->blocks[i].data;
        bufsize = dma_priv->blocks[i].size;
        nwords = dma_priv->blocks[i].size/nbytes_word;

        dev_info(dev,"block[%d],size %d: ",i,bufsize);

        for ( j = 0 ; j <  nwords; j++, p++) {
            dev_info(dev,"DMA: 0x%x, RAM: 0x%x",buf[j],ioread32(p));
        }
    }

    iounmap(ramb.map);
}

/*
 * dma_sg_exit - release everything set up for the scatter-gather list
 * @tdev: test device owning the buffers
 *
 * Frees each block's data buffer, then the block array, then the sg table.
 * Always returns 0.
 */
static int dma_sg_exit(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);
    int idx = 0;

    while (idx < priv->nblocks) {
        kfree(priv->blocks[idx].data);
        idx++;
    }

    kfree(priv->blocks);
    sg_free_table(&priv->sgt);

    return 0;
}

int dma_stop(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    int ret = 0;

    dma_unmap_sg(dev,dma_priv->sgt.sgl,\
        dma_priv->sgt.nents, DMA_FROM_DEVICE);

    dma_sg_exit(tdev);

    dma_priv->dma_started = 0;

    return ret;
}

static void dma_callback(void * param)
{
    enum dma_status dma_stat;
    struct test_platform_device * tdev = (struct test_platform_device *) param;
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);

    dev_info(dev,"Checking the DMA state....\n");

    dma_stat = dma_async_is_tx_complete(dma_priv->dma_chan,\
        dma_priv->cookie, NULL, NULL);

    if(dma_stat == DMA_COMPLETE) {
        dev_info(dev,"DMA complete! \n");
        dma_sg_check(tdev);
        dma_stop(tdev);
    } else if (unlikely(dma_stat == DMA_ERROR)) {
        dev_info(dev,"DMA error! \n");
        dma_stop(tdev);
    }
}

/*
 * dma_busy_loop - poll the submitted cookie until the transfer ends
 * @tdev: test device whose transfer is being waited for
 *
 * Repeatedly queries the cookie status and logs every status change. The
 * loop ends when the status becomes DMA_COMPLETE or DMA_ERROR; an error is
 * treated as final, otherwise a failed transfer would spin here forever.
 *
 * NOTE(review): there is still no timeout, so a transfer that stays
 * IN PROGRESS forever blocks the caller indefinitely.
 */
static void dma_busy_loop(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);

    enum dma_status status;
    int status_change = -1; /* last status logged, -1 when none/unknown */

    do {
        status = dma_async_is_tx_complete(dma_priv->dma_chan, dma_priv->cookie, NULL, NULL);

        switch(status) {
        case DMA_COMPLETE:
            if(status_change != 0)
                dev_info(dev,"DMA status: COMPLETE\n");
            status_change = 0;
            break;
        case DMA_PAUSED:
            if (status_change != 1)
                dev_info(dev,"DMA status: PAUSED\n");
            status_change = 1;
            break;
        case DMA_IN_PROGRESS:
            if(status_change != 2)
                dev_info(dev,"DMA status: IN PROGRESS\n");
            status_change = 2;
            break;
        case DMA_ERROR:
            if (status_change != 3)
                dev_info(dev,"DMA status: ERROR\n");
            status_change = 3;
            break;
        default:
            dev_info(dev,"DMA status: UNKNOWN\n");
            status_change = -1;
            break;
        }
    } while(status != DMA_COMPLETE && status != DMA_ERROR);

    if (status == DMA_ERROR) {
        dev_err(dev,"DMA transaction failed! \n");
        return;
    }

    dev_info(dev,"DMA transaction completed! \n");
}

/*
 * dma_sg_init - allocate the destination buffers and their scatterlist
 * @tdev: test device whose dma_priv is being populated
 *
 * Allocates an sg table with SG_ENTRIES entries, an array of SG_ENTRIES
 * blocks and one BUF_SIZE buffer per block, then points every scatterlist
 * entry at the matching buffer.
 *
 * Returns 0 on success or a negative errno on failure. On failure nothing
 * stays allocated and blocks/nblocks are reset, so a later dma_sg_exit()
 * cannot free stale pointers.
 */
static int dma_sg_init(struct test_platform_device * tdev)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct scatterlist *sg;
    int ret;
    int i;

    /* sg_alloc_table() releases its own partial allocation on failure. */
    ret = sg_alloc_table(&(dma_priv->sgt), SG_ENTRIES, GFP_ATOMIC);
    if (ret)
        return ret;

    dma_priv->nblocks = SG_ENTRIES;
    /* Zeroed array: every data pointer starts out as NULL. */
    dma_priv->blocks = kcalloc(dma_priv->nblocks,
        sizeof(*dma_priv->blocks), GFP_ATOMIC);
    if (dma_priv->blocks == NULL)
        goto out_free_table;

    for (i = 0; i < dma_priv->nblocks; i++) {
        dma_priv->blocks[i].size = BUF_SIZE;
        dma_priv->blocks[i].data = kmalloc(dma_priv->blocks[i].size, GFP_ATOMIC);
        if (dma_priv->blocks[i].data == NULL)
            goto out_free_blocks;
    }

    for_each_sg(dma_priv->sgt.sgl, sg, dma_priv->sgt.nents, i)
        sg_set_buf(sg, dma_priv->blocks[i].data, dma_priv->blocks[i].size);

    return 0;

out_free_blocks:
    /* Block i failed; free the buffers 0..i-1 that were allocated. */
    while (--i >= 0)
        kfree(dma_priv->blocks[i].data);

    kfree(dma_priv->blocks);

out_free_table:
    dma_priv->blocks = NULL;
    dma_priv->nblocks = 0;
    sg_free_table(&(dma_priv->sgt));

    return -ENOMEM;
}

static int _dma_start(struct test_platform_device * tdev,int block)
{
    struct dma_private_info * dma_priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    int ret = 0;
    int sglen;

    /* Step 1: Allocate and initialize the SG list */
    dma_sg_init(tdev);

    /* Step 2: Map the SG list */
    sglen = dma_map_sg(dev,dma_priv->sgt.sgl,\
        dma_priv->sgt.nents, DMA_FROM_DEVICE);
    if(! sglen)
        goto out2;

    /* Step 3: Configure the DMA */
    (dma_priv->dma_config).direction = DMA_DEV_TO_MEM;
    (dma_priv->dma_config).src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
    (dma_priv->dma_config).src_maxburst = 1;
    (dma_priv->dma_config).src_addr = (dma_addr_t) DEV_BUF;

    dmaengine_slave_config(dma_priv->dma_chan, \
        &(dma_priv->dma_config));

    /* Step 4: Prepare the SG descriptor */
    dma_priv->dma_desc = dmaengine_prep_slave_sg(dma_priv->dma_chan, \
        dma_priv->sgt.sgl, dma_priv->sgt.nents, DMA_DEV_TO_MEM, \
        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
    if (dma_priv->dma_desc == NULL) {
        dev_err(dev,"DMA could not assign a descriptor! \n");
        goto out1;
    }

    /* Step 5: Set the callback method */
    (dma_priv->dma_desc)->callback = dma_callback;
    (dma_priv->dma_desc)->callback_param = (void *) tdev;

    /* Step 6: Put the DMA descriptor in the queue */
    dma_priv->cookie = dmaengine_submit(dma_priv->dma_desc);

    /* Step 7: Fires the DMA transaction */
    dma_async_issue_pending(dma_priv->dma_chan);

    dma_priv->dma_started = 1;

    if(block)
        dma_busy_loop(tdev);

    return ret;

out1:
    dma_stop(tdev);
out2:
    ret = -1;

    return ret;
}

/* Start the scatter-gather transfer and return without waiting for it. */
int dma_start(struct test_platform_device * tdev) {
    return _dma_start(tdev,0);
}

/* Start the scatter-gather transfer and busy-wait on its status. */
int dma_start_block(struct test_platform_device * tdev) {
    return _dma_start(tdev,1);
}

/*
 * dma_init - obtain the DMA channel used by the test
 * @tdev: test device to initialise
 *
 * Requests the slave channel named "dma_chan0" for the device and clears
 * the dma_started flag. Returns 0 on success, -1 if no channel was obtained.
 */
int dma_init(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);
    struct device * dev = _get_devp(tdev);
    int status = 0;

    priv->dma_chan = dma_request_slave_channel(dev, "dma_chan0");
    if (!priv->dma_chan) {
        dev_err(dev,"DMA channel busy! \n");
        status = -1;
    }

    priv->dma_started = 0;

    return status;
}

/*
 * dma_exit - shut the DMA side of the driver down
 * @tdev: test device to clean up
 *
 * If a transfer is marked as started it is terminated on the channel and
 * torn down with dma_stop(). The channel is then released when one is held.
 * Always returns 0.
 */
int dma_exit(struct test_platform_device * tdev)
{
    struct dma_private_info * priv = _get_dmapip(tdev);

    if (priv->dma_started) {
        dmaengine_terminate_all(priv->dma_chan);
        dma_stop(tdev);
        priv->dma_started = 0;
    }

    if (priv->dma_chan)
        dma_release_channel(priv->dma_chan);

    return 0;
}

Run Code Online (Sandbox Code Playgroud)

在我的驱动程序源文件 ( test_driver.c) 中：

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/interrupt.h>

#include "test_driver.h"

/*
 * Module parameter "dma_block" (read-only in sysfs, mode 0444): when
 * non-zero, probe starts the transfer with dma_start_block() instead of
 * dma_start().
 */
static int dma_block=0;
module_param_named(dma_block, dma_block, int, 0444);

/* Single driver-wide device state, filled in by test_probe(). */
static struct test_platform_device tdev;

/*
 * Device-tree match table. It is const because it is never modified, and it
 * is exported with MODULE_DEVICE_TABLE() so that the module can be loaded
 * automatically when a matching "compatible" node is found.
 */
static const struct of_device_id test_of_match[] = {
  { .compatible = "custom,test-driver-1.0", },
  {}
};
MODULE_DEVICE_TABLE(of, test_of_match);

/*
 * test_probe - bind the driver to a matching platform device
 * @op: platform device being probed
 *
 * Records the device in the global tdev, requests the DMA channel and
 * starts one scatter-gather transfer, blocking or not depending on the
 * dma_block module parameter.
 *
 * Returns 0 on success, -EINVAL if the device does not match the OF table,
 * or the error returned by dma_init()/dma_start*(). When starting the
 * transfer fails, the channel obtained by dma_init() is released again.
 */
static int test_probe(struct platform_device *op)
{
    int ret = 0;
    struct device * dev = &(op->dev);

    const struct of_device_id *match = of_match_device(test_of_match, &op->dev);

    if (!match)
        return -EINVAL;

    tdev.pdev = op;

    /* Without a channel there is nothing to start; dma_init() logged why. */
    ret = dma_init(&tdev);
    if (ret)
        return ret;

    if(dma_block)
        ret = dma_start_block(&tdev);
    else
        ret = dma_start(&tdev);

    if(ret) {
        dev_err(dev,"Error to start DMA transaction! \n");
        /* Probe fails, so remove() will not run: release the channel here. */
        dma_exit(&tdev);
        return ret;
    }

    dev_info(dev,"DMA OK! \n");

    return 0;
}

/* Unbind: stop any started transfer and release the DMA channel. */
static int test_remove(struct platform_device *op)
{       
    dma_exit(&tdev);

    return 0;
}

/* Platform driver description: probe/remove hooks and the OF match table. */
static struct platform_driver test_platform_driver = {
    .driver = {
        .of_match_table = test_of_match,
        .owner = THIS_MODULE,
        .name = "test-driver",
    },
    .remove = test_remove,
    .probe = test_probe,
};

/*
 * Module entry point: register the platform driver.
 *
 * The registration result is returned to the module loader so that a failed
 * registration makes the module load fail instead of being silently ignored
 * (which would also make test_exit() unregister a driver that was never
 * registered).
 */
static int test_init(void)
{
    return platform_driver_register(&test_platform_driver);
}

/* Module exit point: unregister the platform driver. */
static void test_exit(void)
{
    platform_driver_unregister(&test_platform_driver);
}

/* Hook the entry/exit functions into module load and unload. */
module_init(test_init);
module_exit(test_exit);

/* Module metadata. */
MODULE_AUTHOR("klyone");
MODULE_DESCRIPTION("DMA SG test module");
MODULE_LICENSE("GPL");

Run Code Online (Sandbox Code Playgroud)

但是，DMA 从不调用我的回调函数，我也不知道为什么会发生这种情况。也许，我误解了什么......

有人可以帮助我吗？

提前致谢。

Answer 1

Cra*_*tey 5

警告：我没有为您提供明确的解决方案，只是关于如何调试此问题的一些观察和建议 [基于多年编写/调试 linux 设备驱动程序的经验]。

我假设您认为回调没有完成，因为您没有收到任何 printk 消息。但是，回调是唯一拥有它们的地方。但是，printk 级别是否设置得足够高以查看消息？我会dev_info在您的模块初始化中添加一个，以证明它按预期打印。

此外，如果 dma_start 没有按预期工作，您 [可能] 就不会收到回调，因此我也会在那里添加一些 dev_info 调用（例如，在步骤 7 的调用之前和之后）。我还注意到，dma_start 中并非所有调用都检查了返回的错误 [这可能没问题，或者这些调用返回 void，只是提一下，以防您漏掉了]。

此时需要注意的是，这里其实有两个问题：(1) 你的 DMA 请求是否成功启动 [并完成]？(2) 你是否收到了回调？

所以，我会把 dma_complete 中的一部分代码拆分到（例如）dma_test_done 中。后者进行相同的检查，但只打印“完成”消息。您可以在轮询模式下调用它，以验证 DMA 是否完成。

所以，如果你 [最终] 得到了一个完成，那么问题就归结为你没有得到回调的原因。然而，如果你[甚至]没有完成，那是一个更根本的问题。

这提醒了我：您没有展示任何调用 dma_start 的代码，也没有展示您是如何等待完成的。我认为如果您的回调正常工作，它会发出某种唤醒，而基础级别（调用方）会等待这个唤醒。或者，回调会执行请求的释放/清理工作（也就是说，您还需要编写更多代码）。

在第 7 步，您调用了 dma_async_issue_pending，它应该会调用 pl330_issue_pending，而 pl330_issue_pending 会调用 pl330_tasklet。

pl330_tasklet是一个tasklet函数，但也可以直接调用它[在没有活动请求时启动 DMA]。

pl330_tasklet将循环其“工作”队列并将任何已完成的项目移动到其“已完成”队列。然后它尝试开始新的请求。然后它在其完成的队列上循环并发出回调。

pl330_tasklet 会取出回调指针，但如果它为空，就会被默默地忽略。您已经设置了回调，但最好验证一下：您设置回调的位置与 pl330_tasklet 取回调的位置是否相同 [或者是否被传递到了那里]。

当您发起调用时，一切都可能正忙：没有已完成的请求，也没有空间启动新的请求，因此没有任何东西可以完成。在这种情况下，pl330_tasklet 稍后会被再次调用。

所以，当 dma_async_issue_pending 返回时，可能还什么都没有发生。对于您的情况，这很有可能。

pl330_tasklet 通过调用 fill_queue 来尝试启动新的 DMA。它会通过检查 status != BUSY 来判断描述符是否 [已经] 忙碌。因此，您可能需要验证您的值是否正确。否则，您永远不会收到回调 [甚至不会有任何 DMA 启动]。

然后，fill_queue 会尝试通过 pl330_submit_req 启动请求。但是，这可能会返回错误（例如队列已满），于是又会被推迟。

作为参考，请注意 pl330_submit_req 顶部的以下注释：

Submit a list of xfers after which the client wants notification.
Client is not notified after each xfer unit, just once after all
xfer units are done or some error occurs.

Run Code Online (Sandbox Code Playgroud)

我要做的是开始修改pl330.c并添加调试消息和交叉检查。如果您的系统使 pl330 为许多其他请求提供服务，您可以通过检查设备的私有数据指针是否与您的匹配来限制调试消息。

特别是，您希望在请求真正开始时收到一条消息，因此可以在 pl330_submit_req 的末尾添加一条调试消息。

然后，在 pl330_tasklet 中针对请求添加消息也会有所帮助。

这是两个很好的起点。但是，不要害怕根据需要添加更多的 printk 调用。您可能会对什么被称为 [或不被称为] 或按什么顺序感到惊讶。

更新：

如果我安装具有阻塞行为的内核模块，那么一切都会正常初始化。但是，dma_busy_loop 函数显示 DMA 描述符始终处于 IN PROGRESS 状态，DMA 事务从未完成。因此，回调函数不会被执行。这是怎么回事？

又做了一点研究。Cookie 只是递增的序列号。例如，如果您发出一个请求，该请求被分解为 [比如说] 10 个单独的分散/聚集操作 [描述符]，那么每个操作都会获得一个唯一的 cookie 值。cookie 返回值是最新的/最后的（例如 10）。

当您调用 (1) dma_async_is_tx_complete 时，(2) 它会调用 chan->device->device_tx_status，(3) 也就是 pl330_tx_status，(4) 后者再调用 dma_cookie_status。

附注/提示：当我跟踪下来，我只是不停地来回翻转之间dmaengine.h和pl330.c。就像：看（1），它调用（2）。这是哪里设置的？在pl330.c，我想。所以，我搜索了字符串并得到了 pl330 函数的名称（即（3））。所以，我去那里，看到它（4）。所以……回到dmaengine.h……

但是，当您进行外部调用时，您忽略了最后两个参数 [把它们设为 NULL]。这两个参数可能很有用，因为它们会返回“最后”和“已使用”的 cookie。因此，即使请求还没有完全完成，这些值也可能发生变化，从而显示出部分进度。

其中之一最终应该 >= 到“返回”cookie 值。(ie) 整个操作应该是完整的。因此，这将有助于区分可能发生的情况。

另外，请注意，在 dmaengine.h 中，紧接在 dma_async_is_tx_complete 下方有 dma_async_is_complete。此函数根据您传入的 cookie 值以及“最后”和“已使用”的 cookie 值来决定返回 DMA_COMPLETE 还是 DMA_IN_PROGRESS。它是被动的，并不在代码路径中使用 [AFAICT]，但它确实展示了如何自己计算完成情况。

归档时间：	10 年前
查看次数：	3401 次
最近记录：	10 年前