Linux Misc 设备:具有私有数据分配的驱动程序上的 Misc_deregister() 处的一般保护错误

Ema*_*max 7 c linux linux-device-driver linux-kernel

我正在 Linux 中编写一个简单的字符驱动程序,用于学习 Linux 内核开发的教育目的。为此,我创建了一个杂项设备,并在我的过程中动态分配其私有数据misc_init(),并在misc_exit(). 即使驱动程序还没有其他代码,我在执行sudo rmmod sepehr.

这是我的驱动程序代码:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>

// Private data structure
struct sepehr_private {
    struct mutex mtx;
    char *message;
    size_t size;
};

// File Operations - Open
static int sepehr_open(struct inode *inode, struct file *file)
{
    return 0;
}

// File Operations - Release
static int sepehr_release(struct inode *inode, struct file *file)
{
    return 0;
}

// File Operations - Read
static ssize_t sepehr_read(struct file *file, char *buffer, size_t len,
               loff_t *offset)
{
    return 0;
}

// File Operations - Write
static ssize_t sepehr_write(struct file *file, const char *buffer, size_t len,
                loff_t *offset)
{
    return len;
}

// File Operations - Ioctl
static long sepehr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    return 0;
}

// File Operations - Llseek
static loff_t sepehr_llseek(struct file *file, loff_t offset, int whence)
{
    return 0;
}

// File operations structure
static const struct file_operations fops = {
    .owner = THIS_MODULE,
    .open = sepehr_open,
    .release = sepehr_release,
    .read = sepehr_read,
    .write = sepehr_write,
    .unlocked_ioctl = sepehr_ioctl,
    .llseek = sepehr_llseek,
};

// Misc device structure
static struct miscdevice misc_device = {
    .minor = MISC_DYNAMIC_MINOR,
    .name = "sepehr",
    .fops = &fops,
    .mode = 0666,
};

// Module init function
static int __init misc_init(void)
{
    int ret;
    struct sepehr_private *priv;

    priv = kzalloc(sizeof(*priv), GFP_KERNEL);
    if (!priv) {
        printk("Unable to allocate memory for sepehr_private\n");
        return -ENOMEM;
    }

    mutex_init(&priv->mtx);
    priv->message = NULL;
    priv->size = 0;

    ret = misc_register(&misc_device);
    if (ret) {
        kfree(priv); // free the memory in case of registration failure
        return ret;
    }

    misc_device.this_device->driver_data = priv;

    return ret;
}

// Module exit function
static void __exit misc_exit(void)
{
    struct sepehr_private *priv = misc_device.this_device->driver_data;
    misc_deregister(&misc_device);

    mutex_destroy(&priv->mtx);
    kfree(priv->message);
    kfree(priv);
}

module_init(misc_init);
module_exit(misc_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sepehr");
MODULE_DESCRIPTION("It's fake. Just for testing");
MODULE_VERSION("0.1");
Run Code Online (Sandbox Code Playgroud)

当我重复加载和卸载时(因为我会测试我将编写的代码的新部分),最终我会从rmmod. 然后,驱动程序处于卡住模式并且无法删除,我只能重新启动我的虚拟机。也很奇怪的是,这种情况会在我几次之后发生insmodrmmod而不一定是在第一次尝试时发生。所以我怀疑这是一些未定义的行为,带有悬空指针或内存损坏。

这是要检查的 dmesg 的完整输出,但简而言之,它遇到一般保护故障 at misc_deregister,并且RIPis at strlen()、 from kstrdup()、 from device_get_devnode()

dmesg 完整日志:

[ 1041.925268] sepehr: loading out-of-tree module taints kernel.
[ 1041.925585] sepehr: module verification failed: signature and/or required key missing - tainting kernel
[ 1044.099974] general protection fault, probably for non-canonical address 0x7304240700013200: 0000 [#1] PREEMPT SMP PTI
[ 1044.100002] CPU: 1 PID: 2300 Comm: rmmod Tainted: G           OE      6.3.12-100.fc37.x86_64 #1
[ 1044.100016] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[ 1044.100027] RIP: 0010:strlen+0x4/0x30
[ 1044.100039] Code: f7 75 ec 31 c0 c3 cc cc cc cc 48 89 f8 c3 cc cc cc cc 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <80> 3f 00 74 14 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 cc
[ 1044.100063] RSP: 0018:ffffa70fc140fd60 EFLAGS: 00010206
[ 1044.100073] RAX: ffff94cff6bc5b00 RBX: 7304240700013200 RCX: 0000000000000000
[ 1044.100084] RDX: 0000000000000000 RSI: 0000000000000cc0 RDI: 7304240700013200
[ 1044.100095] RBP: ffff94cff6fed000 R08: ffffa70fc140fdb0 R09: 000000008020001d
[ 1044.100106] R10: ffffa70fc140fdc8 R11: ffffa70fc140fdd0 R12: 0000000000000cc0
[ 1044.100116] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 1044.100127] FS:  00007ff614371740(0000) GS:ffff94d19c440000(0000) knlGS:0000000000000000
[ 1044.100139] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1044.100148] CR2: 000055cb8d5e7468 CR3: 000000013687a006 CR4: 00000000000306e0
[ 1044.100160] Call Trace:
[ 1044.100168]  <TASK>
[ 1044.100175]  ? die_addr+0x36/0x90
[ 1044.100187]  ? exc_general_protection+0x1be/0x420
[ 1044.100198]  ? asm_exc_general_protection+0x26/0x30
[ 1044.100211]  ? strlen+0x4/0x30
[ 1044.100218]  kstrdup+0x1d/0x70
[ 1044.100229]  device_get_devnode+0x70/0xe0
[ 1044.100241]  devtmpfs_delete_node+0x53/0xa0
[ 1044.100252]  device_del+0x338/0x3e0
[ 1044.100264]  device_unregister+0x17/0x60
[ 1044.100273]  misc_deregister+0x70/0xe0
[ 1044.100285]  misc_exit+0x1c/0xf20 [sepehr]
[ 1044.100296]  __do_sys_delete_module.constprop.0+0x19b/0x2f0
[ 1044.100309]  do_syscall_64+0x5f/0x90
[ 1044.100319]  ? do_syscall_64+0x6b/0x90
[ 1044.100328]  ? exc_page_fault+0x74/0x170
[ 1044.100337]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[ 1044.100348] RIP: 0033:0x7ff613d2fe5b
[ 1044.100367] Code: 73 01 c3 48 8b 0d d5 5f 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d a5 5f 0c 00 f7 d8 64 89 01 48
[ 1044.100390] RSP: 002b:00007ffedc00adb8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
[ 1044.100402] RAX: ffffffffffffffda RBX: 000055cb8d5dc760 RCX: 00007ff613d2fe5b
[ 1044.100430] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055cb8d5dc7c8
[ 1044.100441] RBP: 0000000000000000 R08: 1999999999999999 R09: 0000000000000000
[ 1044.100451] R10: 00007ff613da0ac0 R11: 0000000000000206 R12: 00007ffedc00b010
[ 1044.100461] R13: 00007ffedc00c799 R14: 000055cb8d5dc2a0 R15: 00007ffedc00b018
[ 1044.100473]  </TASK>
[ 1044.100478] Modules linked in: sepehr(OE-) nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set rfkill nf_tables nfnetlink qrtr sunrpc snd_intel8x0 intel_rapl_msr snd_ac97_codec intel_rapl_common ac97_bus snd_seq snd_seq_device snd_pcm snd_timer rapl joydev snd i2c_piix4 vboxguest soundcore fuse loop zram xfs crct10dif_pclmul crc32_pclmul crc32c_intel polyval_generic ghash_clmulni_intel sha512_ssse3 serio_raw vmwgfx e1000 video drm_ttm_helper ttm wmi ata_generic pata_acpi scsi_dh_rdac scsi_dh_emc scsi_dh_alua dm_multipath
[ 1044.100508] Unloaded tainted modules: sepehr(OE):1 [last unloaded: sepehr(OE)]
[ 1044.102506] ---[ end trace 0000000000000000 ]---
[ 1044.102785] RIP: 0010:strlen+0x4/0x30
[ 1044.103049] Code: f7 75 ec 31 c0 c3 cc cc cc cc 48 89 f8 c3 cc cc cc cc 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa <80> 3f 00 74 14 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 cc
[ 1044.103788] RSP: 0018:ffffa70fc140fd60 EFLAGS: 00010206
[ 1044.104058] RAX: ffff94cff6bc5b00 RBX: 7304240700013200 RCX: 0000000000000000
[ 1044.104328] RDX: 0000000000000000 RSI: 0000000000000cc0 RDI: 7304240700013200
[ 1044.104577] RBP: ffff94cff6fed000 R08: ffffa70fc140fdb0 R09: 000000008020001d
[ 1044.104825] R10: ffffa70fc140fdc8 R11: ffffa70fc140fdd0 R12: 0000000000000cc0
[ 1044.105313] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 1044.105574] FS:  00007ff614371740(0000) GS:ffff94d19c440000(0000) knlGS:0000000000000000
[ 1044.105831] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1044.106087] CR2: 000055cb8d5e7468 CR3: 000000013687a006 CR4: 00000000000306e0
Run Code Online (Sandbox Code Playgroud)

作为参考,我在6.3.12-100.fc37.x86_64两个独立的干净虚拟机上的 Fedora 内核上进行了测试。两者相同的问题。我尝试了很多方法,但找不到解决方案或看出哪里出了问题。

我原来的驱动程序代码要长得多,我将其精简为这样。删除数据分配还可以避免驱动程序遇到错误。我的错误必须来自我分配数据的方式。我从 elixir.bootlin.com 探索了其他杂项驱动程序,但也找不到类似的示例来重现我的代码。我想知道是否有一种正确的方法来初始化驱动程序的数据,而不会以这种方式对内核进行操作。

编辑:我还应该提到,我想保持数据动态分配,而不是作为全局静态非常量变量进行跟踪。

小智 1

this_device->driver_data成员在内部使用,因此您不应覆盖它。