voi*_*oid 1 c linux race-condition linux-device-driver linux-kernel
对于 Linux 内核设备驱动程序,有 file_operations 结构体(或称 fops 结构体),它允许驱动程序为各种文件操作定义处理程序。
我的问题是关于 .release 这个 fop 处理程序的。
我知道只有在 file 对象的最后一个文件描述符 (fd) 被关闭(或被 munmap)时才会调用 release 处理程序。这是在对该 file 调用 fput 并且 file->f_count 到达 0 时完成的。
但是——我不清楚在进入 release 时,其他文件操作是否可能正在另一个线程中同时运行。
例如:
一个进程的 1 个线程是否可以处于 file(或 fd)的 ioctl 处理程序中,而同一进程的另一个线程处于 release 处理程序中?
release 会成为 file 对象竞争条件的一个因素吗?
进程的 1 个线程是否可以处于文件(或 fd)的 ioctl 处理程序中,而同一进程的另一个线程处于 release 处理程序中?
否。只有当文件条目上的引用计数器降为 0 时,才会调用 release 入口点。ioctl() 会增加文件上的引用计数器。因此,当 ioctl() 正在执行时,不会调用 release 入口点。
下面讨论的源代码是:
The GLIBC's pthread_create() actually involves a clone() system call with the following flags:
CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
Run Code Online (Sandbox Code Playgroud)
According to the manual of clone(), the CLONE_FILES flag makes the threads of a process
share the same file descriptor table. Any file descriptor created by
one thread is also valid in the other threads. Similarly, if one thread closes a file descriptor, or changes its associated flags (using the fcntl() F_SETFD operation), the other threads are also affected.
When clone() is passed CLONE_FILES, the files_struct is not duplicated but a reference counter is incremented. As a consequence, the task structures of both threads point on the same files_struct (files field):
. The task structure is defined in include/linux/sched.h:
CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
Run Code Online (Sandbox Code Playgroud)
. In kernel/fork.c, the clone() service calls copy_files() to increment the reference counter on the files_struct
struct task_struct {
[...]
/* Open file information: */
struct files_struct *files; /// <==== Table of open files shared between thread
[...]
Run Code Online (Sandbox Code Playgroud)
. The files_struct is defined in include/linux/fdtable.h:
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
struct files_struct *oldf, *newf;
int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
goto out;
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count); // <==== Ref counter incremented: files_struct is shared
goto out;
}
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
tsk->files = newf;
error = 0;
out:
return error;
}
Run Code Online (Sandbox Code Playgroud)
ioctl() system call is defined fs/ioctl.c. It calls fdget() first to increment the reference counter on the file entry, do the requested operation and then call fdput()
/*
* Open file table structure
*/
struct files_struct {
/*
* read mostly part
*/
atomic_t count; // <==== Reference counter
bool resize_in_progress;
wait_queue_head_t resize_wait;
struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
unsigned int next_fd;
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
Run Code Online (Sandbox Code Playgroud)
The file entry is defined in include/linux/fs.h. Its reference counter is the f_count field:
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
struct fd f = fdget(fd);
if (!f.file)
return -EBADF;
error = security_file_ioctl(f.file, cmd, arg);
if (!error)
error = do_vfs_ioctl(f.file, fd, cmd, arg);
fdput(f);
return error;
}
/*
 * ioctl() system call entry point: forwards its three arguments unchanged
 * to ksys_ioctl() and returns its result.
 */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
return ksys_ioctl(fd, cmd, arg);
}
Run Code Online (Sandbox Code Playgroud)
Here is a simple device driver into which the file operations merely display a message when they are triggered. The ioctl() entry makes the caller sleep 5 seconds:
/*
 * Open file entry (excerpt: members elided from the quotation are marked
 * with [...]). The member of interest here is f_count, the reference
 * counter of the entry.
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
/* Table of operation handlers for this file. */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count; // <===== Reference counter
unsigned int f_flags;
[...]
} __randomize_layout
__attribute__((aligned(4)));
Run Code Online (Sandbox Code Playgroud)
Here is a user space program which involves the main thread and a secondary one. The main thread opens the above device and waits for the secondary thread to start (barrier) before closing the device after 1 second. Meanwhile, the secondary thread calls ioctl() on the above device which makes it sleep 5 seconds. Then it calls ioctl() a second time before exiting.
The expected behavior is to make the main thread close the device file while the secondary thread is running the ioctl().
#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>
static int dev_fd;
static pthread_barrier_t barrier;
/*
 * Secondary thread body.
 *
 * Waits on the shared barrier until the main thread is ready, then issues
 * two ioctl() requests (command 0) on the shared descriptor dev_fd,
 * printing the return code and errno after each one.
 *
 * @arg: unused.
 * Returns NULL.
 */
void *entry(void *arg)
{
    (void)arg;

    printf("Thread running...\n");

    // Rendez-vous with main thread
    pthread_barrier_wait(&barrier);

    for (int attempt = 0; attempt < 2; attempt++) {
        int rc = ioctl(dev_fd, 0);

        printf("rc = %d, errno = %d\n", rc, errno);
    }

    return NULL;
}
/*
 * Test driver: opens /dev/device, starts the secondary thread, and closes
 * the descriptor one second after both threads have passed the barrier,
 * i.e. while the secondary thread is expected to still be inside its first
 * ioctl().
 *
 * Every setup step is checked. In particular, if the thread cannot be
 * created nobody else will ever reach the two-party barrier, so waiting on
 * it would block forever; and if the device cannot be opened the ioctl()
 * calls would fail with EBADF for an unrelated reason.
 *
 * Returns 0 on success, 1 when the setup fails.
 */
int main(void)
{
    pthread_t tid;
    int rc;

    dev_fd = open("/dev/device", O_RDWR);
    if (dev_fd < 0) {
        perror("open(/dev/device)");
        return 1;
    }

    rc = pthread_barrier_init(&barrier, NULL, 2);
    if (rc != 0) {
        /* pthread functions return the error number instead of setting errno. */
        fprintf(stderr, "pthread_barrier_init failed: %d\n", rc);
        close(dev_fd);
        return 1;
    }

    rc = pthread_create(&tid, NULL, entry, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %d\n", rc);
        pthread_barrier_destroy(&barrier);
        close(dev_fd);
        return 1;
    }

    /* Rendez-vous with the secondary thread, then let it enter ioctl(). */
    pthread_barrier_wait(&barrier);
    sleep(1);

    close(dev_fd);

    pthread_join(tid, NULL);
    pthread_barrier_destroy(&barrier);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
Installation of the kernel module:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/delay.h>
MODULE_LICENSE("GPL");
#define DEVICE_NAME "device"
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);
static long int device_ioctl(struct file *, unsigned int, unsigned long);
static int device_flush(struct file *, fl_owner_t);
/*
 * Operation table of the demo device: each supported file operation is
 * routed to the matching device_*() handler of this module.
 */
static const struct file_operations fops = {
	.owner          = THIS_MODULE,

	/* Lifetime of an open file */
	.open           = device_open,
	.flush          = device_flush,
	.release        = device_release,

	/* Data and control paths */
	.read           = device_read,
	.write          = device_write,
	.unlocked_ioctl = device_ioctl,
};
/*
 * Module state. Both objects are only used inside this file, so they are
 * given internal linkage to keep them out of the kernel's global namespace.
 */
/* Character device object set up in init() and removed in cleanup(). */
static struct cdev *device_cdev;
/* Device number region obtained from alloc_chrdev_region() in init(). */
static dev_t deviceNumbers;
/*
 * Module entry point: reserves one character device number, allocates the
 * cdev and registers it with the fops table.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released again.
 */
static int __init init(void)
{
	int ret;

	/* This returns the major number chosen dynamically in deviceNumbers */
	ret = alloc_chrdev_region(&deviceNumbers, 0, 1, DEVICE_NAME);
	if (ret < 0) {
		printk(KERN_ALERT "Error registering: %d\n", ret);
		return ret;
	}

	device_cdev = cdev_alloc();
	if (!device_cdev) {
		printk(KERN_ALERT "Error allocating cdev\n");
		ret = -ENOMEM;
		goto err_unregister;
	}

	/*
	 * cdev_alloc() returns an already initialised, dynamically managed
	 * object. Calling cdev_init() on it would re-initialise the embedded
	 * kobject as a statically allocated one, and the memory would then
	 * never be freed. Only the fields are filled in here.
	 */
	device_cdev->owner = THIS_MODULE;
	device_cdev->ops = &fops;

	ret = cdev_add(device_cdev, deviceNumbers, 1);
	if (ret < 0) {
		printk(KERN_ALERT "Error adding cdev: %d\n", ret);
		goto err_free_cdev;
	}

	printk(KERN_INFO "Device initialized (major number is %d)\n", MAJOR(deviceNumbers));
	return 0;

err_free_cdev:
	/* Drop the reference held since cdev_alloc(); this frees the cdev. */
	kobject_put(&device_cdev->kobj);
err_unregister:
	unregister_chrdev_region(deviceNumbers, 1);
	return ret;
}
/*
 * Module exit point: undoes init() in reverse order. The cdev is removed
 * first so that the device is no longer reachable when its number region
 * is handed back.
 */
static void __exit cleanup(void)
{
	cdev_del(device_cdev);
	unregister_chrdev_region(deviceNumbers, 1);
	printk(KERN_INFO "Device unloaded\n");
}
/*
 * .open handler: only logs the event.
 *
 * @inode: unused.
 * @file:  unused.
 * Always returns 0.
 */
static int device_open(struct inode *inode, struct file *file)
{
	pr_info("Device open\n");

	return 0;
}
/*
 * .flush handler: only logs the event.
 *
 * @file: unused.
 * @id:   unused.
 * Always returns 0.
 */
static int device_flush(struct file *file, fl_owner_t id)
{
	pr_info("Device flush\n");

	return 0;
}
/*
 * .release handler: only logs the event.
 *
 * @inode: unused.
 * @file:  unused.
 * Always returns 0.
 */
static int device_release(struct inode *inode, struct file *file)
{
	pr_info("Device released\n");

	return 0;
}
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device write\n");
return len;
}
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device read\n");
return 0;
}
/*
 * .unlocked_ioctl handler: logs entry, sleeps for up to five seconds and
 * logs exit, which keeps the caller inside the handler long enough for
 * another thread to act on the same file.
 *
 * @file:        unused.
 * @ioctl_num:   unused; every request is treated the same.
 * @ioctl_param: unused.
 *
 * Always returns 0; the result of the sleep is not examined.
 */
static long int device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
	const unsigned int sleep_ms = 5000;

	pr_info("Device ioctl enter\n");
	msleep_interruptible(sleep_ms);
	pr_info("Device ioctl out\n");

	return 0;
}
module_init(init);
module_exit(cleanup);
Run Code Online (Sandbox Code Playgroud)
The execution of the program shows that the first ioctl() makes the thread wait 5 seconds. But the second returns in error with EBADF (9) because meanwhile the device file has been closed by the main thread:
#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>
static int dev_fd;
static pthread_barrier_t barrier;
/*
 * Secondary thread body.
 *
 * Waits on the shared barrier until the main thread is ready, then issues
 * two ioctl() requests (command 0) on the shared descriptor dev_fd,
 * printing the return code and errno after each one.
 *
 * @arg: unused.
 * Returns NULL.
 */
void *entry(void *arg)
{
    (void)arg;

    printf("Thread running...\n");

    // Rendez-vous with main thread
    pthread_barrier_wait(&barrier);

    for (int attempt = 0; attempt < 2; attempt++) {
        int rc = ioctl(dev_fd, 0);

        printf("rc = %d, errno = %d\n", rc, errno);
    }

    return NULL;
}
/*
 * Test driver: opens /dev/device, starts the secondary thread, and closes
 * the descriptor one second after both threads have passed the barrier,
 * i.e. while the secondary thread is expected to still be inside its first
 * ioctl().
 *
 * Every setup step is checked. In particular, if the thread cannot be
 * created nobody else will ever reach the two-party barrier, so waiting on
 * it would block forever; and if the device cannot be opened the ioctl()
 * calls would fail with EBADF for an unrelated reason.
 *
 * Returns 0 on success, 1 when the setup fails.
 */
int main(void)
{
    pthread_t tid;
    int rc;

    dev_fd = open("/dev/device", O_RDWR);
    if (dev_fd < 0) {
        perror("open(/dev/device)");
        return 1;
    }

    rc = pthread_barrier_init(&barrier, NULL, 2);
    if (rc != 0) {
        /* pthread functions return the error number instead of setting errno. */
        fprintf(stderr, "pthread_barrier_init failed: %d\n", rc);
        close(dev_fd);
        return 1;
    }

    rc = pthread_create(&tid, NULL, entry, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %d\n", rc);
        pthread_barrier_destroy(&barrier);
        close(dev_fd);
        return 1;
    }

    /* Rendez-vous with the secondary thread, then let it enter ioctl(). */
    pthread_barrier_wait(&barrier);
    sleep(1);

    close(dev_fd);

    pthread_join(tid, NULL);
    pthread_barrier_destroy(&barrier);
    return 0;
}
Run Code Online (Sandbox Code Playgroud)
在内核日志中,我们可以看到:当第一个 ioctl() 仍在辅助线程中执行时,主线程中的 close() 仅触发了设备上的 flush() 操作。然后,一旦第一个 ioctl() 返回,内核内部就会释放文件条目(引用计数器降为 0)。因此,第二个 ioctl() 没有到达设备,因为文件描述符不再引用打开的文件。这就是第二次调用返回 EBADF 错误的原因:
$ sudo insmod ./device.ko
$ dmesg
[13270.589766] Device initialized (major number is 237)
$ sudo mknod /dev/device c 237 0
$ sudo chmod 666 /dev/device
$ ls -l /dev/device
crw-rw-rw- 1 root root 237, 0 janv. 27 10:55 /dev/device
Run Code Online (Sandbox Code Playgroud)