摘要:
记录 Linux 存储栈,并制作一个最简单的 aufs 文件系统
总体概览:
最简单的文件系统aufs的制作及加载进内核:
aufs文件:
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/cred.h>
#include <linux/mount.h>
#include <linux/uaccess.h>
//每个文件系统需要一个MAGIC number
#define AUFS_MAGIC 0x64668735
//aufs文件系统的挂载点
static struct vfsmount *aufs_mount;
/*
 * aufs_get_inode - allocate and initialise an inode on an aufs super block.
 * @sb:   super block the new inode is allocated from
 * @mode: file type and permission bits
 * @dev:  device number, only handed to init_special_inode() for modes that
 *        are neither regular files nor directories
 *
 * Returns the new inode, or NULL when new_inode() fails.
 */
static struct inode *aufs_get_inode(struct super_block *sb, int mode, dev_t dev)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	inode->i_mode = mode;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_blocks = 0;
	/*
	 * Stamp all three times with the current time.  The previous code
	 * copied i_ctime, which had never been assigned, into the other two.
	 */
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	switch (mode & S_IFMT) {
	default:
		init_special_inode(inode, mode, dev);
		break;
	case S_IFREG:
		/* i_fop for regular files is installed later by aufs_create_file() */
		printk("create a file \n");
		break;
	case S_IFDIR:
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		printk("creat a dir file \n");
		/* one extra link for the directory; use the helper, not __i_nlink */
		inc_nlink(inode);
		break;
	}
	return inode;
}
/*
 * aufs_mknod - create an inode and attach it to @dentry.
 * @dir:    parent directory inode; only its super block is used here
 * @dentry: dentry that must not have an inode yet
 * @mode:   file type and permission bits for the new inode
 * @dev:    device number forwarded to aufs_get_inode()
 *
 * Returns 0 on success, -EEXIST when @dentry already has an inode and
 * -EPERM when aufs_get_inode() returns no inode.
 */
static int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *fresh;

	if (dentry->d_inode)
		return -EEXIST;

	fresh = aufs_get_inode(dir->i_sb, mode, dev);
	if (!fresh)
		return -EPERM;

	d_instantiate(dentry, fresh);
	/* take an additional reference on the now-instantiated dentry */
	dget(dentry);
	return 0;
}
/*
 * aufs_mkdir - create a directory inode for @dentry below @dir.
 * @dir:    parent directory inode
 * @dentry: dentry of the directory to create
 * @mode:   permission bits; S_IFDIR is OR-ed in here
 *
 * Returns 0 on success or the error reported by aufs_mknod().
 */
static int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int res = aufs_mknod(dir, dentry, mode | S_IFDIR, 0);

	/*
	 * The parent gains a link for the new child.  Go through inc_nlink()
	 * rather than touching __i_nlink, which is meant to be private to
	 * the VFS link-count helpers.
	 */
	if (!res)
		inc_nlink(dir);
	return res;
}
/*
 * aufs_create - create a regular-file inode for @dentry below @dir.
 *
 * Forces S_IFREG into @mode and delegates to aufs_mknod() with device 0;
 * the return value is whatever aufs_mknod() returns.
 */
static int aufs_create(struct inode *dir, struct dentry *dentry, int mode)
{
	const int file_mode = mode | S_IFREG;

	return aufs_mknod(dir, dentry, file_mode, 0);
}
/*
 * aufs_create_by_name - look up @name under @parent and create it.
 * @name:   name of the entry to create
 * @mode:   file type and permission bits; S_IFDIR selects a directory,
 *          anything else creates a regular file
 * @parent: parent dentry, or NULL to use the root of the internal aufs mount
 * @dentry: out parameter; receives the new dentry on success, NULL otherwise
 *
 * Returns 0 on success, -EFAULT when no parent can be determined, or the
 * error from the lookup / creation step.
 */
static int aufs_create_by_name(const char *name, mode_t mode,
			       struct dentry *parent, struct dentry **dentry)
{
	struct dentry *child;
	int error;

	*dentry = NULL;

	if (!parent && aufs_mount && aufs_mount->mnt_sb)
		parent = aufs_mount->mnt_sb->s_root;
	if (!parent) {
		printk("Ah! can not find a parent!\n");
		return -EFAULT;
	}

	/* lookup_one_len() must be called with the parent directory locked */
	inode_lock(parent->d_inode);

	child = lookup_one_len(name, parent, strlen(name));
	/*
	 * Test the lookup result itself.  The previous code passed the
	 * 'struct dentry **' out parameter to IS_ERR()/PTR_ERR(), so a failed
	 * lookup was never detected.
	 */
	if (IS_ERR(child)) {
		error = PTR_ERR(child);
		goto out_unlock;
	}

	if ((mode & S_IFMT) == S_IFDIR)
		error = aufs_mkdir(parent->d_inode, child, mode);
	else
		error = aufs_create(parent->d_inode, child, mode);

	if (error)
		dput(child);	/* drop the lookup reference instead of leaking it */
	else
		*dentry = child;

out_unlock:
	inode_unlock(parent->d_inode);
	return error;
}
/*
 * aufs_create_file - create a file (or directory) in the aufs filesystem.
 * @name:   name of the new entry
 * @mode:   file type and permission bits
 * @parent: parent dentry, forwarded to aufs_create_by_name()
 * @data:   if non-NULL, stored in the new inode's i_private
 * @fops:   if non-NULL, installed as the new inode's i_fop
 *
 * Returns the dentry of the new entry, or NULL when creation failed.
 */
struct dentry *aufs_create_file(const char *name, mode_t mode,
				struct dentry *parent, void *data,
				struct file_operations *fops)
{
	struct dentry *created = NULL;
	struct inode *node;

	printk("aufs: creating file \'%s\'", name);

	if (aufs_create_by_name(name, mode, parent, &created))
		return NULL;

	node = created->d_inode;
	if (node) {
		if (data)
			node->i_private = data;
		if (fops)
			node->i_fop = fops;
	}
	return created;
}
//在aufs文件系统中创建一个文件夹
struct dentry *aufs_create_dir(const char *name, struct dentry *parent)
{
return aufs_create_file(name, S_IFDIR | S_IRWXU | S_IRUGO, parent, NULL, NULL);
}
/* Toggle reported by aufs_file_read() and changed by aufs_file_write(). */
static int enabled = 1;

/*
 * aufs_file_read - read handler for aufs files.
 * @fle:    file being read (unused)
 * @buf:    user buffer receiving the text
 * @nbytes: size of @buf
 * @ppos:   file position, advanced by simple_read_from_buffer()
 *
 * Returns one status line that depends on 'enabled'.  Each read also dumps
 * the kernel call stack to the log.  The result is whatever
 * simple_read_from_buffer() returns.
 */
static ssize_t aufs_file_read(struct file *fle, char __user *buf, size_t nbytes, loff_t *ppos)
{
	/*
	 * "\n" is a real newline; the previous "\\n" delivered a literal
	 * backslash followed by 'n' to the reader.
	 */
	const char *msg = enabled ? "aufs read enabled\n" : "aufs read disabled\n";

	dump_stack();
	return simple_read_from_buffer(buf, nbytes, ppos, msg, strlen(msg));
}
/*
 * aufs_file_write - write handler for aufs files.
 * @file:   file being written (unused)
 * @buffer: user buffer; only its first byte is examined
 * @count:  number of bytes the caller wants to write
 * @ppos:   file position (unused)
 *
 * A first byte of '0' clears 'enabled'; any other byte sets it.
 * Returns @count on success (0 for an empty write) or -EFAULT when the
 * user buffer cannot be read.
 */
static ssize_t aufs_file_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
	char first;

	/* nothing to look at for an empty write */
	if (!count)
		return 0;

	/* a __user pointer must never be dereferenced directly */
	if (get_user(first, buffer))
		return -EFAULT;

	enabled = (first != '0');
	return count;
}
/*
 * File operations for opened aufs files: reads go to aufs_file_read(),
 * writes go to aufs_file_write().
 */
static struct file_operations aufs_file_operations = {
.read = aufs_file_read,
.write = aufs_file_write,
};
/*
 * aufs_fill_super - fill in the aufs super block.
 * @sb:     super block to initialise
 * @data:   mount data (unused)
 * @silent: unused
 *
 * Delegates to simple_fill_super() with AUFS_MAGIC and a file table that
 * holds only a single empty-name entry.  Returns its result.
 */
static int aufs_fill_super(struct super_block *sb, void *data, int silent)
{
	static struct tree_descr no_files[] = { { "" } };
	int rc;

	rc = simple_fill_super(sb, AUFS_MAGIC, no_files);
	return rc;
}
/*
 * aufs_get_sb - mount callback of the aufs filesystem type.
 * @fs_type:  filesystem type being mounted
 * @flags:    mount flags
 * @dev_name: device name (unused)
 * @data:     mount data
 *
 * Hands the request to mount_single() with aufs_fill_super() as the
 * fill callback and returns the dentry (or error pointer) it produces.
 */
static struct dentry *aufs_get_sb(struct file_system_type *fs_type,
				  int flags, const char *dev_name, void *data)
{
	struct dentry *root;

	root = mount_single(fs_type, flags, data, aufs_fill_super);
	return root;
}
/*
 * file_system_type descriptor for aufs.  Every filesystem has one such
 * structure; it carries the filesystem's name and the callbacks used to
 * mount it (aufs_get_sb) and to tear down its super block
 * (kill_litter_super).
 * NOTE(review): .owner is not set here - presumably because this module
 * keeps its own kern_mount() for its whole lifetime; confirm before adding it.
 */
static struct file_system_type aufs_type = {
.name = "aufs",
.mount = aufs_get_sb,
.kill_sb = kill_litter_super,
};
/*
 * aufs_init - module entry point.
 *
 * Registers the aufs filesystem type, creates the internal kernel mount
 * and populates it with two directories of three regular files each.
 *
 * Returns 0 on success, or the error from register_filesystem() /
 * kern_mount().  Failures while creating the demo entries are not reported.
 */
static int __init aufs_init(void)
{
	int ret;
	struct dentry *pslot;

	ret = register_filesystem(&aufs_type);
	if (ret) {
		printk(KERN_ERR "aufs: cannot register file system\n");
		return ret;
	}

	aufs_mount = kern_mount(&aufs_type);
	if (IS_ERR(aufs_mount)) {
		/*
		 * Propagate the real error.  'ret' is still 0 at this point, so
		 * returning it made the module load "successfully" while
		 * aufs_mount held an error pointer.
		 */
		ret = PTR_ERR(aufs_mount);
		aufs_mount = NULL;
		printk(KERN_ERR "aufs: cannot mount file system\n");
		unregister_filesystem(&aufs_type);
		return ret;
	}

	/* create the "woman_star" directory; pslot is the new directory's dentry */
	pslot = aufs_create_dir("woman_star", NULL);
	/* create the regular files inside that directory */
	aufs_create_file("lbb", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);
	aufs_create_file("fbb", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);
	aufs_create_file("lj1", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);

	pslot = aufs_create_dir("man_star", NULL);
	aufs_create_file("ldh", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);
	aufs_create_file("lcw", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);
	aufs_create_file("jw", S_IFREG | S_IRUGO, pslot, NULL, &aufs_file_operations);

	return 0;
}
/*
 * aufs_exit - module exit point: drop the internal mount created in
 * aufs_init(), unregister the filesystem type, then clear the cached
 * mount pointer.
 */
static void __exit aufs_exit(void)
{
kern_unmount(aufs_mount);
unregister_filesystem(&aufs_type);
aufs_mount = NULL;
}
module_init(aufs_init);
module_exit(aufs_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("This is a simple module");
MODULE_VERSION("Ver 0.1");
makefile:
# If KERNELRELEASE is defined, we've been invoked from the
# kernel build system and can use its language.
ifneq ($(KERNELRELEASE),)
obj-m := aufs.o
# Otherwise we were called directly from the command
# line; invoke the kernel build system.
else
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

# Neither target produces a file of its own name.
.PHONY: default clean

# Recipe lines must start with a TAB character.
default:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

# Only meaningful for a direct invocation, so it lives in this branch.
clean:
	rm -rf *.ko *.o *.mod.o *.mod.c *.symvers *.order
endif
执行make编译:
root@localhost:~/work/csi/aufs# make
make -C /lib/modules/4.18.0-348.7.1.el8_5.x86_64/build M=/root/work/csi/aufs modules
make[1]: Entering directory '/usr/src/kernels/4.18.0-348.7.1.el8_5.x86_64'
CC [M] /root/work/csi/aufs/aufs.o
In file included from /root/work/csi/aufs/aufs.c:1:
./include/linux/module.h:129:6: warning: ‘init_module’ specifies less restrictive attribute than its target ‘aufs_init’: ‘cold’ [-Wmissing-attributes]
129 | int init_module(void) __attribute__((alias(#initfn)));
| ^~~~~~~~~~~
/root/work/csi/aufs/aufs.c:229:1: note: in expansion of macro ‘module_init’
229 | module_init(aufs_init);
| ^~~~~~~~~~~
/root/work/csi/aufs/aufs.c:189:19: note: ‘init_module’ target declared here
189 | static int __init aufs_init(void)
| ^~~~~~~~~
In file included from /root/work/csi/aufs/aufs.c:1:
./include/linux/module.h:135:7: warning: ‘cleanup_module’ specifies less restrictive attribute than its target ‘aufs_exit’: ‘cold’ [-Wmissing-attributes]
135 | void cleanup_module(void) __attribute__((alias(#exitfn)));
| ^~~~~~~~~~~~~~
/root/work/csi/aufs/aufs.c:230:1: note: in expansion of macro ‘module_exit’
230 | module_exit(aufs_exit);
| ^~~~~~~~~~~
/root/work/csi/aufs/aufs.c:222:20: note: ‘cleanup_module’ target declared here
222 | static void __exit aufs_exit(void)
| ^~~~~~~~~
Building modules, stage 2.
MODPOST 1 modules
CC /root/work/csi/aufs/aufs.mod.o
LD [M] /root/work/csi/aufs/aufs.ko
make[1]: Leaving directory '/usr/src/kernels/4.18.0-348.7.1.el8_5.x86_64'
root@localhost:~/work/csi/aufs# tree
.
├── aufs.c
├── aufs.ko
├── aufs.mod.c
├── aufs.mod.o
├── aufs.o
├── Makefile
├── modules.order
└── Module.symvers
0 directories, 8 files
安装aufs文件系统:
root@localhost:~/work/csi/aufs# insmod ./aufs.ko
root@localhost:~/work/csi/aufs#
root@localhost:~/work/csi/aufs# lsmod | grep aufs
aufs 16384 0
测试aufs文件系统:
一. 根目录下创建测试文件夹/au
mkdir /au
二. 挂载aufs文件系统
mount -t aufs none /au
三. 查看/au目录下的内容是否符合预期
root@localhost:/au# tree
.
├── man_star
│ ├── jw
│ ├── lcw
│ └── ldh
└── woman_star
├── fbb
├── lbb
└── lj1
2 directories, 6 files
核心处理:
挂载
/*
 * sys_mount - mount the filesystem on block device @dev_name at @dir_name.
 * @dev_name: path of the device node; must resolve to a block device
 * @dir_name: path of the mount point; must resolve to a directory
 * @rw_flag:  not used by this implementation
 *
 * Returns 0 on success.  Errors: -ENOENT when either path cannot be
 * resolved, -EPERM when the device is not a block device, the mount point
 * is not a directory or is already flagged as mounted, and -EBUSY when the
 * mount point is in use or is the root inode, the super block cannot be
 * read, or the super block is already mounted somewhere.
 */
int sys_mount(char * dev_name, char * dir_name, int rw_flag)
{
	struct m_inode *dev_inode, *mnt_inode;
	struct super_block *sb;
	int dev, is_blk, err;

	dev_inode = namei(dev_name);
	if (!dev_inode)
		return -ENOENT;
	/* grab what we need from the device inode, then release it */
	dev = dev_inode->i_zone[0];
	is_blk = S_ISBLK(dev_inode->i_mode);
	iput(dev_inode);
	if (!is_blk)
		return -EPERM;

	mnt_inode = namei(dir_name);
	if (!mnt_inode)
		return -ENOENT;

	if (mnt_inode->i_count != 1 || mnt_inode->i_num == ROOT_INO) {
		err = -EBUSY;
	} else if (!S_ISDIR(mnt_inode->i_mode)) {
		err = -EPERM;
	} else if (!(sb = read_super(dev))) {
		err = -EBUSY;
	} else if (sb->s_imount) {
		err = -EBUSY;
	} else if (mnt_inode->i_mount) {
		err = -EPERM;
	} else {
		sb->s_imount = mnt_inode;
		mnt_inode->i_mount = 1;
		mnt_inode->i_dirt = 1;
		/* NOTE! we don't iput(mnt_inode) here; we do that in umount */
		return 0;
	}

	/* every failure after the mount point was resolved releases it */
	iput(mnt_inode);
	return err;
}
打开文件:
/*
 * sys_open - open @filename and return a file descriptor for it.
 * @filename: path to open
 * @flag:     open flags, passed on to open_namei() and stored in f_flags
 * @mode:     permission bits for a created file, masked with the umask
 *
 * Picks the lowest free slot in current->filp[] and the first entry of
 * file_table with a zero f_count, then resolves the path with
 * open_namei().  Returns the descriptor on success, -EINVAL when either
 * table is full, the open_namei() error, or -EAGAIN when check_char_dev()
 * reports non-zero for a character device.
 */
int sys_open(const char * filename, int flag, int mode)
{
	struct m_inode *inode;
	struct file *f;
	int slot, fd, err;

	mode &= 0777 & ~current->umask;

	/* lowest unused descriptor of the current task */
	fd = 0;
	while (fd < NR_OPEN && current->filp[fd])
		fd++;
	if (fd >= NR_OPEN)
		return -EINVAL;
	current->close_on_exec &= ~(1 << fd);

	/* first unused entry of the global file table */
	f = file_table;
	slot = 0;
	while (slot < NR_FILE && f->f_count) {
		slot++;
		f++;
	}
	if (slot >= NR_FILE)
		return -EINVAL;

	current->filp[fd] = f;
	f->f_count++;

	err = open_namei(filename, flag, mode, &inode);
	if (err < 0)
		goto release;

	/* ttys are somewhat special (ttyxx major==4, tty major==5) */
	if (S_ISCHR(inode->i_mode) &&
	    check_char_dev(inode, inode->i_zone[0], flag)) {
		iput(inode);
		err = -EAGAIN;
		goto release;
	}

	/* Likewise with block-devices: check for floppy_change */
	if (S_ISBLK(inode->i_mode))
		check_disk_change(inode->i_zone[0]);

	f->f_mode = inode->i_mode;
	f->f_flags = flag;
	f->f_count = 1;
	f->f_inode = inode;
	f->f_pos = 0;
	return fd;

release:
	/* give back both the descriptor slot and the file table entry */
	current->filp[fd] = NULL;
	f->f_count = 0;
	return err;
}
/*
* open_namei()
*
* namei for open - this is in fact almost the whole open-routine.
*/
/*
 * Resolve pathname for an open call.  On success the resulting inode is
 * stored in *res_inode and 0 is returned; on failure a negative errno is
 * returned.  The file is created when it does not exist and O_CREAT is set.
 */
int open_namei(const char * pathname, int flag, int mode,
struct m_inode ** res_inode)
{
const char * basename;
int inr, dev, namelen;
struct m_inode * dir, *inode;
struct buffer_head * bh;
struct dir_entry * de;
/* O_TRUNC with no access-mode bits set: add O_WRONLY */
if ((flag & O_TRUNC) && !(flag & O_ACCMODE))
flag |= O_WRONLY;
/* keep only permission bits allowed by the umask, then OR in I_REGULAR */
mode &= 0777 & ~current->umask;
mode |= I_REGULAR;
/* presumably yields the containing directory plus the last path component - confirm against dir_namei() */
if (!(dir = dir_namei(pathname, &namelen, &basename, NULL)))
return -ENOENT;
/* empty last component: hand back the directory itself, but only when no access-mode, O_CREAT or O_TRUNC bit is set */
if (!namelen)
{ /* special case: '/usr/' etc */
if (!(flag & (O_ACCMODE | O_CREAT | O_TRUNC)))
{
*res_inode = dir;
return 0;
}
iput(dir);
return -EISDIR;
}
bh = find_entry(&dir, basename, namelen, &de);
/* find_entry() returned nothing: this is the creation path */
if (!bh)
{
if (!(flag & O_CREAT))
{
iput(dir);
return -ENOENT;
}
/* creating an entry requires write permission on the directory */
if (!permission(dir, MAY_WRITE))
{
iput(dir);
return -EACCES;
}
inode = new_inode(dir->i_dev);
if (!inode)
{
iput(dir);
return -ENOSPC;
}
inode->i_uid = current->euid;
inode->i_mode = mode;
inode->i_dirt = 1;
bh = add_entry(dir, basename, namelen, &de);
if (!bh)
{
/* NOTE(review): the link count is dropped before iput(), presumably so the unused inode gets released - confirm */
inode->i_nlinks--;
iput(inode);
iput(dir);
return -ENOSPC;
}
/* point the new directory entry at the inode and mark its buffer dirty */
de->inode = inode->i_num;
bh->b_dirt = 1;
brelse(bh);
iput(dir);
*res_inode = inode;
return 0;
}
/* an entry was found: remember inode number and device, then release the buffer */
inr = de->inode;
dev = dir->i_dev;
brelse(bh);
/* reached only when the entry already exists, so O_EXCL fails here */
if (flag & O_EXCL)
{
iput(dir);
return -EEXIST;
}
/* NOTE(review): dir is not released on this path - presumably follow_link() takes care of it; confirm */
if (!(inode = follow_link(dir, iget(dev, inr))))
return -EACCES;
/* reject directories opened with any access-mode bit set, and inodes failing the permission check */
if ((S_ISDIR(inode->i_mode) && (flag & O_ACCMODE)) ||
!permission(inode, ACC_MODE(flag)))
{
iput(inode);
return -EPERM;
}
inode->i_atime = CURRENT_TIME;
if (flag & O_TRUNC)
truncate(inode);
*res_inode = inode;
return 0;
}