文件系统抽象层VFS
文件系统抽象层是把不同文件系统的对外共性接口提取出来,形成一个函数指针数组,这样,通用文件系统访问接口层只需访问文件系统抽象层,而不需关心具体文件系统的实现细节和接口。
file & dir 接口
file&dir 接口层定义了进程在内核中直接访问的文件相关信息,这定义在 file 数据结构中,具体描述如下:
// kern/fs/file.h
struct file {
enum {
FD_NONE, FD_INIT, FD_OPENED, FD_CLOSED,
} status; //访问文件的执行状态
bool readable; //文件是否可读
bool writable; //文件是否可写
int fd; //文件在filemap中的索引值
off_t pos; //访问文件的当前位置
struct inode *node; //该文件对应的内存inode指针
int open_count; //打开此文件的次数
};
而在 kern/process/proc.h
中的 proc_struct 结构中加入了描述了进程访问文件的数据接口 files_struct,其数据结构定义如下:
// kern/fs/fs.h
struct files_struct {
struct inode *pwd; //进程当前执行目录的内存inode指针
struct file *fd_array; //进程打开文件的数组
atomic_t files_count; //访问此文件的线程个数
semaphore_t files_sem; //确保对进程控制块中fs_struct的互斥访问
};
当创建一个进程后,该进程的 files_struct 将会被初始化或复制父进程的 files_struct。当用户进程打开一个文件时,将从 fd_array 数组中取得一个空闲 file 项,然后会把此 file 的成员变量 node 指针指向一个代表此文件的 inode 的起始地址。
inode 接口
index node 是位于内存的索引节点,它是 VFS 结构中的重要数据结构,因为它实际负责把不同文件系统的特定索引节点信息(甚至不能算是一个索引节点)统一封装起来,避免了进程直接访问具体文件系统。其定义如下:
// kern/vfs/inode.h
struct inode {
union { //包含不同文件系统特定inode信息的union成员变量
struct device __device_info; //设备文件系统内存inode信息
struct sfs_inode __sfs_inode_info; //SFS文件系统内存inode信息
} in_info;
enum {
inode_type_device_info = 0x1234,
inode_type_sfs_inode_info,
} in_type; //此inode所属文件系统类型
atomic_t ref_count; //此inode的引用计数
atomic_t open_count; //打开此inode对应文件的个数
struct fs *in_fs; //抽象的文件系统,包含访问文件系统的函数指针
const struct inode_ops *in_ops; //抽象的inode操作,包含访问inode的函数指针
};
在 inode 中,有一成员变量为 in_ops,这是对此 inode 的操作函数指针列表,其数据结构定义如下:
struct inode_ops {
unsigned long vop_magic;
int (*vop_open)(struct inode *node, uint32_t open_flags);
int (*vop_close)(struct inode *node);
int (*vop_read)(struct inode *node, struct iobuf *iob);
int (*vop_write)(struct inode *node, struct iobuf *iob);
int (*vop_getdirentry)(struct inode *node, struct iobuf *iob);
int (*vop_create)(struct inode *node, const char *name, bool excl, struct inode **node_store);
int (*vop_lookup)(struct inode *node, char *path, struct inode **node_store);
……
};
参照上面对 SFS 中的索引节点操作函数的说明,可以看出 inode_ops 是对常规文件、目录、设备文件所有操作的一个抽象函数表示。对于某一具体的文件系统中的文件或目录,只需实现相关的函数,就可以被用户进程访问具体的文件了,且用户进程无需了解具体文件系统的实现细节。
open系统调用的执行过程
下面我们通过打开文件的系统调用open()
的执行过程, 看看文件系统的不同层次是如何交互的。
首先,经过syscall.c
的处理之后,进入内核态,执行sysfile_open()
函数
// kern/fs/sysfile.c
/* sysfile_open - open file */
int sysfile_open(const char *__path, uint32_t open_flags) {
int ret;
char *path;
if ((ret = copy_path(&path, __path)) != 0) {
return ret;
}
ret = file_open(path, open_flags);
kfree(path);
return ret;
}
可以看到,sysfile_open
把路径复制了一份,然后调用了file_open
, file_open
调用了vfs_open
, 使用了VFS的接口。
// kern/fs/file.c
// open file
int file_open(char *path, uint32_t open_flags) {
bool readable = 0, writable = 0;
switch (open_flags & O_ACCMODE) { //解析 open_flags
case O_RDONLY: readable = 1; break;
case O_WRONLY: writable = 1; break;
case O_RDWR:
readable = writable = 1;
break;
default:
return -E_INVAL;
}
int ret;
struct file *file;
if ((ret = fd_array_alloc(NO_FD, &file)) != 0) { //在当前进程分配file descriptor
return ret;
}
struct inode *node;
if ((ret = vfs_open(path, open_flags, &node)) != 0) { //打开文件的工作在vfs_open完成
fd_array_free(file); //打开失败,释放file descriptor
return ret;
}
file->pos = 0;
if (open_flags & O_APPEND) {
struct stat __stat, *stat = &__stat;
if ((ret = vop_fstat(node, stat)) != 0) {
vfs_close(node);
fd_array_free(file);
return ret;
}
file->pos = stat->st_size; //追加写模式,设置当前位置为文件尾
}
file->node = node;
file->readable = readable;
file->writable = writable;
fd_array_open(file); //设置该文件的状态为“打开”
return file->fd;
}
// fs_array_alloc - allocate a free file item (with FD_NONE status) in open files table
static int fd_array_alloc(int fd, struct file **file_store) {
struct file *file = get_fd_array();
if (fd == NO_FD) {
for (fd = 0; fd < FILES_STRUCT_NENTRY; fd ++, file ++) {
if (file->status == FD_NONE) {
goto found;
}
}
return -E_MAX_OPEN;
}
else {
if (testfd(fd)) {
file += fd;
if (file->status == FD_NONE) {
goto found;
}
return -E_BUSY;
}
return -E_INVAL;
}
found:
assert(fopen_count(file) == 0);
file->status = FD_INIT, file->node = NULL;
*file_store = file;
return 0;
}
void fd_array_open(struct file *file) {
assert(file->status == FD_INIT && file->node != NULL);
file->status = FD_OPENED; //设置状态为“打开”
fopen_count_inc(file); //增加文件的“打开计数”
}
vfs_open
是一个比较复杂的函数,这里我们使用的打开文件的flags, 基本是参照linux,如果希望详细了解,可以阅读linux manual: open。
// kern/fs/vfs/vfsfile.c
// open file in vfs, get/create inode for file with filename path.
int vfs_open(char *path, uint32_t open_flags, struct inode **node_store) {
bool can_write = 0;
// 解析open_flags并做合法性检查
switch (open_flags & O_ACCMODE) {
case O_RDONLY:
break;
case O_WRONLY:
case O_RDWR:
can_write = 1;
break;
default:
return -E_INVAL;
}
if (open_flags & O_TRUNC) {
if (!can_write) {
return -E_INVAL;
}
}
/*
linux manual
O_TRUNC
If the file already exists and is a regular file and the
access mode allows writing (i.e., is O_RDWR or O_WRONLY) it
will be truncated to length 0. If the file is a FIFO or ter‐
minal device file, the O_TRUNC flag is ignored. Otherwise,
the effect of O_TRUNC is unspecified.
*/
int ret;
struct inode *node;
bool excl = (open_flags & O_EXCL) != 0;
bool create = (open_flags & O_CREAT) != 0;
ret = vfs_lookup(path, &node); // vfs_lookup根据路径构造inode
if (ret != 0) {//要打开的文件还不存在,可能出错,也可能需要创建新文件
if (ret == -16 && (create)) {
char *name;
struct inode *dir;
if ((ret = vfs_lookup_parent(path, &dir, &name)) != 0) {
return ret;//需要在已经存在的目录下创建文件,目录不存在,则出错
}
ret = vop_create(dir, name, excl, &node);//创建新文件
} else return ret;
} else if (excl && create) {
return -E_EXISTS;
/*
linux manual
O_EXCL Ensure that this call creates the file: if this flag is
specified in conjunction with O_CREAT, and pathname already
exists, then open() fails with the error EEXIST.
*/
}
assert(node != NULL);
if ((ret = vop_open(node, open_flags)) != 0) {
vop_ref_dec(node);
return ret;
}
vop_open_inc(node);
if (open_flags & O_TRUNC || create) {
if ((ret = vop_truncate(node, 0)) != 0) {
vop_open_dec(node);
vop_ref_dec(node);
return ret;
}
}
*node_store = node;
return 0;
}
我们看看vfs_look_up
的实现
/*
* get_device- Common code to pull the device name, if any, off the front of a
* path and choose the inode to begin the name lookup relative to.
*/
static int get_device(char *path, char **subpath, struct inode **node_store) {
int i, slash = -1, colon = -1;
for (i = 0; path[i] != '\0'; i ++) {
if (path[i] == ':') { colon = i; break; }
if (path[i] == '/') { slash = i; break; }
}
if (colon < 0 && slash != 0) {
/* *
* No colon before a slash, so no device name specified, and the slash isn't leading
* or is also absent, so this is a relative path or just a bare filename. Start from
* the current directory, and use the whole thing as the subpath.
* */
*subpath = path;
return vfs_get_curdir(node_store); //把当前目录的inode存到node_store
}
if (colon > 0) {
/* device:path - get root of device's filesystem */
path[colon] = '\0';
/* device:/path - skip slash, treat as device:path */
while (path[++ colon] == '/');
*subpath = path + colon;
return vfs_get_root(path, node_store);
}
/* *
* we have either /path or :path
* /path is a path relative to the root of the "boot filesystem"
* :path is a path relative to the root of the current filesystem
* */
int ret;
if (*path == '/') {
if ((ret = vfs_get_bootfs(node_store)) != 0) {
return ret;
}
}
else {
assert(*path == ':');
struct inode *node;
if ((ret = vfs_get_curdir(&node)) != 0) {
return ret;
}
/* The current directory may not be a device, so it must have a fs. */
assert(node->in_fs != NULL);
*node_store = fsop_get_root(node->in_fs);
vop_ref_dec(node);
}
/* ///... or :/... */
while (*(++ path) == '/');
*subpath = path;
return 0;
}
/*
* vfs_lookup - get the inode according to the path filename
*/
int vfs_lookup(char *path, struct inode **node_store) {
int ret;
struct inode *node;
if ((ret = get_device(path, &path, &node)) != 0) {
return ret;
}
if (*path != '\0') {
ret = vop_lookup(node, path, node_store);
vop_ref_dec(node);
return ret;
}
*node_store = node;
return 0;
}
/*
* vfs_lookup_parent - Name-to-vnode translation.
* (In BSD, both of these are subsumed by namei().)
*/
int vfs_lookup_parent(char *path, struct inode **node_store, char **endp){
int ret;
struct inode *node;
if ((ret = get_device(path, &path, &node)) != 0) {
return ret;
}
*endp = path;
*node_store = node;
return 0;
}
我们注意到,这个流程中,有大量以vop
开头的函数,它们都通过一些宏和函数的转发,最后变成对inode
结构体里的inode_ops
结构体的“成员函数”(实际上是函数指针)的调用。对于SFS文件系统的inode
来说,会变成对sfs文件系统的具体操作。sfs的这些具体接口的实现较为繁琐,可以在kern/fs/sfs/sfs_inode.c
具体查看。我们的练习要求在kern/fs/sfs/sfs_io.c
填写一个函数。
// kern/fs/sfs/sfs_inode.c
// The sfs specific DIR operations correspond to the abstract operations on a inode.
static const struct inode_ops sfs_node_dirops = {
.vop_magic = VOP_MAGIC,
.vop_open = sfs_opendir,
.vop_close = sfs_close,
.vop_fstat = sfs_fstat,
.vop_fsync = sfs_fsync,
.vop_namefile = sfs_namefile,
.vop_getdirentry = sfs_getdirentry,
.vop_reclaim = sfs_reclaim,
.vop_gettype = sfs_gettype,
.vop_lookup = sfs_lookup,
};
/// The sfs specific FILE operations correspond to the abstract operations on a inode.
static const struct inode_ops sfs_node_fileops = {
.vop_magic = VOP_MAGIC,
.vop_open = sfs_openfile,
.vop_close = sfs_close,
.vop_read = sfs_read,
.vop_write = sfs_write,
.vop_fstat = sfs_fstat,
.vop_fsync = sfs_fsync,
.vop_reclaim = sfs_reclaim,
.vop_gettype = sfs_gettype,
.vop_tryseek = sfs_tryseek,
.vop_truncate = sfs_truncfile,
};
最后更新于