重读2.4 048 fs/locks.c
[url]http://docs.google.com/Doc?id=dcbsxfpf_71f49z6wdg[/url]2008-1-16
在fcntl.c的分析中已经对file lock做了一个概要性质的介绍. 这里简单回顾下. linux有两种文件加锁的系统调用:flock, fcntl
(lockf就是fcntl).
fcntl默认是Advisory lock. Mandatory lock需要以特殊的方式安装文件系统然后再把文件的属性改为:disabling group
execute + enabling the set-group-ID. 然后使用fcntl加锁后就是Mandatory lock了.
flock只容许对整个文件进行加锁,是BSD风格的,和fcntl实现的posix锁都是advisory锁.而在linux内部,posix锁,flock锁和文件
lease都是使用 file_lock来实现的.在浏览这个文件之前,先复习下fcntl这个重要接口:
/*
 * Excerpt of do_fcntl(): dispatches an fcntl command for an open file.
 * Only the cases relevant to POSIX locks, owner/signal bookkeeping, leases
 * and directory notification are shown; the "....." / "..." lines mark code
 * elided from the excerpt (including the declaration and initial value of
 * err, and the end of the function).
 *
 * fd   - descriptor the command was issued on
 * cmd  - fcntl command (F_GETLK, F_SETLK, ...)
 * arg  - command argument; reinterpreted per command (pointer or integer)
 * filp - the struct file behind fd
 */
static long do_fcntl(unsigned int fd, unsigned int cmd,
unsigned long arg, struct file * filp)
{
switch (cmd) {
.....
case F_GETLK: /* POSIX lock operations */
err = fcntl_getlk(fd, (struct flock *) arg);
break;
case F_SETLK:
case F_SETLKW:
/* Both the non-blocking and the waiting variant go through fcntl_setlk; cmd tells them apart. */
err = fcntl_setlk(fd, cmd, (struct flock *) arg);
break;
case F_GETOWN:
/*
 * As discussed in the dnotify.c analysis, the owner records the process
 * to be notified when the file changes. The owner is used differently
 * depending on context:
 *   1. dir notify
 *   2. lease (via fl->fl_fasync)
 *   3. fasync (via specific fasync queue)
 */
err = filp->f_owner.pid;
break;
case F_SETOWN:
lock_kernel();
/* Record the new owner pid together with the caller's uid/euid. */
filp->f_owner.pid = arg;
filp->f_owner.uid = current->uid;
filp->f_owner.euid = current->euid;
err = 0;
/* Sockets additionally get the request forwarded to sock_fcntl. */
if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
err = sock_fcntl (filp, F_SETOWN, arg);
unlock_kernel();
break;
case F_GETSIG:
err = filp->f_owner.signum;
break;
case F_SETSIG:
/* arg == 0 restores default behaviour. */
/*
 * NOTE(review): arg is declared unsigned long, so `arg < 0` can never be
 * true; only the upper bound is effective. On the break path err keeps
 * whatever value the elided code gave it (presumably an error code --
 * confirm against the full source).
 */
if (arg < 0 || arg > _NSIG) {
break;
}
err = 0;
filp->f_owner.signum = arg;
break;
case F_GETLEASE:
err = fcntl_getlease(filp);
break;
case F_SETLEASE:
err = fcntl_setlease(fd, filp, arg);
break;
case F_NOTIFY: /* directory-change notification */
err = fcntl_dirnotify(fd, filp, arg);
break;
...
}
然后看文件的全部函数,可以将其分成几个功能部分:
0) struct file_lock 的管理
/*
 * One lock record. The same structure is used for POSIX locks, flock locks
 * and leases; fl_flags says which kind a given record is.
 */
struct file_lock {
struct file_lock *fl_next; /* singly linked list for this inode: file_locks hang off the inode */
struct list_head fl_link; /* links the record into the global list file_lock_list (a lease or file_lock that is in use) */
struct list_head fl_block; /* circular list of blocked processes: the blocked file_locks (the process
itself sleeps on that file_lock's fl_wait) (mandatory or flock) */
fl_owner_t fl_owner; /* typedef struct files_struct *fl_owner_t; the files_struct of the process that took the lock */
unsigned int fl_pid; /* pid of the process that took the lock */
wait_queue_head_t fl_wait; /* wait queue of the lock: per the article, used by mandatory locks and flock */
struct file *fl_file;/* the file being locked */
unsigned char fl_flags;/* FL_POSIX FL_FLOCK FL_BROKEN FL_ACCESS FL_LOCKD FL_LEASE */
unsigned char fl_type; /* F_RDLCK F_WRLCK F_UNLCK */
loff_t fl_start;
loff_t fl_end;
/* Per the article, the next three callbacks are of little use here and are all NULL. */
void (*fl_notify)(struct file_lock *); /* unblock callback */
void (*fl_insert)(struct file_lock *); /* lock insertion callback */
void (*fl_remove)(struct file_lock *); /* lock removal callback */
struct fasync_struct * fl_fasync; /* for lease break notifications */
union {
struct nfs_lock_info nfs_fl;
} fl_u;
};
管理比较简单,就是free alloc而已,没有像dentry和inode那样的cache机制了(因为也不把flock写到磁盘的...).再说下file_lock
的类别: fl_flags指出是哪种类型的锁:posix,lease,flock三种,还有access,access是一个临时锁,用于加锁的时候发现有冲突时把
access类型的锁(总是posix类型的)挂入冲突链表,而进程是wait在这个access类型的锁的fl_wait上(Mandatory ).(这个设计需要注
意)
static struct file_lock *locks_alloc_lock(int account)
static inline void locks_free_lock(struct file_lock *fl)
void locks_init_lock(struct file_lock *fl)
void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
1)Advisory lock/ Mandatory lock 的实现
上面说过advisory lock有:fcntl实现的posix锁和flock实现的BSD风格的锁. Mandatory锁是posix锁的一种加强形式.开启方式上面也
都说过了. 下面几个函数是比较简单的几个:
static int assign_type(struct file_lock *fl, int type)/*处理fl_type: F_RDLCK F_WRLCK F_UNLCK*/
static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l)/*把用户传递的l转换成fl*/
static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,...)/*同上*/
另外一部分函数实现锁检测,判断是否已经加锁,有没有冲突.从大的模块看去,posix lock, mandatory lock,和flock各有一个函数用
于判定锁是否冲突:
/*
 * Conflict test for POSIX advisory locks.
 * Main callers (per the article): posix_lock_file, posix_test_lock.
 *
 * caller_fl - the lock being requested/tested
 * sys_fl    - a lock already recorded on the inode
 *
 * Returns 0 when sys_fl is not a POSIX lock, when both locks have the same
 * owner, or when the two ranges do not overlap; otherwise returns whatever
 * locks_conflict() decides for the pair.
 */
static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
/* A process is never blocked by a lock it placed itself. */
if (!(sys_fl->fl_flags & FL_POSIX) ||
locks_same_owner(caller_fl, sys_fl))
return (0);
/* Check whether they overlap */
if (!locks_overlap(caller_fl, sys_fl)) /* no overlap (different regions are locked) means no conflict */
return 0;
return (locks_conflict(caller_fl, sys_fl)); /* per the article: a reader does not block a reader, a writer blocks everyone */
}
/*
 * Conflict test for flock() locks.
 * Sole caller (per the article): flock_lock_file.
 *
 * Returns 0 when sys_fl is not a flock lock or when both locks belong to the
 * same struct file; with MSNFS defined, also 0 when either lock type carries
 * LOCK_MAND. Otherwise returns whatever locks_conflict() decides.
 */
static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
{
/* flock locks taken through the same filp never conflict (e.g. dup'ed descriptors share one fl_file, i.e. one struct file *) */
if (!(sys_fl->fl_flags & FL_FLOCK) ||(caller_fl->fl_file == sys_fl->fl_file))
return (0);
#ifdef MSNFS
if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
return 0;
#endif
return (locks_conflict(caller_fl, sys_fl)); /* same type check that posix_locks_conflict ends with */
}
/*
 * Mandatory-lock conflict check, applied when a file is read or written.
 * Main caller (per the article): locks_verify_area.
 *
 * Only mandatory locking makes the *check* itself wait (that is the
 * enforcement); flock and POSIX advisory locks only have to wait while a lock
 * is being acquired (plain reads/writes and getlk never wait).
 *
 * The "...." lines mark code elided from this excerpt.
 *
 * Returns 0 when no conflicting lock remains, -EAGAIN for a conflict on a
 * file opened O_NONBLOCK, -EDEADLK when waiting would deadlock, or the
 * non-zero result of locks_block_on().
 */
int locks_mandatory_area(int read_write, struct inode *inode,struct file *filp, loff_t offset,size_t count)
{
struct file_lock *fl;
struct file_lock *new_fl = locks_alloc_lock(0);
int error;
/* Build a temporary "access" lock (remaining field setup is elided). */
/* NOTE(review): no NULL check of new_fl is visible here; presumably it is in the elided lines -- confirm. */
....
new_fl->fl_flags = FL_POSIX | FL_ACCESS;
new_fl->fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
....
error = 0;
lock_kernel();
repeat:
/* Find a conflicting lock and block on it: the process waits on new_fl->fl_wait, and new_fl is queued on fl->fl_block. */
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & FL_POSIX))
continue;
/* Stop as soon as a lock starts beyond the end of our range. */
if (fl->fl_start > new_fl->fl_end)
break;
if (posix_locks_conflict(new_fl, fl)) {
error = -EAGAIN;
if (filp && (filp->f_flags & O_NONBLOCK))
break;
error = -EDEADLK;
if (posix_locks_deadlock(new_fl, fl)) /* deadlock detection */
break;
/* The same block-on operation is used by flock and file leases. */
/* The process waits on new_fl->fl_wait; new_fl (the access lock) is queued on fl->fl_block. */
error = locks_block_on(fl, new_fl);
if (error != 0)
break;
/* Be careful about having slept:
 * If we've been sleeping someone might have
 * changed the permissions behind our back.
 */
if ((inode->i_mode & (S_ISGID | S_IXGRP)) != S_ISGID)
break;
/* Rescan the list from the start after having waited. */
goto repeat;
}
}
locks_free_lock(new_fl); /* release the temporary access file_lock */
unlock_kernel();
return error;
}
然后看看死锁避免算法:
/*
 * Deadlock avoidance for POSIX locks: decide whether letting the owner of
 * caller_fl wait for block_fl would close a cycle of waiters.
 *
 * caller_fl - the lock the caller wants to place
 * block_fl  - the existing lock that is in the way
 *
 * Returns 1 if following the chain of blocked owners leads back to the
 * caller (owner and pid both match), 0 otherwise.
 */
static int posix_locks_deadlock(struct file_lock *caller_fl,
struct file_lock *block_fl)
{
struct list_head *tmp;
fl_owner_t caller_owner, blocked_owner;
unsigned int caller_pid, blocked_pid;
caller_owner = caller_fl->fl_owner; // the caller: say process X, which is trying to obtain this lock
caller_pid = caller_fl->fl_pid;
blocked_owner = block_fl->fl_owner; // process A already holds this lock (it is not blocked)
blocked_pid = block_fl->fl_pid;
next_task:
if (caller_owner == blocked_owner && caller_pid == blocked_pid) /* X == A: the chain has come back to the caller, so waiting would deadlock */
return 1;
list_for_each(tmp, &blocked_list) { /* the holder is not the caller, so check whether A is itself waiting for some other lock */
struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link);
if ((fl->fl_owner == blocked_owner)
&& (fl->fl_pid == blocked_pid)) {
/* NOTE(review): for an entry on blocked_list, fl_next is treated here as the lock being waited for -- confirm against locks_insert_block. */
fl = fl->fl_next;
blocked_owner = fl->fl_owner; /* A is waiting for a lock of another process B; B in turn must not (transitively) be waiting for X */
blocked_pid = fl->fl_pid;
goto next_task;
}
}
return 0;
}
除了上面提到的真正需要等待的情况,还有锁检测,只是看看有没有冲突...
posix_test_lock(struct file *filp, struct file_lock *fl) /*posix fcntl getlk使用,返回冲突的fl*/
int locks_mandatory_locked(struct inode *inode) /*只看有没有锁,不看锁定哪里,shared mmap互斥用*/
当文件关闭的时候需要清理locks,这一点上posix lock和flock的清理时机是不同的(具体的函数就不列举了....):
void locks_remove_posix(struct file *filp, fl_owner_t owner) /*posix lock在进程关闭时把本进程加的锁清理掉*/
void locks_remove_flock(struct file *filp) /*而flock只有当os释放文件本身时才会清理(比如dup了,那么原始进程close文
件的时候就不能把flock清理掉)*/
2)posix lock加锁的实现
就是函数int posix_lock_file,看起来挺长挺繁琐. 这里做一个分析方式的讨论. 首先,posix lock如果是setlk的话先去处理可能的
冲突情况,并预先分配好file_lock.然后就是合并操作带来的繁琐操作,合并分两种:
I)类型相同 那么就寻找当前fl中有没有邻接或者包含,只有这种情况才能合并.
II)类型不同 只能合并overlap的fl,newfl把当前的分成两个部分,或者newfl从左边或者从右边覆盖当前fl
分析的时候记着画张图,就简单多了. 具体代码就不列举了.
下面的函数也不再分析或列举.
int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
int fcntl_getlk(unsigned int fd, struct flock *l)
static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
注: locks_block_on 和 locks_wake_up_blocks是一对相互呼应的函数.file_lock的等待是这样实现的:(以mandatory为例)
分配一个新的file_lock,比如newfl,(mandatory lock将其标记为access类型),如果有冲突发生,将newfl接入冲突的fl的fl_block队
列,然后进程是wait在newfl->fl_wait之上的.
3)flock
flock和posix lock都共享一组机制.看完posix的lock再看flock也就比较容易理解.
static inline int flock_translate_cmd(int cmd) /*simple*/
static struct file_lock *flock_make_lock(struct file *filp, unsigned int type) /*只针对整个文件*/
asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)/*系统调用接口,*/
加锁的过程:(把filep想成是加锁者吧,好理解些)
1)如果是unlock,就找到同一个filep的flock,删除即可. (想象是'filep'发起的lock,一个进程open两次同一个file...)
2)如果是相同的锁, read对read可以成功, r 对w,或者w对w就冲突,见flock_locks_conflict.
3)对同一个filep的flock可能有多个,比如都是read(多次open),如果要加write类型的锁可能需要等待多个flock撤销
/*
 * Core of flock(): apply lock_type to the file behind filp.
 *
 * filp      - the struct file acting as the lock holder
 * lock_type - requested type; F_UNLCK removes filp's existing flock lock
 * wait      - non-zero to sleep until conflicting locks go away
 *
 * Returns 0 on success, -ENOLCK if no lock record could be allocated,
 * -EAGAIN on a conflict when wait is zero, or the non-zero result of
 * locks_block_on().
 */
static int flock_lock_file(struct file *filp, unsigned int lock_type,unsigned int wait)/* core function */
{
struct file_lock *fl;
struct file_lock *new_fl = NULL; /* a file_lock is allocated for every non-unlock request so the same wait mechanism as for POSIX locks can be used */
struct file_lock **before;
struct inode * inode = filp->f_dentry->d_inode;
int error, change;
int unlock = (lock_type == F_UNLCK);
/*
 * If we need a new lock, get it in advance to avoid races.
 */
if (!unlock) {
error = -ENOLCK;
new_fl = flock_make_lock(filp, lock_type); /* pre-allocate */
if (!new_fl)
return error;
}
error = 0;
search: /* this pass ensures that a given filp ends up with at most one flock lock */
change = 0;
before = &inode->i_flock;
while (((fl = *before) != NULL) && (fl->fl_flags & FL_FLOCK)) {
if (filp == fl->fl_file) {/* flock treats the filp (not the process) as the lock holder */
if (lock_type == fl->fl_type) /* note: asking twice for an exclusive lock through dup'ed fds therefore succeeds both times */
goto out;
change = 1;
break;
}
before = &fl->fl_next;
}
/* change means that we are changing the type of an existing lock,
 * or else unlocking it.
 */
if (change) {
/* N.B. What if the wait argument is false? */
locks_delete_lock(before, !unlock);/* note: a request of a different type on the same filp replaces the existing lock */
/*
 * If we waited, another lock may have been added ...
 */
if (!unlock)
goto search;
}
if (unlock)
goto out;
repeat: /* this pass enforces mutual exclusion between different filps whose locks conflict */
for (fl = inode->i_flock; (fl != NULL) && (fl->fl_flags & FL_FLOCK);
fl = fl->fl_next) {/* flock locks live on the inode (the physical file) */
if (!flock_locks_conflict(new_fl, fl))
continue;
error = -EAGAIN;
if (!wait)
goto out;
error = locks_block_on(fl, new_fl);
if (error != 0)
goto out;
/* Rescan from the head of the list after having waited. */
goto repeat;
}
locks_insert_lock(&inode->i_flock, new_fl);
/* new_fl is now on the inode's list; clear it so the exit path does not free it. */
new_fl = NULL;
error = 0;
out:
if (new_fl)
locks_free_lock(new_fl);
return error;
}
4)file lease
还是看看man怎么说:
A file lease provides a mechanism whereby the process holding the lease (the "lease holder") is notified
(via delivery of a signal) when a process (the "lease breaker") tries to open(2) or truncate(2) that file.
我能找到的例子是 Samba 使用了file lease 来实现一种opportunistic locks (oplocks).具体的file lease也是一个比较隐晦的操作:
hold file lease的进程需要主动降级或者删除lease,否则内核会强制的降级或者删除其lease.不知道这样是什么道理....
另外看似如果在一个文件上使用了lease,就不能再有flock或者posix lock了(有冲突...很奇怪,file lease的代码假定lease类型的锁必须在
inode->i_flock的第一个上,否则就会有问题.... 好在,linux2.6修正了这个问题...).
static int lease_alloc(struct file *filp, int type, struct file_lock **flp) /*一个类型为FL_LEASE的file lock*/
/*
 * Report the lease type held on the file behind filp.
 *
 * Only the first entry of the inode's i_flock list is examined: F_UNLCK is
 * returned when the list is empty or that entry is not a lease; otherwise its
 * fl_type is returned with the F_INPROGRESS bit masked off.
 */
int fcntl_getlease(struct file *filp)
{
struct file_lock *fl;
fl = filp->f_dentry->d_inode->i_flock; /* the article regards looking only at the list head as a serious weakness of this lease code; Linux 2.6 is completely different */
if ((fl == NULL) || ((fl->fl_flags & FL_LEASE) == 0))
return F_UNLCK;
return fl->fl_type & ~F_INPROGRESS;
}
__get_lease, linux2.6中更名为 __break_lease,更容易理解些..,这个函数是在open和truncate的时候进行的租约判定:
/*
 * Lease check performed on open/truncate (per the article): make the current
 * lease holder(s) give up or downgrade their lease so the caller can proceed.
 *
 * inode - inode whose i_flock list starts with the lease(s)
 * mode  - access mode of the caller; FMODE_WRITE and O_NONBLOCK are tested
 *
 * Returns 0 when the caller may proceed, -EWOULDBLOCK when it would have to
 * wait but is non-blocking or is itself the lease owner, the lease_alloc()
 * error when a wait record was needed but could not be allocated, or the
 * negative result of the blocking helpers.
 *
 * NOTE(review): inode->i_flock is dereferenced without a NULL check on entry;
 * presumably the caller only invokes this when a lease is present -- confirm.
 */
int __get_lease(struct inode *inode, unsigned int mode)
{
int error = 0, future;
struct file_lock *new_fl, *flock;
struct file_lock *fl;
int alloc_err;
alloc_err = lease_alloc(NULL, 0, &new_fl); /* plays the same role as the POSIX "access" lock: a record to wait on, so the same blocking code can be reused */
lock_kernel();
flock = inode->i_flock; /* assumes the first lock on the list is the FL_LEASE one; the article notes 2.6 changed this (and adds that it may have misread the code) */
if (flock->fl_type & F_INPROGRESS) { /* a break is already in progress: wait for the holder to release/downgrade the lease */
if ((mode & O_NONBLOCK)
|| (flock->fl_owner == current->files)) {
/* O_NONBLOCK is easy to understand; the article questions why "caller owns the lease" also yields would-block. */
/* If the caller itself owns the lease, would-block is returned (the article asks for comments on this). */
error = -EWOULDBLOCK;
goto out;
}
/* Waiting needs new_fl, so an allocation failure is fatal from here on. */
if (alloc_err != 0) {
error = alloc_err;
goto out;
}
do {
error = locks_block_on(flock, new_fl); /* F_INPROGRESS case: wait until the leases have been dealt with */
if (error != 0)
goto out;
flock = inode->i_flock;
if (!(flock && (flock->fl_flags & FL_LEASE)))/* no lease left at the head of the list; the article calls this tricky and 2.6 much cleaner */
goto out;
} while (flock->fl_type & F_INPROGRESS);
}
/* Decide from the requested access whether the lease must be removed or only downgraded. */
if (mode & FMODE_WRITE) {
/* If we want write access, we have to revoke any lease. */
future = F_UNLCK | F_INPROGRESS;
} else if (flock->fl_type & F_WRLCK) {
/* Downgrade the exclusive lease to a read-only lease. */
future = F_RDLCK | F_INPROGRESS;
} else {
/* the existing lease was read-only, so we can read too. */
goto out;
}
/* An allocation failure only matters if the caller is not the lease owner (the owner never waits below). */
if (alloc_err && (flock->fl_owner != current->files)) {
error = alloc_err;
goto out;
}
/* Mark every lease at the head of the list with the target type. */
fl = flock;
do {
fl->fl_type = future;
fl = fl->fl_next;
} while (fl != NULL && (fl->fl_flags & FL_LEASE));
kill_fasync(&flock->fl_fasync, SIGIO, POLL_MSG); /* notify the lease holder; see the fasync discussion in the fcntl analysis */
if ((mode & O_NONBLOCK) || (flock->fl_owner == current->files)) {
error = -EWOULDBLOCK;
goto out;
}
/* error doubles as the timeout argument passed to locks_block_on_timeout below. */
if (lease_break_time > 0)
error = lease_break_time * HZ;
else
error = 0;
restart:
error = locks_block_on_timeout(flock, new_fl, error); /* wait for the holder to downgrade/release */
if (error == 0) {
/* We timed out. Unilaterally break the lease. */
locks_delete_lock(&inode->i_flock, 0); /* timed out: forcibly remove the lock at the head of the list */
printk(KERN_WARNING "lease timed out\n");
} else if (error > 0) {
/* Woken with time remaining: keep waiting while a lease is still at the head of the list. */
flock = inode->i_flock;
if (flock && (flock->fl_flags & FL_LEASE))
goto restart;
error = 0;
}
out:
unlock_kernel();
/* new_fl exists only if lease_alloc succeeded. */
if (!alloc_err)
locks_free_lock(new_fl);
return error;
}
然后,关于lease,还有一个函数:
/*
 * Excerpt of fcntl_setlease(): set, change or remove a lease on filp.
 * Local declarations, the out_unlock label and other parts are elided from
 * this excerpt (marked by the dotted lines).
 *
 * fd   - descriptor the request came in on (passed on to fasync_helper)
 * filp - the struct file the lease belongs to
 * arg  - requested lease type (F_RDLCK / F_WRLCK are tested below)
 *
 * Visible error paths return -EAGAIN when a write lease is requested on a
 * file with extra dentry/inode references, or when conflicting leases of
 * other filps exist.
 */
int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
强烈建议不要看这个函数了,读一下2.6的对应函数即可,这个实在不是很好.
{
/* sanity check */
/*
 * FIXME: What about F_RDLCK and files open for writing?
 */
if ((arg == F_WRLCK) /* a write lease requires that nobody else has the file open (per the article, each open bumps the dentry count by one) */
&& ((atomic_read(&dentry->d_count) > 1)
|| (atomic_read(&inode->i_count) > 1)))
return -EAGAIN;
before = &inode->i_flock;
lock_kernel();
/* comments in 2.6
 * At this point, we know that if there is an exclusive
 * lease on this file, then we hold it on this filp
 * (otherwise our open of this file would have blocked).
 * And if we are trying to acquire an exclusive lease,
 * then the file is not open by anyone (including us)
 * except for this filp.
 */
/* Walk the leading run of lease entries: remember our own, count the others by type. */
while ((fl = *before) != NULL) {
if (fl->fl_flags != FL_LEASE)
break;
if (fl->fl_file == filp)
my_before = before; /* leasing the same filp several times: only the last request takes effect, i.e. there is a single lock record per filp */
else if (fl->fl_type & F_WRLCK)
wrlease_count++;
else
rdlease_count++;
before = &fl->fl_next;
}
/* A read lease conflicts with others' write leases; a write lease conflicts with any other lease. */
if ((arg == F_RDLCK && (wrlease_count > 0)) ||
(arg == F_WRLCK && ((rdlease_count + wrlease_count) > 0))) {
error = -EAGAIN;
goto out_unlock;
}
if (my_before != NULL) {/* this filp already has a lease: modify it in place instead of adding a second record */
error = lease_modify(my_before, arg, fd, filp);
goto out_unlock;
}
............
error = lease_alloc(filp, arg, &fl);
if (error)
goto out_unlock;
error = fasync_helper(fd, filp, 1, &fl->fl_fasync);
if (error < 0) {
/* Registration for lease-break notification failed: drop the freshly allocated lease. */
locks_free_lock(fl);
goto out_unlock;
}
..........
}
许有未决问题....
注:略过了对NFS的分析.... 辛苦了,呵呵 呵呵,hly的系列文章啊~ [attach]3441[/attach][attach]3442[/attach]
页:
[1]