本文檔的Copyleft歸yfydz所有,使用GPL發布,可以自由拷貝、轉載,轉載時請保持文檔的完整性,嚴禁用於任何商業用途。
msn: [email protected] 來源:http://yfydz. 1. 前言 inode是類Unix系統的文件系統的基本索引方法,每個文件都對應一個inode,再通過inode找到文件中的實際數據,因此根據文件路徑名找到具體的inode節點就是一個很重要的處理步驟。系統會緩存用過的每個文件或目錄對應的dentry結構, 從該結構可以指向相應的inode, 每次打開文件, 都會最終對應到文件的inode,中間查找過程稱為namei。 本文介紹Linux下的路徑到文件指針的轉換過程,內核版本為2.6.19.2。 虛擬文件系統的轉換源代碼在fs/namei.c中,具體和文件系統相關的部分在fs/*/namei.c文件中。 2. 引子 由於這種轉換是一個中間過程,在具體分析namei處理前,先看看系統的調用順序是如何進入轉換的: 當用戶空間程序用open系統調用打開一個文件時,內核對應的處理是sys_open:
/* fs/open.c */
asmlinkage long sys_open(const char __user *filename, int flags, int mode) { long ret; if (force_o_largefile())
flags |= O_LARGEFILE; ret = do_sys_open(AT_FDCWD, filename, flags, mode);
/* avoid REGPARM breakage on x86: */ prevent_tail_call(ret); return ret; } 真正的打開函數是do_sys_open:
/* fs/open.c */
// dfd為AT_FDCWD long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { // 通過該函數(shù)將用戶空間的文件名傳遞到內(nèi)核 // tmp是一個cache類的動態(tài)內(nèi)存空間,用于保存文件路徑名 // char *tmp = getname(filename); int fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) {
// 獲取一個未使用的文件描述符, 和inode無關(guān) fd = get_unused_fd(); if (fd >= 0) { // 打開文件,將文件名轉(zhuǎn)換為文件結(jié)構(gòu) struct file *f = do_filp_open(dfd, tmp, flags, mode); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else { fsnotify_open(f->f_dentry); fd_install(fd, f); } } putname(tmp); } return fd; } // 文件打開 static struct file *do_filp_open(int dfd, const char *filename, int flags, int mode) { int namei_flags, error; // 注意這是結(jié)構(gòu)而不是指針 struct nameidata nd; namei_flags = flags;
if ((namei_flags+1) & O_ACCMODE) namei_flags++; // 根據(jù)文件名得到nameidata, nd作為namei空間保存結(jié)果 error = open_namei(dfd, filename, namei_flags, mode, &nd); if (!error) // 成功, nameidata再轉(zhuǎn)換為file指針 return nameidata_to_filp(&nd, flags); return ERR_PTR(error);
} 因此重點函數是open_namei函數, 實現了從文件名到inode的轉換, 也是namei的處理入口. 在分析open_namei前, 再分析一下getname, 它是用kmem_cache來處理的: // 文件名轉換, 從用戶空間拷貝到內核空間
/* fs/namei.c */ char * getname(const char __user * filename)
{ char *tmp, *result; result = ERR_PTR(-ENOMEM);
/* include/linux/fs.h */ // __getname和__putname的定義,,實際就是內(nèi)核cache的分配和釋放 // #define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) // #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) // 這里實際是分配names的cache, 該cache定義為 // names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, // SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); tmp = __getname();
if (tmp) { // cache分配成功 // 進(jìn)入實際操作函數(shù) int retval = do_getname(filename, tmp); // 要返回結(jié)果指向cache result = tmp; if (retval < 0) { // 操作失敗,釋放cache,返回錯誤 __putname(tmp); result = ERR_PTR(retval); } } // 編譯內(nèi)核時如果沒有設(shè)置CONFIG_AUDITSYSCALL, 則audit_getname為空 // 審計系統(tǒng)調(diào)用結(jié)果 audit_getname(result); return result; } static int do_getname(const char __user *filename, char *page) { int retval; unsigned long len = PATH_MAX; if (!segment_eq(get_fs(), KERNEL_DS)) {
if ((unsigned long) filename >= TASK_SIZE) return -EFAULT; if (TASK_SIZE - (unsigned long) filename < PATH_MAX) len = TASK_SIZE - (unsigned long) filename; } // 將用戶空間提供的文件名拷貝到cache中 retval = strncpy_from_user(page, filename, len); if (retval > 0) { if (retval < len) return 0; return -ENAMETOOLONG; } else if (!retval) retval = -ENOENT; return retval; } 3. namei相關數據結構
/* include/linux/namei.h */
struct nameidata {
// 路徑點 struct dentry *dentry; // 虛擬系統(tǒng)掛接點 struct vfsmount *mnt; // 路徑名中的最后的文件名或目錄名 struct qstr last; unsigned int flags; int last_type; // 目錄深度 unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; // 9 /* Intent data */
// 相關(guān)數(shù)據(jù) union { // 包含打開的文件的指針 struct open_intent open; } intent; }; struct open_intent { // 標(biāo)志 int flags; // 創(chuàng)建模式 int create_mode; // 文件指針 struct file *file; }; // 路徑結(jié)構(gòu), 屬于中間處理結(jié)構(gòu), 將文件系統(tǒng)掛接點和dentry捆綁在一起而已 struct path { struct vfsmount *mnt; struct dentry *dentry; }; /* include/linux/dcache.h */ // 文件目錄項, 在系統(tǒng)cache中
struct dentry { atomic_t d_count; unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ /* * The next three fields are touched by __d_lookup. Place them here * so they all fit in a cache line. */ struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; struct list_head d_lru; /* LRU list */
/* * d_child and d_rcu can share memory */ union { struct list_head d_child; /* child of parent list */ struct rcu_head d_rcu; } d_u; struct list_head d_subdirs; /* our children */ struct list_head d_alias; /* inode alias list */ unsigned long d_time; /* used by d_revalidate */ struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ #ifdef CONFIG_PROFILING struct dcookie_struct *d_cookie; /* cookie, if any */ #endif int d_mounted; unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; /* include/linux/fs.h */ // 文件結(jié)構(gòu) struct file { /* * fu_list becomes invalid after file_free is called and queued via * fu_rcuhead for RCU freeing */ union { struct list_head fu_list; struct rcu_head fu_rcuhead; } f_u; // 文件的dentry struct dentry *f_dentry; // 虛擬文件系統(tǒng)掛接點 struct vfsmount *f_vfsmnt; // 文件操作 const struct file_operations *f_op; atomic_t f_count; unsigned int f_flags; mode_t f_mode; loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; struct file_ra_state f_ra; unsigned long f_version;
#ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; }; 4. namei操作
4.1 open_namei /* fs/namei.c */ /*
* open_namei() * * namei for open - this is in fact almost the whole open-routine. * * Note that the low bits of "flag" aren't the same as in the open * system call - they are 00 - no permissions needed * 01 - read permission needed * 10 - write permission needed * 11 - read/write permissions needed * which is a lot more logical, and also allows the "no perm" needed * for symlinks (where the permissions are checked later). * SMP-safe */ int open_namei(int dfd, const char *pathname, int flag, int mode, struct nameidata *nd) { int acc_mode, error; struct path path; struct dentry *dir; int count = 0; // #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
// 訪問模式(由打開標志的低兩位換算出所需的讀寫權限) acc_mode = ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */
// 截斷標(biāo)志, 基本上需要寫權(quán)限, 除非要截斷的長度實際大于文件本身長度 if (flag & O_TRUNC) acc_mode |= MAY_WRITE; /* Allow the LSM permission hook to distinguish append
access from general write access. */ // 添加標(biāo)志, 也是需要寫權(quán)限 if (flag & O_APPEND) acc_mode |= MAY_APPEND; /*
* The simplest case - just a plain lookup. */ // 不需要創(chuàng)建文件 if (!(flag & O_CREAT)) { // 直接找pathname的dentry和掛接點, 結(jié)果填在nd中 error = path_lookup_open(dfd, pathname, lookup_flags(flag), nd, flag); if (error) return error; goto ok; } /*
* Create - we need to know the parent. */ // 創(chuàng)建文件的dentry和掛接點, 數(shù)據(jù)填到nd中 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); if (error) return error; /*
* We have the parent and last component. First of all, check * that we are not asked to creat(2) an obvious directory - that * will not do. */ error = -EISDIR; // 檢查nameidata結構中的last參數是否合法 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) goto exit; // 父目錄的dentry(前面是帶LOOKUP_PARENT標志查找的)
dir = nd->dentry; // 去掉查詢父目錄標志 nd->flags &= ~LOOKUP_PARENT; mutex_lock(&dir->d_inode->i_mutex); // 填充path參數, 根據nd中保存的最後一個路徑分量再查找一次dentry
// 注意dir與path.dentry並不相同: 按上面的英文註釋, dir是父目錄, path.dentry是最後一個路徑分量對應的dentry path.dentry = lookup_hash(nd); path.mnt = nd->mnt; do_last:
// 檢查path.dentry是否合法 error = PTR_ERR(path.dentry); if (IS_ERR(path.dentry)) { mutex_unlock(&dir->d_inode->i_mutex); goto exit; } // 檢查nd->intent.open.file是否合法, 這是最終要返回的文件指針
if (IS_ERR(nd->intent.open.file)) { mutex_unlock(&dir->d_inode->i_mutex); error = PTR_ERR(nd->intent.open.file); goto exit_dput; } /* Negative dentry, just create the file */
if (!path.dentry->d_inode) { // 創(chuàng)建新文件的inode, 然后返回 error = open_namei_create(nd, &path, flag, mode); if (error) goto exit; return 0; } // 現(xiàn)在是打開已經(jīng)存在的文件
/* * It already exists. */ mutex_unlock(&dir->d_inode->i_mutex); audit_inode_update(path.dentry->d_inode); error = -EEXIST;
// O_EXCL標(biāo)志是只必須打開的是不存在的文件, 文件已存在時錯誤 if (flag & O_EXCL) goto exit_dput; if (__follow_mount(&path)) {
error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; } error = -ENOENT;
if (!path.dentry->d_inode) goto exit_dput; // 如果dentry的具體FS的實現(xiàn)中定義了follow_link操作, 轉(zhuǎn) // 不過大多數(shù)FS的實現(xiàn)中都沒有定義該函數(shù) if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) goto do_link; // 從路徑中的dentry和mnt信息賦值到nameidata path_to_nameidata(&path, nd); error = -EISDIR; // 如果是一個目錄, 返回錯誤 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) goto exit; ok: // 對nd中的dentry及其inode進(jìn)行打開前的錯誤檢查 error = may_open(nd, acc_mode, flag); if (error) goto exit; return 0; // 下面是錯誤處理, 釋放掉已分配的資源, 返回錯誤
exit_dput: dput_path(&path, nd); exit: if (!IS_ERR(nd->intent.open.file)) release_open_intent(nd); path_release(nd); return error; // 處理符號連接, 找到實際文件的inode,然后重新循環(huán), 要注意回環(huán)情況的錯誤處理
do_link: error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; /* * This is subtle. Instead of calling do_follow_link() we do the * thing by hands. The reason is that this way we have zero link_count * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. * After that we have the parent and last component, i.e. * we are in the same situation as after the first path_walk(). * Well, almost - if the last component is normal we get its copy * stored in nd->last.name and we will have to putname() it when we * are done. Procfs-like symlinks just set LAST_BIND. */ // 設(shè)置查找LOOKUP_PARENT標(biāo)志 nd->flags |= LOOKUP_PARENT; error = security_inode_follow_link(path.dentry, nd); if (error) goto exit_dput; // 處理符號鏈接 error = __do_follow_link(&path, nd); if (error) { /* Does someone understand code flow here? Or it is only
* me so stupid? Anathema to whoever designed this non-sense * with "intent.open". */ release_open_intent(nd); return error; } nd->flags &= ~LOOKUP_PARENT;
// 檢查最後一段文件或目錄名的屬性情況 if (nd->last_type == LAST_BIND) goto ok; error = -EISDIR; if (nd->last_type != LAST_NORM) goto exit; if (nd->last.name[nd->last.len]) { __putname(nd->last.name); goto exit; } error = -ELOOP; // 出現回環標志: 循環超過32次 if (count++==32) { __putname(nd->last.name); goto exit; } dir = nd->dentry; mutex_lock(&dir->d_inode->i_mutex); // 更新路徑的掛接點和dentry path.dentry = lookup_hash(nd); path.mnt = nd->mnt; __putname(nd->last.name); goto do_last; } 4.2 path_lookup_open和path_lookup_create 這兩個函數找到路徑名對應的掛接點和dentry結構, 賦值到nameidata結構中; create版本只是多帶上LOOKUP_CREATE標志和創建模式, 真正建立新文件是在後面的open_namei_create中完成的: /**
* path_lookup_open - lookup a file path with open intent * @dfd: the directory to use as base, or AT_FDCWD * @name: pointer to file name * @lookup_flags: lookup intent flags * @nd: pointer to nameidata * @open_flags: open intent flags */ int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, struct nameidata *nd, int open_flags) { return __path_lookup_intent_open(dfd, name, lookup_flags, nd, open_flags, 0); } /** * path_lookup_create - lookup a file path with open + create intent * @dfd: the directory to use as base, or AT_FDCWD * @name: pointer to file name * @lookup_flags: lookup intent flags * @nd: pointer to nameidata * @open_flags: open intent flags * @create_mode: create intent flags */ static int path_lookup_create(int dfd, const char *name, unsigned int lookup_flags, struct nameidata *nd, int open_flags, int create_mode) { return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, nd, open_flags, create_mode); } 這兩個函數(shù)都是調(diào)用__path_lookup_intent_open, 只是參數(shù)不同,create中加入了LOOKUP_CREATE標(biāo)志和create_mode: static int __path_lookup_intent_open(int dfd, const char *name, unsigned int lookup_flags, struct nameidata *nd, int open_flags, int create_mode) { // 找一個空閑的文件指針 struct file *filp = get_empty_filp(); int err; // 找不到返回錯誤, 文件表溢出了
if (filp == NULL) return -ENFILE; // 在nameidate中填充打開的文件參數(shù), 這是最終會返回的文件指針 nd->intent.open.file = filp; nd->intent.open.flags = open_flags; nd->intent.open.create_mode = create_mode; // 進(jìn)行具體的路徑查找, name是路徑名 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); // 先檢查nd->intent.open.file而不是err if (IS_ERR(nd->intent.open.file)) { // 打開的文件指針錯誤 if (err == 0) { // do_path_lookup已經(jīng)成功了, 釋放path, err重新設(shè)置為錯誤值 err = PTR_ERR(nd->intent.open.file); path_release(nd); } } else if (err != 0) release_open_intent(nd); return err; } // 路徑查找 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ static int fastcall do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { int retval = 0; int fput_needed; struct file *file; // 文件系統(tǒng)指針從進(jìn)程中獲取 struct fs_struct *fs = current->fs; // 缺省情況last_type為絕對路徑, 以"/"開頭的格式
nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; // 下面只是用于增加某些變量的使用計數(shù)值, get是增加,put是減少
if (*name=='/') { // 絕對路徑格式 read_lock(&fs->lock); if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { // 檢查是否更改了root, 即用chroot // 增加altrootmnt的使用計數(shù), 其為一vfsmount結(jié)構(gòu)指針 nd->mnt = mntget(fs->altrootmnt); nd->dentry = dget(fs->altroot); read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) goto out; /* found in altroot */ read_lock(&fs->lock); } // 增加rootmnt的使用計數(shù)然后賦值到nd中 nd->mnt = mntget(fs->rootmnt); // 增加根的dentry的使用計數(shù)然后賦值到nd中 nd->dentry = dget(fs->root); read_unlock(&fs->lock); } else if (dfd == AT_FDCWD) { // 從sys_open調(diào)用來的話會到這里, 表示從當(dāng)前工作目錄的路徑開始的相對路徑 read_lock(&fs->lock); // 增加pwdmnt使用計數(shù)然后賦值到nd中 nd->mnt = mntget(fs->pwdmnt); // 增加pwd使用計數(shù)然后賦值到nd中 nd->dentry = dget(fs->pwd); read_unlock(&fs->lock); } else { struct dentry *dentry; // 輕量級的路徑查找, fd不是共享的話不會增加引用計數(shù) file = fget_light(dfd, &fput_needed); retval = -EBADF; if (!file) goto out_fail; dentry = file->f_dentry;
retval = -ENOTDIR;
if (!S_ISDIR(dentry->d_inode->i_mode)) goto fput_fail; // 檢查文件的執(zhí)行權(quán)限 retval = file_permission(file, MAY_EXEC); if (retval) goto fput_fail; // 增加f_vfsmnt的使用計數(shù) nd->mnt = mntget(file->f_vfsmnt); nd->dentry = dget(dentry); // 輕量級釋放 fput_light(file, fput_needed); } // 清空總鏈接數(shù)
current->total_link_count = 0; // 變量路徑表查詢, 核心函數(shù) retval = link_path_walk(name, nd); out: if (likely(retval == 0)) { // 在大部分情況下都會執(zhí)行到這,能正確打開路徑 if (unlikely(!audit_dummy_context() && nd && nd->dentry && nd->dentry->d_inode)) audit_inode(name, nd->dentry->d_inode); } out_fail: return retval; fput_fail:
fput_light(file, fput_needed); goto out_fail; } do_path_lookup調用的核心函數是link_path_walk: /* * Wrapper to retry pathname resolution whenever the underlying * file system returns an ESTALE. * * Retry the whole path once, forcing real lookup requests * instead of relying on the dcache. */ int fastcall link_path_walk(const char *name, struct nameidata *nd) { // 先備份一下 struct nameidata save = *nd; int result; /* make sure the stuff we saved doesn't go away */
dget(save.dentry); mntget(save.mnt); result = __link_path_walk(name, nd);
if (result == -ESTALE) { // ESTALE是失效的文件句柄錯誤 // 用備份的nameidata重新恢復, 設置LOOKUP_REVAL標志後重新查詢 *nd = save; dget(nd->dentry); mntget(nd->mnt); nd->flags |= LOOKUP_REVAL; result = __link_path_walk(name, nd); } dput(save.dentry);
mntput(save.mnt); return result;
} 真正的名稱解析函數__link_path_walk: /*
* Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. * * Returns 0 and nd will have valid dentry and mnt on success. * Returns error and drops reference to input namei data on failure. */ static fastcall int __link_path_walk(const char * name, struct nameidata *nd) { struct path next; struct inode *inode; int err; unsigned int lookup_flags = nd->flags; // 去掉起始多余的"/", 同時也說明系統(tǒng)可以允許你輸入多個"/"而不報錯
while (*name=='/') name++; // 空路徑 if (!*name) goto return_reval; // 路徑對應(yīng)的inode
inode = nd->dentry->d_inode; if (nd->depth) lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); /* At this point we know we have a real path component. */
for(;;) { // 循環(huán)處理,每個循環(huán)提取文件路徑的一個目錄名, '/'分隔 unsigned long hash; struct qstr this; unsigned int c; nd->flags |= LOOKUP_CONTINUE;
// 檢查文件權(quán)限, 包括讀寫執(zhí)行權(quán)限, 用戶/組/其他權(quán)限, 返回0為合法 err = exec_permission_lite(inode, nd); if (err == -EAGAIN) // EAGAIN表示該inode正在被操作, 檢查其執(zhí)行權(quán)限 // 而對于普通文件檢查結(jié)果將是錯誤 err = vfs_permission(nd, MAY_EXEC); // 出錯中斷循環(huán) if (err) break; // 填充quickstring結(jié)構(gòu) this.name = name; // name的第一個字符的數(shù)值 c = *(const unsigned char *)name; // 計算文件名的hash, 不包括'/'
hash = init_name_hash(); do { name++; hash = partial_name_hash(c, hash); c = *(const unsigned char *)name; } while (c && (c != '/')); // 目錄(如果有的話)的名稱長度 this.len = name - (const char *) this.name; // hash this.hash = end_name_hash(hash); /* remove trailing slashes? */
// c為0表示是最后的具體文件名了 if (!c) goto last_component; // 跳過中間的'/' while (*++name == '/'); // 到名稱尾, 說明文件名最后一個字符是'/' if (!*name) goto last_with_slashes; /*
* "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ // 如果第一個字符是'.' if (this.name[0] == '.') switch (this.len) { default: // 是一個一'.'開頭的文件或目錄名稱 break; case 2: // 第2 個字符不是".", 是普通文件或路徑名 if (this.name[1] != '.') break; // 以".."開頭, 是父目錄, 更新nd為父目錄nameidata數(shù)據(jù), inode相應(yīng)更新重新循環(huán) follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: // 以'.'開頭的當(dāng)前目錄, 忽略, 重新循環(huán) continue; } /* * See if the low-level filesystem might want * to use its own hash.. */ // 底層FS實現(xiàn)中有自己的HASH算法 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); if (err < 0) break; } /* This does the actual lookups.. */ // 根據(jù)文件/目錄名進(jìn)行具體的查找 err = do_lookup(nd, &this, &next); if (err) break; err = -ENOENT;
// inode更新為本級文件目錄的inode inode = next.dentry->d_inode; // 找不到inode, 轉(zhuǎn)錯誤處理 if (!inode) goto out_dput; err = -ENOTDIR; if (!inode->i_op) goto out_dput; if (inode->i_op->follow_link) {
// 處理符號鏈接, 在其中考慮了遞歸互相鏈接的異常處理 err = do_follow_link(&next, nd); if (err) goto return_err; err = -ENOENT; // 更新inode為實際的inode inode = nd->dentry->d_inode; if (!inode) break; err = -ENOTDIR; if (!inode->i_op) break; } else // nd中得到下一級路徑信息 path_to_nameidata(&next, nd); err = -ENOTDIR; if (!inode->i_op->lookup) break; // 繼續(xù)循環(huán)找下一目錄文件名稱 continue; /* here ends the main loop */ // 最后的文件名了, 處理和前面類似
last_with_slashes: // 最后一個字符是'/', 是一個目錄 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; last_component: /* Clear LOOKUP_CONTINUE iff it was previously unset */ nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; if (lookup_flags & LOOKUP_PARENT) goto lookup_parent; if (this.name[0] == '.') switch (this.len) {
default: break; case 2: // 文件名不是"..", 繼續(xù) if (this.name[1] != '.') break; // 文件名是"..", 到父目錄 follow_dotdot(nd); inode = nd->dentry->d_inode; /* fallthrough */ case 1: // 文件名就是".", 跳到返回處理 goto return_reval; } // 一般文件處理 // 底層FS實現(xiàn)中有自己的HASH算法 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { err = nd->dentry->d_op->d_hash(nd->dentry, &this); if (err < 0) break; } // 查找最后的文件名 err = do_lookup(nd, &this, &next); if (err) break; inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { err = do_follow_link(&next, nd); if (err) goto return_err; inode = nd->dentry->d_inode; } else // 更新nameidata中的mnt, dentry值 path_to_nameidata(&next, nd); err = -ENOENT; if (!inode) break; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; if (!inode->i_op || !inode->i_op->lookup) break; } goto return_base; lookup_parent: // 復(fù)制當(dāng)前quickstring結(jié)構(gòu)this信息到nd的last中 // 類型為LAST_NORM nd->last = this; nd->last_type = LAST_NORM; if (this.name[0] != '.') goto return_base; if (this.len == 1) nd->last_type = LAST_DOT; else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; else goto return_base; return_reval: // 返回 /* * We bypassed the ordinary revalidation routines. * We may need to check the cached dentry for staleness. */ if (nd->dentry && nd->dentry->d_sb && (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { err = -ESTALE; /* Note: we do not d_invalidate() */ if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) break; } return_base: return 0; out_dput: dput_path(&next, nd); break; } // 到這里屬于出錯了 path_release(nd); return_err: return err; } /*
* It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. */ static int do_lookup(struct nameidata *nd, struct qstr *name, struct path *path) { struct vfsmount *mnt = nd->mnt; // 從系統(tǒng)緩存的dentry的hash表中查找父dentry是nd->dentry, 名稱為name的dentry struct dentry *dentry = __d_lookup(nd->dentry, name); // 沒找到dentry, 進(jìn)行真正從存儲硬盤中查找
if (!dentry) goto need_lookup; // 需要進(jìn)行revalidate操作時先進(jìn)行validate操作 if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: // 找到, 填充path參數(shù): 掛接點mnt和目錄項dentry path->mnt = mnt; path->dentry = dentry; __follow_mount(path); return 0; need_lookup:
// 進行真正的查找: real_lookup先取得目錄的i_mutex, 再用d_lookup重查一次緩存(防止等鎖期間該dentry已被別人建立), 仍找不到才調用底層fs的lookup實現去查找 // real_lookup中的操作才反映了各個fs底層和相關標志的區別處理 dentry = real_lookup(nd->dentry, name, nd); if (IS_ERR(dentry)) goto fail; goto done; need_revalidate:
// 進(jìn)行validate操作 dentry = do_revalidate(dentry, nd); if (!dentry) goto need_lookup; if (IS_ERR(dentry)) goto fail; goto done; fail:
return PTR_ERR(dentry); } /* * This is called when everything else fails, and we actually have * to go to the low-level filesystem to find out what we should do.. * * We get the directory semaphore, and after getting that we also * make sure that nobody added the entry to the dcache in the meantime.. * SMP-safe */ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) { struct dentry * result; struct inode *dir = parent->d_inode; mutex_lock(&dir->i_mutex);
/* * First re-do the cached lookup just in case it was created * while we waited for the directory semaphore.. * * FIXME! This could use version numbering or similar to * avoid unnecessary cache lookups. * * The "dcache_lock" is purely to protect the RCU list walker * from concurrent renames at this point (we mustn't get false * negatives from the RCU list walk here, unlike the optimistic * fast walk). * * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup */ // 查找緩存中的dentry項 result = d_lookup(parent, name); if (!result) { // 沒找到, 新建dentry項 struct dentry * dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { // 調(diào)用inode的查找操作, 這是和具體文件系統(tǒng)相關(guān) result = dir->i_op->lookup(dir, dentry, nd); if (result) // 失敗, 釋放dentry dput(dentry); else // 成功, 找到的dentry作為結(jié)果返回 result = dentry; } mutex_unlock(&dir->i_mutex); return result; } /*
* Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ // 在緩存中找到dentry項, 進(jìn)行validate操作 mutex_unlock(&dir->i_mutex); if (result->d_op && result->d_op->d_revalidate) { result = do_revalidate(result, nd); if (!result) result = ERR_PTR(-ENOENT); } return result; } 小結(jié)一下函數(shù)調(diào)用順序: path_lookup_open path_lookup_create
| | V V __path_lookup_intent_open | V do_path_lookup | V link_path_walk | V __link_path_walk | V do_lookup | V real_lookup 這些函數操作都屬於虛擬文件系統操作, 對所有類型的文件系統都適用, 而從各個FS的具體實現才能看出差異和相關標志的作用. 4.3 open_namei_create static int open_namei_create(struct nameidata *nd, struct path *path, int flag, int mode) { int error; // nd當前的dentry struct dentry *dir = nd->dentry; if (!IS_POSIXACL(dir->d_inode))
mode &= ~current->fs->umask; error = vfs_create(dir->d_inode, path->dentry, mode, nd);
mutex_unlock(&dir->d_inode->i_mutex); dput(nd->dentry); nd->dentry = path->dentry; if (error) return error; /* Don't check for write permission, don't truncate */ return may_open(nd, 0, flag & ~O_TRUNC); } 4.4 path_to_nameidata // 將路徑參數賦值到nameidata結構中 static inline void path_to_nameidata(struct path *path, struct nameidata *nd) { // 釋放原來的目錄項 dput(nd->dentry); // 如果掛接點也不同,釋放掉原來的 if (nd->mnt != path->mnt) mntput(nd->mnt); // 將新路徑參數賦值到nameidata結構中 nd->mnt = path->mnt; nd->dentry = path->dentry; } 5. 結論 打開文件時, 目的是要生成一個struct file的結構的指針, 該結構中有相關文件名的名稱, dentry指針, 掛接點文件系統等信息, 而struct nameidata作為一個中間過程結構保存相關的處理結果, 最終返回需要的文件信息。
|