久久国产成人av_抖音国产毛片_a片网站免费观看_A片无码播放手机在线观看,色五月在线观看,亚洲精品m在线观看,女人自慰的免费网址,悠悠在线观看精品视频,一级日本片免费的,亚洲精品久,国产精品成人久久久久久久

分享

sys_open分析,從文件名找到文件信息(namei)

 szhlwang 2011-06-30
本文檔的Copyleft歸yfydz所有,使用GPL發布,可以自由拷貝、轉載,轉載時請保持文檔的完整性,嚴禁用於任何商業用途。
msn: [email protected]
來源:http://yfydz.

1. 前言

inode是類Unix系統的文件系統的基本索引方法,每個文件都對應一個inode,再通過inode找到文件中的實際數據,因此根據文件路徑名找到具體的inode節點就是一個很重要的處理步驟。系統會緩存用過的每個文件或目錄對應的dentry結構,從該結構可以指向相應的inode;每次打開文件,都會最終對應到文件的inode,中間的查找過程稱為namei。

本文介紹Linux下的路徑到文件指針的轉換過程,內核版本為2.6.19.2。

虛擬文件系統的轉換源代碼在fs/namei.c中,具體和文件系統相關的部分在fs/*/namei.c文件中。

2. 引子

由於這種轉換是一個中間過程,在具體分析namei處理前,先看看系統的調用順序是如何進入轉換的:
當用戶空間程序用open系統調用打開一個文件時,內核對應的處理是sys_open:
/* fs/open.c */
asmlinkage long sys_open(const char __user *filename, int flags, int mode)
{
	long fd;

	/* Add O_LARGEFILE whenever force_o_largefile() asks for it. */
	if (force_o_largefile())
		flags = flags | O_LARGEFILE;

	/* AT_FDCWD: relative paths start from the current working directory. */
	fd = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(fd);
	return fd;
}
真正的打開函數是do_sys_open:
/* fs/open.c */
// dfd為AT_FDCWD
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	struct file *filp;
	/* Kernel-space copy of the user-supplied path name. */
	char *kname = getname(filename);
	int fd = PTR_ERR(kname);

	/* Copying the name failed: fd already holds the error code. */
	if (IS_ERR(kname))
		return fd;

	/* Reserve a descriptor number first; this does not touch any inode. */
	fd = get_unused_fd();
	if (fd < 0)
		goto out_putname;

	/* Resolve the path name and build the struct file. */
	filp = do_filp_open(dfd, kname, flags, mode);
	if (IS_ERR(filp)) {
		/* Give the reserved descriptor back and report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out_putname;
	}

	/* Success: notify fsnotify and bind the file to the descriptor. */
	fsnotify_open(filp->f_dentry);
	fd_install(fd, filp);

out_putname:
	/* The kernel copy of the name is released on every path past getname(). */
	putname(kname);
	return fd;
}

// 文件打開
static struct file *do_filp_open(int dfd, const char *filename, int flags,
     int mode)
{
	/* Lives on the stack: a structure, not a pointer. */
	struct nameidata nd;
	int lookup_mode = flags;
	int err;

	/*
	 * Bump the access-mode bits by one unless that would carry out of
	 * O_ACCMODE (see the "low bits of flag" note on open_namei()).
	 */
	if ((lookup_mode + 1) & O_ACCMODE)
		lookup_mode++;

	/* Resolve the file name; nd receives the lookup result. */
	err = open_namei(dfd, filename, lookup_mode, mode, &nd);
	if (err)
		return ERR_PTR(err);

	/* Lookup succeeded: turn the nameidata into a struct file pointer. */
	return nameidata_to_filp(&nd, flags);
}

因此重點函數是open_namei函數,它實現了從文件名到inode的轉換,也是namei的處理入口。

在分析open_namei前,再分析一下getname,它使用kmem_cache來分配保存文件名的緩衝區:
// 文件名轉(zhuǎn)換, 從用戶空間拷貝到內(nèi)核空間
/* fs/namei.c */
char * getname(const char __user * filename)
{
	char *result;
	/*
	 * __getname()/__putname() (include/linux/fs.h) allocate from and free
	 * to the "names_cache" slab cache, whose objects are PATH_MAX bytes:
	 *   #define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL)
	 *   #define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
	 *   names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
	 *        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
	 */
	char *buf = __getname();

	if (!buf) {
		/* No buffer available. */
		result = ERR_PTR(-ENOMEM);
	} else {
		/* Copy the name from user space into the buffer. */
		int rc = do_getname(filename, buf);

		if (rc < 0) {
			/* Copy failed: free the buffer and hand back the error. */
			__putname(buf);
			result = ERR_PTR(rc);
		} else {
			/* Success: the caller owns the buffer from here on. */
			result = buf;
		}
	}

	/*
	 * Report the outcome to the audit subsystem (per the original
	 * annotation, a no-op when CONFIG_AUDITSYSCALL is not set).
	 */
	audit_getname(result);
	return result;
}

static int do_getname(const char __user *filename, char *page)
{
	unsigned long len = PATH_MAX;
	int retval;

	/* For user-space callers, clamp the copy so it cannot run past TASK_SIZE. */
	if (!segment_eq(get_fs(), KERNEL_DS)) {
		unsigned long uaddr = (unsigned long) filename;

		if (uaddr >= TASK_SIZE)
			return -EFAULT;
		if (TASK_SIZE - uaddr < PATH_MAX)
			len = TASK_SIZE - uaddr;
	}

	/* Copy the user-supplied file name into the kernel buffer. */
	retval = strncpy_from_user(page, filename, len);

	/* Negative: the copy itself failed, pass the error through. */
	if (retval < 0)
		return retval;
	/* Zero length: an empty name is reported as -ENOENT. */
	if (retval == 0)
		return -ENOENT;
	/* Copied fewer than len bytes: the name fit. */
	if (retval < len)
		return 0;
	/* The copy filled all len bytes: the name is too long. */
	return -ENAMETOOLONG;
}
 
3. namei相關數據結構
/* include/linux/namei.h */
/* State of a path-name lookup; filled in by the path walk and handed back to the caller. */
struct nameidata {
// Dentry of the current position in the path
 struct dentry *dentry;
// VFS mount point that goes with the dentry
 struct vfsmount *mnt;
// Last component (file or directory name) of the path
 struct qstr last;
// LOOKUP_* flags controlling the walk
 unsigned int flags;
// Kind of the last component: LAST_ROOT, LAST_NORM, LAST_DOT, LAST_DOTDOT or LAST_BIND
 int  last_type;
// Nesting depth counter, reset to 0 by do_path_lookup
// NOTE(review): the original article called this "directory depth"; presumably symlink nesting - confirm
 unsigned depth;
 char *saved_names[MAX_NESTED_LINKS + 1]; // 9 entries according to the original annotation
 /* Intent data */
// Operation-specific data
 union {
// Open intent; carries the pointer to the file being opened
  struct open_intent open;
 } intent;
};

/* Parameters of an open request, stored in nameidata.intent during the lookup. */
struct open_intent {
// Open flags
 int flags;
// Creation mode
 int create_mode;
// File pointer (may hold an ERR_PTR value; callers test it with IS_ERR)
 struct file *file;
};

// Path structure: an intermediate helper that simply bundles a
// file system mount point together with a dentry
struct path {
 struct vfsmount *mnt;
 struct dentry *dentry;
};

/* include/linux/dcache.h */
// Directory entry for a file or directory, kept in the system dentry cache
struct dentry {
 atomic_t d_count;
 unsigned int d_flags;  /* protected by d_lock */
 spinlock_t d_lock;  /* per dentry lock */
 struct inode *d_inode;  /* Where the name belongs to - NULL is
      * negative */
 /*
  * The next three fields are touched by __d_lookup.  Place them here
  * so they all fit in a cache line.
  */
 struct hlist_node d_hash; /* lookup hash list */
 struct dentry *d_parent; /* parent directory */
 struct qstr d_name;
 struct list_head d_lru;  /* LRU list */
 /*
  * d_child and d_rcu can share memory
  */
 union {
  struct list_head d_child; /* child of parent list */
   struct rcu_head d_rcu;
 } d_u;
 struct list_head d_subdirs; /* our children */
 struct list_head d_alias; /* inode alias list */
 unsigned long d_time;  /* used by d_revalidate */
 struct dentry_operations *d_op;
 struct super_block *d_sb; /* The root of the dentry tree */
 void *d_fsdata;   /* fs-specific data */
#ifdef CONFIG_PROFILING
 struct dcookie_struct *d_cookie; /* cookie, if any */
#endif
 int d_mounted;
 unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
};

/* include/linux/fs.h */
// Open file structure
struct file {
 /*
  * fu_list becomes invalid after file_free is called and queued via
  * fu_rcuhead for RCU freeing
  */
 union {
  struct list_head fu_list;
  struct rcu_head  fu_rcuhead;
 } f_u;
// Dentry of the file
 struct dentry  *f_dentry;
// VFS mount point the file lives on
 struct vfsmount         *f_vfsmnt;
// File operations table
 const struct file_operations *f_op;
 atomic_t  f_count;
 unsigned int   f_flags;
 mode_t   f_mode;
 loff_t   f_pos;
 struct fown_struct f_owner;
 unsigned int  f_uid, f_gid;
 struct file_ra_state f_ra;
 unsigned long  f_version;
#ifdef CONFIG_SECURITY
 void   *f_security;
#endif
 /* needed for tty driver, and maybe others */
 void   *private_data;
#ifdef CONFIG_EPOLL
 /* Used by fs/eventpoll.c to link all the hooks to this file */
 struct list_head f_ep_links;
 spinlock_t  f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
 struct address_space *f_mapping;
};
 
4. namei操作

4.1 open_namei

/* fs/namei.c */
/*
 * open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *     01 - read permission needed
 *     10 - write permission needed
 *     11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 * SMP-safe
 */
int open_namei(int dfd, const char *pathname, int flag,
  int mode, struct nameidata *nd)
{
 int acc_mode, error;
 struct path path;
 struct dentry *dir;
 int count = 0;
// #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
// Access mode (permission bits to check), derived from the low bits of flag
 acc_mode = ACC_MODE(flag);
 /* O_TRUNC implies we need access checks for write permissions */
// O_TRUNC: write permission has to be checked as well
 if (flag & O_TRUNC)
  acc_mode |= MAY_WRITE;
 /* Allow the LSM permission hook to distinguish append
    access from general write access. */
// O_APPEND: additionally request MAY_APPEND
 if (flag & O_APPEND)
  acc_mode |= MAY_APPEND;
 /*
  * The simplest case - just a plain lookup.
  */
// No file creation requested
 if (!(flag & O_CREAT)) {
// Look up the dentry and mount point of pathname directly; the result goes into nd
  error = path_lookup_open(dfd, pathname, lookup_flags(flag),
      nd, flag);
  if (error)
   return error;
  goto ok;
 }
 /*
  * Create - we need to know the parent.
  */
// Look up the parent directory (LOOKUP_PARENT) with create intent; the result goes into nd
 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
 if (error)
  return error;
 /*
  * We have the parent and last component. First of all, check
  * that we are not asked to creat(2) an obvious directory - that
  * will not do.
  */
 error = -EISDIR;
// The last component must be a normal name with nothing following it
// (a non-NUL character right after the name means the path went on, e.g. a trailing '/')
 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
  goto exit;
// nd->dentry is the parent directory at this point
 dir = nd->dentry;
// Clear the "look up parent" flag
 nd->flags &= ~LOOKUP_PARENT;
 mutex_lock(&dir->d_inode->i_mutex);
// Fill in path: look up the last component under the parent via lookup_hash(nd).
// dir is the parent directory, path.dentry the entry for the file itself
// (open_namei_create passes them to vfs_create as directory and new entry).
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
do_last:
// Bail out if the lookup of the last component returned an error pointer
 error = PTR_ERR(path.dentry);
 if (IS_ERR(path.dentry)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  goto exit;
 }
// Bail out if nd->intent.open.file holds an error; this is the file pointer finally returned
 if (IS_ERR(nd->intent.open.file)) {
  mutex_unlock(&dir->d_inode->i_mutex);
  error = PTR_ERR(nd->intent.open.file);
  goto exit_dput;
 }
 /* Negative dentry, just create the file */
 if (!path.dentry->d_inode) {
// Create the new file (open_namei_create also drops dir's i_mutex), then return
  error = open_namei_create(nd, &path, flag, mode);
  if (error)
   goto exit;
  return 0;
 }
// From here on we are opening a file that already exists
 /*
  * It already exists.
  */
 mutex_unlock(&dir->d_inode->i_mutex);
 audit_inode_update(path.dentry->d_inode);
 error = -EEXIST;
// O_EXCL demands that the file did not exist before; it does, so fail
 if (flag & O_EXCL)
  goto exit_dput;
 if (__follow_mount(&path)) {
  error = -ELOOP;
  if (flag & O_NOFOLLOW)
   goto exit_dput;
 }
 error = -ENOENT;
 if (!path.dentry->d_inode)
  goto exit_dput;
// If the inode provides a follow_link operation, handle it as a link (do_link below)
 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
  goto do_link;
// Copy the dentry and mnt from path into the nameidata
 path_to_nameidata(&path, nd);
 error = -EISDIR;
// A directory cannot be opened on the O_CREAT path: return an error
 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
  goto exit;
ok:
// Run the pre-open checks on nd's dentry and its inode
 error = may_open(nd, acc_mode, flag);
 if (error)
  goto exit;
 return 0;
// Error handling below: release what was acquired and return the error
exit_dput:
 dput_path(&path, nd);
exit:
 if (!IS_ERR(nd->intent.open.file))
  release_open_intent(nd);
 path_release(nd);
 return error;
// Link handling: resolve the link, then loop back to do_last;
// the count check below guards against endless link loops
do_link:
 error = -ELOOP;
 if (flag & O_NOFOLLOW)
  goto exit_dput;
 /*
  * This is subtle. Instead of calling do_follow_link() we do the
  * thing by hands. The reason is that this way we have zero link_count
  * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
  * After that we have the parent and last component, i.e.
  * we are in the same situation as after the first path_walk().
  * Well, almost - if the last component is normal we get its copy
  * stored in nd->last.name and we will have to putname() it when we
  * are done. Procfs-like symlinks just set LAST_BIND.
  */
// Set LOOKUP_PARENT again so the link walk stops at the parent
 nd->flags |= LOOKUP_PARENT;
 error = security_inode_follow_link(path.dentry, nd);
 if (error)
  goto exit_dput;
// Follow the symbolic link
 error = __do_follow_link(&path, nd);
 if (error) {
  /* Does someone understand code flow here? Or it is only
   * me so stupid? Anathema to whoever designed this non-sense
   * with "intent.open".
   */
  release_open_intent(nd);
  return error;
 }
 nd->flags &= ~LOOKUP_PARENT;
// Inspect the type of the last path component
 if (nd->last_type == LAST_BIND)
  goto ok;
 error = -EISDIR;
 if (nd->last_type != LAST_NORM)
  goto exit;
 if (nd->last.name[nd->last.len]) {
  __putname(nd->last.name);
  goto exit;
 }
 error = -ELOOP;
// Too many links followed: give up with -ELOOP once count has reached 32
 if (count++==32) {
  __putname(nd->last.name);
  goto exit;
 }
 dir = nd->dentry;
 mutex_lock(&dir->d_inode->i_mutex);
// Refresh path's dentry and mount point for the new last component
 path.dentry = lookup_hash(nd);
 path.mnt = nd->mnt;
 __putname(nd->last.name);
 goto do_last;
}

4.2  path_lookup_open和path_lookup_create

這兩個函數找到路徑名對應的掛接點和dentry結構,賦值到nameidata結構中;path_lookup_create只是額外帶上創建意圖(LOOKUP_CREATE標誌和create_mode),文件不存在時的實際創建由後面的open_namei_create完成:
/**
 * path_lookup_open - lookup a file path with open intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 *
 * Thin wrapper: forwards to __path_lookup_intent_open() with a create
 * mode of 0 and returns its result unchanged.
 */
int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
  struct nameidata *nd, int open_flags)
{
 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
   open_flags, 0);
}

/**
 * path_lookup_create - lookup a file path with open + create intent
 * @dfd: the directory to use as base, or AT_FDCWD
 * @name: pointer to file name
 * @lookup_flags: lookup intent flags
 * @nd: pointer to nameidata
 * @open_flags: open intent flags
 * @create_mode: create intent flags
 *
 * Thin wrapper: ORs LOOKUP_CREATE into @lookup_flags, forwards to
 * __path_lookup_intent_open() and returns its result unchanged.
 * Nothing is created here; only the intent is recorded in @nd.
 */
static int path_lookup_create(int dfd, const char *name,
         unsigned int lookup_flags, struct nameidata *nd,
         int open_flags, int create_mode)
{
 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
   nd, open_flags, create_mode);
}

這兩個函數都是調用__path_lookup_intent_open,只是參數不同,create中加入了LOOKUP_CREATE標誌和create_mode:

static int __path_lookup_intent_open(int dfd, const char *name,
  unsigned int lookup_flags, struct nameidata *nd,
  int open_flags, int create_mode)
{
	int err;
	/* Grab an unused struct file up front. */
	struct file *filp = get_empty_filp();

	/* None available: the file table is full. */
	if (filp == NULL)
		return -ENFILE;

	/* Record the open intent in nd; this file pointer is what is finally returned. */
	nd->intent.open.file = filp;
	nd->intent.open.flags = open_flags;
	nd->intent.open.create_mode = create_mode;

	/* The actual path lookup; name is the path name. */
	err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);

	/* The state of intent.open.file is examined before err. */
	if (!IS_ERR(nd->intent.open.file)) {
		/* File pointer still valid: on lookup failure release the intent. */
		if (err != 0)
			release_open_intent(nd);
		return err;
	}

	/*
	 * The file pointer was turned into an error during the lookup. If
	 * the lookup itself succeeded, adopt that error and release the path.
	 */
	if (err == 0) {
		err = PTR_ERR(nd->intent.open.file);
		path_release(nd);
	}
	return err;
}

// 路徑查找
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
static int fastcall do_path_lookup(int dfd, const char *name,
    unsigned int flags, struct nameidata *nd)
{
 int retval = 0;
 int fput_needed;
 struct file *file;
// The fs_struct (root / working directory information) comes from the current process
 struct fs_struct *fs = current->fs;
// last_type defaults to LAST_ROOT, which stays in effect for a path made only of '/'
 nd->last_type = LAST_ROOT; /* if there are only slashes... */
 nd->flags = flags;
 nd->depth = 0;
// The branches below pick the starting point of the walk and take references on it
// (the *get helpers take a reference, the *put helpers drop one)
 if (*name=='/') {
// Absolute path
  read_lock(&fs->lock);
  if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
// An alternate root is set and LOOKUP_NOALT was not requested: try it first.
// Take a reference on altrootmnt (a vfsmount pointer) and on altroot.
   nd->mnt = mntget(fs->altrootmnt);
   nd->dentry = dget(fs->altroot);
   read_unlock(&fs->lock);
   if (__emul_lookup_dentry(name,nd))
    goto out; /* found in altroot */
   read_lock(&fs->lock);
  }
// Take a reference on rootmnt and store it in nd
  nd->mnt = mntget(fs->rootmnt);
// Take a reference on the root dentry and store it in nd
  nd->dentry = dget(fs->root);
  read_unlock(&fs->lock);
 } else if (dfd == AT_FDCWD) {
// Calls coming from sys_open end up here: a relative path starting at the current working directory
  read_lock(&fs->lock);
// Take a reference on pwdmnt and store it in nd
  nd->mnt = mntget(fs->pwdmnt);
// Take a reference on pwd and store it in nd
  nd->dentry = dget(fs->pwd);
  read_unlock(&fs->lock);
 } else {
  struct dentry *dentry;
// Relative to the directory behind dfd: fetch its struct file;
// fput_needed tells fput_light whether a reference has to be dropped
  file = fget_light(dfd, &fput_needed);
  retval = -EBADF;
  if (!file)
   goto out_fail;
  dentry = file->f_dentry;
  retval = -ENOTDIR;
  if (!S_ISDIR(dentry->d_inode->i_mode))
   goto fput_fail;
// Check MAY_EXEC permission on the directory
  retval = file_permission(file, MAY_EXEC);
  if (retval)
   goto fput_fail;
// Take references on f_vfsmnt and the dentry for nd
  nd->mnt = mntget(file->f_vfsmnt);
  nd->dentry = dget(dentry);
// Release the struct file obtained by fget_light
  fput_light(file, fput_needed);
 }
// Reset the per-task count of links followed
 current->total_link_count = 0;
// Walk the path name component by component: the core function
 retval = link_path_walk(name, nd);
out:
 if (likely(retval == 0)) {
// The common case: the path was resolved successfully; record the inode for auditing
  if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
    nd->dentry->d_inode))
  audit_inode(name, nd->dentry->d_inode);
 }
out_fail:
 return retval;
fput_fail:
 fput_light(file, fput_needed);
 goto out_fail;
}

do_path_lookup調用的核心函數是link_path_walk:

/*
 * Wrapper to retry pathname resolution whenever the underlying
 * file system returns an ESTALE.
 *
 * Retry the whole path once, forcing real lookup requests
 * instead of relying on the dcache.
 */
int fastcall link_path_walk(const char *name, struct nameidata *nd)
{
	/* Snapshot of the starting point, kept for a possible retry. */
	struct nameidata snapshot = *nd;
	int err;

	/* make sure the stuff we saved doesn't go away */
	dget(snapshot.dentry);
	mntget(snapshot.mnt);

	err = __link_path_walk(name, nd);
	if (err == -ESTALE) {
		/*
		 * Stale file handle: restore nd from the snapshot, take fresh
		 * references for it, and walk once more with LOOKUP_REVAL set.
		 */
		*nd = snapshot;
		dget(nd->dentry);
		mntget(nd->mnt);
		nd->flags |= LOOKUP_REVAL;
		err = __link_path_walk(name, nd);
	}

	/* Drop the references held on behalf of the snapshot. */
	dput(snapshot.dentry);
	mntput(snapshot.mnt);
	return err;
}

真正的名稱解析函數是__link_path_walk:
/*
 * Name resolution.
 * This is the basic name resolution function, turning a pathname into
 * the final dentry. We expect 'base' to be positive and a directory.
 *
 * Returns 0 and nd will have valid dentry and mnt on success.
 * Returns error and drops reference to input namei data on failure.
 */
static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
{
 struct path next;
 struct inode *inode;
 int err;
 unsigned int lookup_flags = nd->flags;
// Skip leading '/' characters; any number of them is accepted without error
 while (*name=='/')
  name++;
// Empty path
 if (!*name)
  goto return_reval;
// inode of the current position
 inode = nd->dentry->d_inode;
 if (nd->depth)
  lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
 /* At this point we know we have a real path component. */
 for(;;) {
// Each iteration handles one '/'-separated component of the path
  unsigned long hash;
  struct qstr this;
  unsigned int c;
  nd->flags |= LOOKUP_CONTINUE;
// Permission check on the current inode; 0 means allowed
  err = exec_permission_lite(inode, nd);
  if (err == -EAGAIN)
// -EAGAIN from the lite check: fall back to the full MAY_EXEC check
// NOTE(review): the original article said -EAGAIN means the inode is "being operated on"; not verifiable from this excerpt
   err = vfs_permission(nd, MAY_EXEC);
// Any error ends the loop
   if (err)
   break;
// Start filling the qstr ("quick string") for this component
  this.name = name;
// First character of the component
  c = *(const unsigned char *)name;
// Hash the component name, up to but excluding '/' or the terminating NUL
  hash = init_name_hash();
  do {
   name++;
   hash = partial_name_hash(c, hash);
   c = *(const unsigned char *)name;
  } while (c && (c != '/'));
// Length of this component
  this.len = name - (const char *) this.name;
// Final hash value
  this.hash = end_name_hash(hash);
  /* remove trailing slashes? */
// c == 0: the string ended, so this is the last component
  if (!c)
   goto last_component;
// Skip the '/' separators that follow
  while (*++name == '/');
// End of string reached: the path ended in '/'
  if (!*name)
   goto last_with_slashes;
  /*
   * "." and ".." are special - ".." especially so because it has
   * to be able to know about the current root directory and
   * parent relationships.
   */
// The component starts with '.'
  if (this.name[0] == '.') switch (this.len) {
   default:
// An ordinary name that merely starts with '.'
    break;
   case 2: 
// Second character is not '.': an ordinary name
    if (this.name[1] != '.')
     break;
// The component is "..": move nd to the parent directory and refresh inode
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// The component is "." (or ".." was just handled): go on with the next component
    continue;
  }
  /*
   * See if the low-level filesystem might want
   * to use its own hash..
   */
// The underlying file system supplies its own hash function
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
  /* This does the actual lookups.. */
// Look the component up by name
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  err = -ENOENT;
// inode of the component just found
  inode = next.dentry->d_inode;
// No inode (negative dentry): error path
  if (!inode)
   goto out_dput;
  err = -ENOTDIR;
  if (!inode->i_op)
   goto out_dput;
  if (inode->i_op->follow_link) {
// Follow the link (do_follow_link; its handling of nested links is not shown in this excerpt)
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   err = -ENOENT;
// inode now refers to what the link resolved to
   inode = nd->dentry->d_inode;
   if (!inode)
    break;
   err = -ENOTDIR;
   if (!inode->i_op)
    break;
  } else
// Advance nd to the component just found
   path_to_nameidata(&next, nd);
  err = -ENOTDIR;
  if (!inode->i_op->lookup)
   break;
// Go on with the next component
  continue;
  /* here ends the main loop */
// Last component: handled much like the ones above
last_with_slashes:
// The path ended in '/': the result must be a directory
  lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
  /* Clear LOOKUP_CONTINUE iff it was previously unset */
  nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
  if (lookup_flags & LOOKUP_PARENT)
   goto lookup_parent;
  if (this.name[0] == '.') switch (this.len) {
   default:
    break;
   case 2: 
// Not "..": carry on as an ordinary name
    if (this.name[1] != '.')
     break;
// The name is "..": move to the parent directory
    follow_dotdot(nd);
    inode = nd->dentry->d_inode;
    /* fallthrough */
   case 1:
// The name is "." (or ".." was just handled): jump to the return handling
    goto return_reval;
  }
// Ordinary name
// The underlying file system supplies its own hash function
  if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
   err = nd->dentry->d_op->d_hash(nd->dentry, &this);
   if (err < 0)
    break;
  }
// Look up the last component
  err = do_lookup(nd, &this, &next);
  if (err)
   break;
  inode = next.dentry->d_inode;
  if ((lookup_flags & LOOKUP_FOLLOW)
      && inode && inode->i_op && inode->i_op->follow_link) {
   err = do_follow_link(&next, nd);
   if (err)
    goto return_err;
   inode = nd->dentry->d_inode;
  } else
// Update mnt and dentry in the nameidata
   path_to_nameidata(&next, nd);
  err = -ENOENT;
  if (!inode)
   break;
  if (lookup_flags & LOOKUP_DIRECTORY) {
   err = -ENOTDIR;
   if (!inode->i_op || !inode->i_op->lookup)
    break;
  }
  goto return_base;
lookup_parent:
// LOOKUP_PARENT: do not resolve the last component, just record it in nd->last
// and classify it (LAST_NORM by default)
  nd->last = this;
  nd->last_type = LAST_NORM;
  if (this.name[0] != '.')
   goto return_base;
  if (this.len == 1)
   nd->last_type = LAST_DOT;
  else if (this.len == 2 && this.name[1] == '.')
   nd->last_type = LAST_DOTDOT;
  else
   goto return_base;
return_reval:
// Return path for cases that skipped the normal lookup
  /*
   * We bypassed the ordinary revalidation routines.
   * We may need to check the cached dentry for staleness.
   */
  if (nd->dentry && nd->dentry->d_sb &&
      (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
   err = -ESTALE;
   /* Note: we do not d_invalidate() */
   if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd))
    break;
  }
return_base:
  return 0;
out_dput:
  dput_path(&next, nd);
  break;
 }
// Reaching this point means an error occurred: release the path held in nd
 path_release(nd);
return_err:
 return err;
}
 
/*
 *  It's more convoluted than I'd like it to be, but... it's still fairly
 *  small and for now I'd prefer to have fast path as straight as possible.
 *  It _is_ time-critical.
 */
static int do_lookup(struct nameidata *nd, struct qstr *name,
       struct path *path)
{
	struct vfsmount *mnt = nd->mnt;
	/* Fast path: search the dentry cache for "name" under parent nd->dentry. */
	struct dentry *dentry = __d_lookup(nd->dentry, name);

	/* A cached entry whose file system wants revalidation gets revalidated first. */
	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
		dentry = do_revalidate(dentry, nd);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	/*
	 * Nothing usable in the cache (a miss, or revalidation handed back
	 * NULL): do the real lookup. real_lookup() checks the cache once
	 * more under the directory lock before asking the file system.
	 */
	if (!dentry) {
		dentry = real_lookup(nd->dentry, name, nd);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}

	/* Found: hand back mount point and dentry. */
	path->mnt = mnt;
	path->dentry = dentry;
	__follow_mount(path);
	return 0;
}

/*
 * This is called when everything else fails, and we actually have
 * to go to the low-level filesystem to find out what we should do..
 *
 * We get the directory semaphore, and after getting that we also
 * make sure that nobody added the entry to the dcache in the meantime..
 * SMP-safe
 */
static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
{
	struct inode *dir = parent->d_inode;
	struct dentry *found;

	mutex_lock(&dir->i_mutex);
	/*
	 * First re-do the cached lookup just in case it was created
	 * while we waited for the directory semaphore..
	 *
	 * FIXME! This could use version numbering or similar to
	 * avoid unnecessary cache lookups.
	 *
	 * The "dcache_lock" is purely to protect the RCU list walker
	 * from concurrent renames at this point (we mustn't get false
	 * negatives from the RCU list walk here, unlike the optimistic
	 * fast walk).
	 *
	 * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
	 */
	found = d_lookup(parent, name);
	if (found) {
		/*
		 * Uhhuh! Nasty case: the cache was re-populated while
		 * we waited on the semaphore. Need to revalidate.
		 */
		mutex_unlock(&dir->i_mutex);
		if (found->d_op && found->d_op->d_revalidate) {
			found = do_revalidate(found, nd);
			if (!found)
				found = ERR_PTR(-ENOENT);
		}
		return found;
	}

	/* Still not cached: allocate a new dentry and ask the file system. */
	{
		struct dentry *fresh = d_alloc(parent, name);

		if (!fresh) {
			found = ERR_PTR(-ENOMEM);
		} else {
			/* File-system specific lookup through the directory inode. */
			found = dir->i_op->lookup(dir, fresh, nd);
			if (found)
				/* ->lookup returned a non-NULL result: drop our new dentry and return that. */
				dput(fresh);
			else
				/* ->lookup returned NULL: the new dentry is the result. */
				found = fresh;
		}
	}
	mutex_unlock(&dir->i_mutex);
	return found;
}

小結一下函數調用順序:
path_lookup_open    path_lookup_create
     |                     |
     V                     V
   __path_lookup_intent_open
               |
               V
        do_path_lookup
               |
               V
        link_path_walk
               |
               V
      __link_path_walk
               |
               V
           do_lookup
               |
               V
          real_lookup

這些函數操作都屬於虛擬文件系統操作,對所有類型的文件系統都適用,而從各個FS的具體實現才能看出差異和相關標誌的作用。

4.3 open_namei_create

static int open_namei_create(struct nameidata *nd, struct path *path,
    int flag, int mode)
{
	/* On entry nd->dentry is the directory the file is created in. */
	struct dentry *parent = nd->dentry;
	int err;

	/* The process umask is applied here only when the directory is not IS_POSIXACL. */
	if (!IS_POSIXACL(parent->d_inode))
		mode = mode & ~current->fs->umask;

	err = vfs_create(parent->d_inode, path->dentry, mode, nd);

	/*
	 * Whether or not the create worked: drop the directory's i_mutex
	 * (open_namei locked it before calling us) and move nd from the
	 * parent to the new entry.
	 */
	mutex_unlock(&parent->d_inode->i_mutex);
	dput(nd->dentry);
	nd->dentry = path->dentry;

	if (err)
		return err;

	/* Don't check for write permission, don't truncate */
	return may_open(nd, 0, flag & ~O_TRUNC);
}

4.4 path_to_nameidata

// 將路徑參數(shù)賦值到nameidata結(jié)構(gòu)中
/* Move nd to the location described by path, dropping the references nd held before. */
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
// Drop the reference on the previous dentry
 dput(nd->dentry);
// Drop the previous mount point too, but only if it differs from the new one
 if (nd->mnt != path->mnt)
  mntput(nd->mnt);
// Store the new mount point and dentry in the nameidata
 nd->mnt = path->mnt;
 nd->dentry = path->dentry;
}

5. 結(jié)論

打開文件時,目的是要生成一個struct file結構的指針,該結構中有相關文件的名稱、dentry指針、掛接點文件系統等信息,而struct nameidata作為一個中間過程結構保存相關的處理結果,最終返回需要的文件信息。

    本站是提供個人知識管理的網(wǎng)絡(luò)存儲空間,所有內(nèi)容均由用戶發(fā)布,,不代表本站觀點,。請注意甄別內(nèi)容中的聯(lián)系方式、誘導(dǎo)購買等信息,,謹(jǐn)防詐騙,。如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請點擊一鍵舉報,。
    轉(zhuǎn)藏 分享 獻(xiàn)花(0

    0條評論

    發(fā)表

    請遵守用戶 評論公約

    類似文章 更多