open()系统调用(2010-08-0123:22)分类:LinuxFilesystemopen()系统调用的服务例程为sys_open()函数,该函数接收的参数为:要打开的文件的路径名filename、访问模式的一些标志flags,以及如果该文件被创建所需要的许可位掩码mode。如果该系统调用成功,就返回一个文件描述符,也就是指向文件对象的指针数组current->files->fd_array或者current->files->fdtable.fd中新分配给文件的索引;否则,返回-1。 open()系统调用的所有标志---------------------------------------------------------------------include/asm-generic/fcntl.h#defineO_ACCMODE00000003#defineO_RDONLY 00000000 /*为只读而打开*/#defineO_WRONLY 00000001 /*为只写而打开*/#defineO_RDWR 00000002 /*为读和写而打开*/#ifndefO_CREAT/*如果文件不存在则创建它*/#defineO_CREAT 00000100 /*notfcntl*/#endif#ifndefO_EXCL/*对于O_CREAT标志,如果文件已经存在,则失败*/#defineO_EXCL 00000200 /*notfcntl*/#endif#ifndefO_NOCTTY/*从不把文件看作终端*/#defineO_NOCTTY 00000400 /*notfcntl*/#endif#ifndefO_TRUNC/*截断文件(删除所有的现有
内容)*/#defineO_TRUNC 00001000 /*notfcntl*/#endif#ifndefO_APPEND/*总是在文件末尾写*/#defineO_APPEND 00002000#endif#ifndefO_NONBLOCK/*非阻塞打开*/#defineO_NONBLOCK 00004000#endif#ifndefO_DSYNC/*同步写(阻塞,直到物理写终止)*/#defineO_DSYNC 00010000 /*usedtobeO_SYNC,seebelow*/#endif#ifndefFASYNC/*通过信号发出I/O事件通告
*/#defineFASYNC 00020000 /*fcntl,forBSDcompatibility*/#endif#ifndefO_DIRECT#defineO_DIRECT 00040000 /*directdiskaccesshint*/#endif/*大型文件(文件长度大于off_t所能
表示的范围但小于off64_t)*/#ifndefO_LARGEFILE#defineO_LARGEFILE 00100000#endif #ifndefO_DIRECTORY/*如果文件不是一个目录
,则失败*/#defineO_DIRECTORY 00200000 /*mustbeadirectory*/#endif #ifndefO_NOFOLLOW/*不解析路径名尾部的符号链接*/#defineO_NOFOLLOW 00400000 /*don'tfollowlinks*/#endif#ifndefO_NOATIME/*不更新索引节点的访问时间。*/#defineO_NOATIME01000000#endif#ifndefO_CLOEXEC#defineO_CLOEXEC02000000 /*setclose_on_exec*/#endif---------------------------------------------------------------------有一些标志的定义是因体系结构而异的。 sys_open()定义如下:---------------------------------------------------------------------fs/open.cSYSCALL_DEFINE3(open,constchar__user*,filename,int,flags,int,mode){ longret; if(force_o_largefile()) flags|=O_LARGEFILE; ret=do_sys_open(AT_FDCWD,filename,flags,mode); /*avoidREGPARMbreakageonx86:*/ asmlinkage_protect(3,ret,filename,flags,mode); returnret;}---------------------------------------------------------------------这个函数的操作如下:首先,调用force_o_largefile()来判断是否支持大文件,若是,则设置标志的O_LARGEFILE位。force_o_largefile()其实是一个宏。这个宏也是因体系结构而异的。其次,调用do_sys_open(AT_FDCWD,filename,flags,mode)来完成实际的打开文件的任务。下面有更详细说明。最后,调用asmlinkage_protect()以使系统调用正确返回。它也是一个宏,为了防止编译器错误而设。其他平台为空,只有x86平台有定义,为:---------------------------------------------------------------------arch/x86/include/asm/linkage.h/* *Makesurethecompilerdoesn'tdoanythingstupidwiththe *argumentsonthestack-theyareownedbythe*caller*,not *thecallee.Thisjustfoolsgccintonotspillingintothem, *andkeepsitfromdoingtailcallrecursionand/orusingthe *stackslotsfortemporaries,sincetheyareliveand"used" *allthewaytotheendofthefunction. * *NOTE!Onx86-64,alltheargumentsareinregisters,sothis *onlymattersona32-bitkernel. 
*/#defineasmlinkage_protect(n,ret,args...)\ __asmlinkage_protect##n(ret,##args)#define__asmlinkage_protect_n(ret,args...)\ __asm____volatile__("":"=r"(ret):"0"(ret),##args)#define__asmlinkage_protect0(ret)\ __asmlinkage_protect_n(ret)#define__asmlinkage_protect1(ret,arg1)\ __asmlinkage_protect_n(ret,"g"(arg1))#define__asmlinkage_protect2(ret,arg1,arg2)\ __asmlinkage_protect_n(ret,"g"(arg1),"g"(arg2))#define__asmlinkage_protect3(ret,arg1,arg2,arg3)\ __asmlinkage_protect_n(ret,"g"(arg1),"g"(arg2),"g"(arg3))--------------------------------------------------------------------- do_sys_open()函数定义如下:---------------------------------------------------------------------fs/open.clongdo_sys_open(intdfd,constchar__user*filename,intflags,intmode){ char*tmp=getname(filename); intfd=PTR_ERR(tmp); if(!IS_ERR(tmp)){ fd=get_unused_fd_flags(flags); if(fd>=0){ structfile*f=do_filp_open(dfd,tmp,flags,mode,0); if(IS_ERR(f)){ put_unused_fd(fd); fd=PTR_ERR(f); }else{ fsnotify_open(f->f_path.dentry); fd_install(fd,f); } } putname(tmp); } returnfd;}---------------------------------------------------------------------第一个参数是多么的眼熟啊,查找路径名的do_path_lookup()函数的第一个参数也是它,用于说明查找路径名的基目录。关于它,前面路径名查找已有说明了。其余参数则是sys_open()将传进来的参数传给了它。这个函数执行如下操作:1、调用getname(filename)从进程地址空间读取文件的路径名,将路径名的地址存放在局部变量tmp。getname(filename)本质上,首先从名为"names_cache"路径名slab缓存中分配内存区,然后将路径名从用户空间复制到该内存区中。 2、调用get_unused_fd_flags(flags)从当前进程的文件描述符表中找一个空位。其定义为:---------------------------------------------------------------------include/linux/file.h#defineget_unused_fd_flags(flags)alloc_fd(0,(flags))---------------------------------------------------------------------这是一个宏,仅仅是对alloc_fd()函数特殊参数下调用的别名,对alloc_fd()函数定义如下:---------------------------------------------------------------------fs/file.c/* *allocateafiledescriptor,markitbusy. 
*/intalloc_fd(unsignedstart,unsignedflags){ structfiles_struct*files=current->files; unsignedintfd; interror; structfdtable*fdt; spin_lock(&files->file_lock);repeat: fdt=files_fdtable(files); fd=start; if(fd<files->next_fd) fd=files->next_fd; if(fd<fdt->max_fds) fd=find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,fd); error=expand_files(files,fd); if(error<0) gotoout; /* *Ifweneededtoexpandthefsarraywe *mighthaveblocked-tryagain. */ if(error) gotorepeat; if(start<=files->next_fd) files->next_fd=fd+1; FD_SET(fd,fdt->open_fds); if(flags&O_CLOEXEC) FD_SET(fd,fdt->close_on_exec); else FD_CLR(fd,fdt->close_on_exec); error=fd;#if1 /*Sanitycheck*/ if(rcu_dereference_raw(fdt->fd[fd])!=NULL){ printk(KERN_WARNING"alloc_fd:slot%dnotNULL!\n",fd); rcu_assign_pointer(fdt->fd[fd],NULL); }#endif out: spin_unlock(&files->file_lock); returnerror;}---------------------------------------------------------------------这个函数执行如下操作:a.将current->files->next_fd字段赋值给局部变量。b.调用find_next_zero_bit(fdt->open_fds->fds_bits,fdt->max_fds,fd)来在文件描述符表中寻找下一个可以分配的文件描述符。这个函数也是因体系结构而异,系统中通用的函数定义为:---------------------------------------------------------------------lib/find_next_bit.cunsignedlongfind_next_zero_bit(constunsignedlong*addr,unsignedlongsize, unsignedlongoffset){ constunsignedlong*p=addr+BITOP_WORD(offset); unsignedlongresult=offset&~(BITS_PER_LONG-1); unsignedlongtmp; if(offset>=size) returnsize; size-=result; offset%=BITS_PER_LONG; if(offset){ tmp=*(p++); tmp|=~0UL>>(BITS_PER_LONG-offset); if(size<BITS_PER_LONG) gotofound_first; if(~tmp) gotofound_middle; size-=BITS_PER_LONG; result+=BITS_PER_LONG; } while(size&~(BITS_PER_LONG-1)){ if(~(tmp=*(p++))) gotofound_middle; result+=BITS_PER_LONG; size-=BITS_PER_LONG; } if(!size) returnresult; tmp=*p; found_first: tmp|=~0UL<<size; if(tmp==~0UL) /*Areanybitszero?*/ returnresult+size; /*Nope.*/found_middle: 
returnresult+ffz(tmp);}---------------------------------------------------------------------(1)、上面的BITOP_WORD(offset)清一色定义为:#defineBITOP_WORD(nr) ((nr)/BITS_PER_LONG)首先求得第一个要查找的long的位置。(2)、offset&~(BITS_PER_LONG-1)等价于offset除以32再乘以32,以此来求得所要查找的第一个字第0位在表中的位置,只不过会比除法运算效率高很多。(3)、如果传递的起始查找位置甚至大于最大可能值,则返回最大可能值。(4)、查找第一个0位所在的long型值的位置。而局部变量tmp中会保存该long型量的值。(5)、返回查找到的第一个0位的位置或可能的最大值。 c.调用expand_files(files,fd),来扩展文件描述符表,files_struct的fd_array数组成员可以在打开的文件较少时使用,但当打开的文件较多时,就会对文件描述符表进行扩展。 d.更新files->next_fd字段,将分配的文件描述符添加进fdt->open_fds,如果设置了O_CLOEXEC则将文件描述符添加进fdt->close_on_exec,若没有,则清除fdt->close_on_exec中的相应位。 e.返回文件描述符。 3、调用do_filp_open(dfd,tmp,flags,mode,0)函数,传递给它的参数依次为查找路径名的基目录、文件路径名、访问模式标志以及许可权位掩码、访问模式位。这个函数定义为:---------------------------------------------------------------------fs/namei.c1761/*1762 *Notethatthelowbitsofthepassedin"open_flag"1763 *arenotthesameasinthelocalvariable"flag".See1764 *open_to_namei_flags()formoredetails.1765 */1766structfile*do_filp_open(intdfd,constchar*pathname,1767 intopen_flag,intmode,intacc_mode)1768{1769 structfile*filp;1770 structnameidatand;1771 interror;1772 structpathpath;1773 intcount=0;1774 intflag=open_to_namei_flags(open_flag);1775 intforce_reval=0;17761777 if(!(open_flag&O_CREAT))1778 mode=0;17791780 /*1781 *O_SYNCisimplementedas__O_SYNC|O_DSYNC. 
Asmanyplacesonly1782 *checkforO_DSYNCiftheneedanysyncingatallweenforceit's1783 *alwayssetinsteadofhavingtodealwithpossiblyweirdbehaviour1784 *formaliciousapplicationssettingonly__O_SYNC.1785 */1786 if(open_flag&__O_SYNC)1787 open_flag|=O_DSYNC;17881789 if(!acc_mode)1790 acc_mode=MAY_OPEN|ACC_MODE(open_flag);17911792 /*O_TRUNCimpliesweneedaccesschecksforwritepermissions*/1793 if(open_flag&O_TRUNC)1794 acc_mode|=MAY_WRITE;17951796 /*AllowtheLSMpermissionhooktodistinguishappend1797 accessfromgeneralwriteaccess.*/1798 if(open_flag&O_APPEND)1799 acc_mode|=MAY_APPEND;18001801 /*findtheparent*/1802reval:1803 error=path_init(dfd,pathname,LOOKUP_PARENT,&nd);1804 if(error)1805 returnERR_PTR(error);1806 if(force_reval)1807 nd.flags|=LOOKUP_REVAL;18081809 current->total_link_count=0;1810 error=link_path_walk(pathname,&nd);1811 if(error){1812 filp=ERR_PTR(error);1813 gotoout;1814 }1815 if(unlikely(!audit_dummy_context())&&(open_flag&O_CREAT))1816 audit_inode(pathname,nd.path.dentry);18171818 /*1819 *Wehavetheparentandlastcomponent.1820 */18211822 error=-ENFILE;1823 filp=get_empty_filp();1824 if(filp==NULL)1825 gotoexit_parent;1826 nd.intent.open.file=filp;1827 filp->f_flags=open_flag;1828 nd.intent.open.flags=flag;1829 nd.intent.open.create_mode=mode;1830 nd.flags&=~LOOKUP_PARENT;1831 nd.flags|=LOOKUP_OPEN;1832 if(open_flag&O_CREAT){1833 nd.flags|=LOOKUP_CREATE;1834 if(open_flag&O_EXCL)1835 nd.flags|=LOOKUP_EXCL;1836 }1837 if(open_flag&O_DIRECTORY)1838 nd.flags|=LOOKUP_DIRECTORY;1839 if(!(open_flag&O_NOFOLLOW))1840 nd.flags|=LOOKUP_FOLLOW;1841 filp=do_last(&nd,&path,open_flag,acc_mode,mode,pathname);1842 while(unlikely(!filp)){/*trailingsymlink*/1843 structpathholder;1844 structinode*inode=path.dentry->d_inode;1845 void*cookie;1846 error=-ELOOP;1847 /*S_ISDIRpartisatemporaryautomountkludge*/1848 if(!(nd.flags&LOOKUP_FOLLOW)&&!S_ISDIR(inode->i_mode))1849 gotoexit_dput;1850 if(count++==32)1851 gotoexit_dput;1852 /*1853 *Thisissubtle.Insteadofcallingdo_follow_link()wedo1854 
*thethingbyhands.Thereasonisthatthiswaywehavezero1855 *link_countandpath_walk()(calledfrom->follow_link)1856 *honoringLOOKUP_PARENT. Afterthatwehavetheparentand1857 *lastcomponent,i.e.weareinthesamesituationasafter1858 *thefirstpath_walk(). Well,almost-ifthelastcomponent1859 *isnormalwegetitscopystoredinnd->last.nameandwewill1860 *havetoputname()itwhenwearedone.Procfs-likesymlinks1861 &nb