_IO_FILE 是个比较神奇的东西,而且用起来比较方便,因此花点时间捋一下它的源码,方便后面的精进。
这里使用glibc 2.23的源码进行调试,需要注意的是2.23版本之后的vtable添加了检查,这里调试的是没有检查的,加了debug symbol的glibc。源码与编译方式如下
1 | //gcc -Wl,-dynamic-linker /glibc/glibc-2.23/debug_x64/lib/ld-linux-x86-64.so.2 -o iofile -g iofile.c |
fopen
框架
malloc为结构体分配内存空间。
_IO_no_init 对FILE结构体进行初始化。
_IO_file_init将结构体链接进_IO_list_all链表。
_IO_file_fopen执行系统调用打开文件。
返回句柄。
细节
fopen->_IO_new_fopen
跟进去,发现是_IO_new_fopen函数,在libio/iofopen.c中。(这里展示的不是2.23的代码,所以与下面调试时的代码会有所偏差)1
2
3
4
5FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}
继续跟进__fopen_internal
__fopen_internal
1 | FILE * |
可以看到先申请了一个locked_FILE结构体,其内部有三个成员,分别为fp(struct _IO_FILE_plus)、lock(_IO_lock_t)和wd(struct _IO_wide_data),三者都直接内嵌在这一块内存中
太大了就不贴出来了1
2
3
4
5
6
7
8
9pwndbg> print new_f
$3 = (struct locked_FILE *) 0x602010
pwndbg> x/20gx 0x602010-0x10
0x602000: 0x0000000000000000 0x0000000000000231
0x602010: 0x0000000000000000 0x0000000000000000
0x602020: 0x0000000000000000 0x0000000000000000
0x602030: 0x0000000000000000 0x0000000000000000
0x602040: 0x0000000000000000 0x0000000000000000
0x602050: 0x0000000000000000 0x0000000000000000
_IO_no_init
继续跟进,_IO_no_init 对file结构体进行初始化。这里2.23版本是这样的
1 | #if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T |
进入函数内部,该函数位于/libio/genops.c中,开始时进入_IO_old_init (fp, flags),对fp结构体部分初始化,绝大多数为空,返回后主要对fp->_wide_data结构体进行了初始化。
返回后设置vtable为_IO_file_jumps。
_IO_file_init
继续运行,调用_IO_file_init (&new_f->fp),这里位于/libio/fileops.c;
将前面创建的file结构体放入_IO_list_all中。可以看到主体是调用的_IO_link_in(fp)
1 | void |
_IO_link_in
跟进该函数,又回到了genops.c中,看到开始检查flag的标志位是否是_IO_LINKED,因为FILE结构体是通过_IO_list_all的单链表进行组织管理的,所以如果不在链表中便对其进行相应的处理,1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
fp->file._chain = (FILE *) _IO_list_all;
_IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}
查看下_IO_list_all申请的结构体,即前面三个结构体中第一个结构体。1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35pwndbg> print *_IO_list_all
$5 = {
file = {
_flags = -72538996,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd6540 <_IO_2_1_stderr_>,
_fileno = -1,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x6020f0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x602100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
},
vtable = 0x7ffff7dd46e0 <__GI__IO_file_jumps>
}
函数返回到__fopen_internal中,进入一个判断1
2if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);
_IO_new_file_fopen
跟进判断函数,跳转到了/libio/fileops.c中的_IO_new_file_fopen函数1
2
3FILE *
_IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
int is32not64)
前面先检查文件是否打开,之后设置打开模式,最后调用 _IO_file_open
1 | result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write, |
_IO_file_open
跟进_IO_file_open函数,可以看到,调用了系统级函数__open打开文件,之后设置fp->_fileno为文件描述符,最后再次调用_IO_link_in确保设置进入_IO_list_all。1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29FILE *
_IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
fdesc = __open_nocancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
if (fdesc < 0)
return NULL;
fp->_fileno = fdesc;
_IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
/* For append mode, send the file offset to the end of the file. Don't
update the offset cache though, since the file handle is not active. */
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
__close_nocancel (fdesc);
return NULL;
}
}
_IO_link_in ((struct _IO_FILE_plus *) fp);
return fp;
}
返回之后调用了__fopen_maybe_mmap,随后将fp指针返回,整个流程就结束了。
fread
框架
如果_IO_buf_base为空,调用_IO_doallocbuf(fp)初始化指针,建立输入缓冲区
根据输入缓冲区与目标缓冲区情况分别进行处理,底层调用read的系统调用来进行读取并复制
细节
进入之前再查看下FILE结构体fp指针的内容1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32pwndbg> print *fp
$6 = {
_flags = -72539008,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd6540 <_IO_2_1_stderr_>,
_fileno = 3,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x6020f0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x602100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
}
fread->_IO_fread
跟进fread函数,看到是位于/libio/fread.c中的_IO_fread,看到先计算了请求字节数,之后对fp加锁,执行_IO_sgetn,之后释放锁。1
2
3
4
5
6
7
8
9
10
11
12
13size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}
_IO_sgetn ==> _IO_XSGETN
可以看到主体应该是_IO_sgetn函数,跟进该函数,发现主体为_IO_XSGETN函数1
2
3
4
5
6
7size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)
查看定义1
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)
继续跟进,进入/libio/fileops.c,该函数较长,做了不少事情。1
2size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
如果_IO_buf_base为空,调用_IO_doallocbuf(fp)初始化指针,建立输入缓冲区1
2
3
4
5
6
7
8
9
10if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
第一步:_IO_doallocbuf
跟进_IO_doallocbuf,进入/libio/genops.c1
2
3
4
5
6
7
8
9
10
11void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
if (_IO_DOALLOCATE (fp) != EOF)
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)
这里重新回顾下开始时的结构体状态,这些变量未被初始化1
2
3
4
5_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_file_doallocate
因此开始检验是否被初始化,如果已经初始化就返回。检查标志位之后调用vtable中的_IO_file_doallocate,由注释可以看到这个函数主要用来分配输入缓冲区。1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34/* Allocate a file buffer, or switch to unbuffered I/O. Streams for
TTY devices default to line buffered. */
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct stat64 st;
size = BUFSIZ;
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
}
#if defined _STATBUF_ST_BLKSIZE
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);
if (__glibc_unlikely (p == NULL))
return EOF;
_IO_setb (fp, p, p + size, 1);
return 1;
}
libc_hidden_def (_IO_file_doallocate)
首先调用_IO_SYSSTAT去获取文件信息,_IO_SYSSTAT对应vtable中的__stat函数(这里是_IO_file_stat),它通过__fxstat64来获取,其内部由系统调用实现;之后根据得到的st_blksize对size进行设置。
int
_IO_file_stat (_IO_FILE *fp, void *st)
{
return __fxstat64 (_STAT_VER, fp->_fileno, (struct stat64 *) st);
}
libc_hidden_def (_IO_file_stat)
再之后通过malloc得到分配的缓冲区,再调用_IO_setb
设置FILE缓冲区
跟进_IO_setb,设置了_IO_buf_base和_IO_buf_end,还有_flags1
2
3
4
5
6
7
8
9
10
11
12
13void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)
之后逐步返回到_IO_file_xsgetn
可以看到_IO_buf_base与_IO_buf_end都被设置了,大小为0x1000(0x603240 - 0x602240):
_IO_buf_base = 0x602240 "",
_IO_buf_end = 0x603240 "",
第二步:
1 | while (want > 0) |
第三步:
1 | else |
第四步:__underflow
因为是第一次读取数据,此时的fp->_IO_read_end以及fp->_IO_read_ptr都是0,因此会进入到__underflow1
2
3
4
5
6
7
8
9
10
11/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) //输入缓冲区不能满足需求,调用__underflow读入数据
{
if (__underflow (fp) == EOF)
break;
continue;
}
进入__underflow函数,位于/libio/genops.c1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)
可以看到经过一些检查之后会调用_IO_UNDERFLOW,跟进,调用了/libio/fileops.c中的1
2int
_IO_new_file_underflow (FILE *fp)
检查FILE结构体的_flags标志位是否包含_IO_NO_READS,如果存在这个标志位则设置错误标志并返回EOF,其中_IO_NO_READS标志位的定义是#define _IO_NO_READS 4 /* Reading not allowed */。
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
如果fp->_IO_buf_base为null,则调用_IO_doallocbuf分配输入缓冲区。和前面一样1
2
3
4
5
6
7
8
9
10
11
12
13if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
接着初始化设置FILE结构体指针,将他们都设置成fp->_IO_buf_base1
2
3
4fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;
调用_IO_SYSREAD(vtable中的_IO_file_read函数),该函数最终执行系统调用read,读取文件数据,
数据读入到fp->_IO_buf_base中,读入大小为输入缓冲区的大小fp->_IO_buf_end - fp->_IO_buf_base。1
2count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
读取完成后更新输入缓冲区中已有数据的大小,即执行fp->_IO_read_end += count。
设置完之后返回_IO_file_xsgetn,通过while循环回到第二步,将输入缓冲区中的数据拷贝至目标缓冲区,流程结束,返回。
- ray-cp大佬解释了为什么在_IO_UNDERFLOW(即_IO_new_file_underflow)中又检查并调用了一次_IO_doallocbuf来分配输入缓冲区:虽然一般的输入函数底层都会调用__underflow,但并非全部如此,例如scanf调用的是__uflow函数,其内部并未提前分配缓冲区,之后同样会调用到_IO_UNDERFLOW,因此需要在这里进行缓冲区的分配。
fwrite
框架
主体是_IO_new_file_xsputn函数,该函数调用_IO_new_file_overflow来建立或刷新输出缓冲区,
其中缓冲区的申请由_IO_file_doallocate负责,
最后调用_IO_default_xsputn将剩余数据拷贝至输出缓冲区
细节
fwrite->_IO_fwrite
跟进,进入_IO_fwrite函数
1 | size_t |
_IO_sputn->_IO_new_file_xsputn
可以看到前面的流程和fread相似,之后调用了主体函数_IO_sputn,跟进,进入了/libio/fileops.c中的_IO_new_file_xsputn1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23size_t
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
...
/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */
这一段判断了缓冲区剩余的空间,存在了count中1
2
3
4
5
6
7
8
9/* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}
如果缓冲区还有剩余,则将数据拷贝至输出缓冲区。并计算是否还有目标输出剩余1
2
3
4
5
6
7
8 if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;
_IO_OVERFLOW->_IO_new_file_overflow
如果还有剩余,则说明缓冲区未建立或已经满了,需要使用_IO_OVERFLOW刷新缓冲区。跟进该函数,是位于fileops.c中的_IO_new_file_overflow1
2int
_IO_new_file_overflow (_IO_FILE *f, int ch)
继续跟进,先检查是否有_IO_NO_WRITES标志位
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
检查_IO_write_base是否为空或_IO_CURRENTLY_PUTTING标志位是否为0,若为空则分配输出缓冲区。(所以一般只要调用过stdout输出过该标志位就是1)
这里调用的是_IO_doallocbuf来分配,与fread中相同。1
2
3
4
5
6
7
8 if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
跟进_IO_setg,是一句宏,将与read相关的三个指针赋值为_IO_buf_base1
2#define _IO_setg(fp, eb, g, eg) ((fp)->_IO_read_base = (eb),\
(fp)->_IO_read_ptr = (g), (fp)->_IO_read_end = (eg))
可以看下修改结果1
2
3
4
5
6
7
8_IO_read_ptr = 0x603480 "",
_IO_read_end = 0x603480 "",
_IO_read_base = 0x603480 "",
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x603480 "",
_IO_buf_end = 0x604480 "",
再之后就是相关指针的设置,对读写相关的指针与flags等进行了赋值。1
2
3
4
5
6
7
8
9_flags = -72536956, --> 0xfbad2c84
_IO_read_ptr = 0x603480 "",
_IO_read_end = 0x603480 "",
_IO_read_base = 0x603480 "",
_IO_write_base = 0x603480 "",
_IO_write_ptr = 0x603480 "",
_IO_write_end = 0x604480 "",
_IO_buf_base = 0x603480 "",
_IO_buf_end = 0x604480 "",
赋值之后就要开始调用_IO_do_write函数。1
2
3
4
5if ((f->_flags & _IO_UNBUFFERED)
|| ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
if (_IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base) == EOF)
return EOF;
_IO_do_write->_IO_new_do_write
跟进调用了fileops.c中的_IO_new_do_write1
2
3
4
5
6
7int
_IO_new_do_write (FILE *fp, const char *data, size_t to_do)
{
return (to_do == 0
|| (size_t) new_do_write (fp, data, to_do) == to_do) ? 0 : EOF;
}
libc_hidden_ver (_IO_new_do_write, _IO_do_write)
new_do_write
看到主要调用了new_do_write函数,位于该函数下方。1
2static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
看到刚开始进行了标志位的判断,然后看read_end与write_base是否存在偏移,有则调用_IO_SYSSEEK校正指针位置。1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
之后调用_IO_SYSWRITE输出输出缓冲区的内容。1
count = _IO_SYSWRITE (fp, data, to_do);
_IO_SYSWRITE->_IO_new_file_write
跟进_IO_SYSWRITE查看输出缓冲区的内容是怎样被输出的1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23ssize_t
_IO_new_file_write (FILE *f, const void *data, ssize_t n)
{
ssize_t to_do = n;
while (to_do > 0)
{
ssize_t count = (__builtin_expect (f->_flags2
& _IO_FLAGS2_NOTCANCEL, 0)
? __write_nocancel (f->_fileno, data, to_do)
: __write (f->_fileno, data, to_do));
if (count < 0)
{
f->_flags |= _IO_ERR_SEEN;
break;
}
to_do -= count;
data = (void *) ((char *) data + count);
}
n -= to_do;
if (f->_offset >= 0)
f->_offset += n;
return n;
}
可以看到执行了系统调用__write来将其输出。
返回之后调用_IO_setg刷新缓冲区指针并返回。1
2
3
4
5
6
7 _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}
返回到_IO_new_file_xsputn中继续往后走,以输出缓冲区的大小(_IO_buf_end - _IO_buf_base,这里是0x1000)作为block_size,计算剩余数据中整块(block_size整数倍)的部分do_write;如果do_write不为0,则不经过缓冲区,直接调用new_do_write进行输出
 /* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);
if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}
最后处理缓冲区剩余数据,将剩余数据输出至输出缓冲区1
2
3
4
5
6 /* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}
之所以这里只是把数据放进输出缓冲区而没有立即写入文件,是因为缓冲区中的内容会留到之后由 _IO_flush_all_lockp()函数统一刷新,该函数会在下面三种情况下被调用:
1:当 libc 执行 abort 流程时。
2:当执行 exit 函数时。
3:当执行流从 main 函数返回时。
_IO_default_xsputn
这里调用的是_IO_default_xsputn,跟进该函数,跳转到genops.c中的_IO_default_xsputn1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--;
}
这里根据本次可拷贝的大小不同分别处理:如果大于20则调用__mempcpy,否则使用for循环逐字节赋值。最后如果仍有数据剩余(输出缓冲区已无可用空间),则调用_IO_OVERFLOW刷新缓冲区并写入下一个字符,之后返回,这个流程基本就结束了。最后看下此时的fp,bcdef…是data2中的数据。
_IO_read_ptr = 0x603480 "bcdefghij1234567890",
_IO_read_end = 0x603480 "bcdefghij1234567890",
_IO_read_base = 0x603480 "bcdefghij1234567890",
_IO_write_base = 0x603480 "bcdefghij1234567890",
_IO_write_ptr = 0x603494 "",
_IO_write_end = 0x604480 "",
_IO_buf_base = 0x603480 "bcdefghij1234567890",
_IO_buf_end = 0x604480 "",
fclose
框架
主要是对链接入_IO_list_all中的FILE结构体,还有fread,fwrite建立的输入输出缓冲区进行释放处理。
细节
fclose->_IO_new_fclose
跟进fclose,进入了iofclose.c中的_IO_new_fclose函数,开始先对fp进行了检查1
CHECK_FILE(fp, EOF);
之后第一步:unlink,将fp从_IO_list_all中脱下。
这里需要注意下,在后面的利用中,为了能够直接调用_IO_FINISH,会在这里修改_flags来绕过_IO_un_link与_IO_file_close_it
1 | /* First unlink the stream. */ |
_IO_un_link
跟进该函数,是位于genops.c中的_IO_un_link1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33void
_IO_un_link (struct _IO_FILE_plus *fp)
{
if (fp->file._flags & _IO_LINKED)
{
FILE **f;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
if (_IO_list_all == NULL)
;
else if (fp == _IO_list_all)
_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
else
for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
if (*f == (FILE *) fp)
{
*f = fp->file._chain;
break;
}
fp->file._flags &= ~_IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}
libc_hidden_def (_IO_un_link)
可以看到,开始时先检查标志位_flags,之后判断_IO_list_all是否为空,不为空则看fp是否在表头,若不在表头则遍历_IO_list_all单链表进行寻找。最后对其标志位_flags进行修改,清除_IO_LINKED,该标志位表明了是否位于_IO_list_all链表中。
返回之后,调用_IO_file_close_it函数关闭文件并释放缓冲区
1 | if (fp->_flags & _IO_IS_FILEBUF) |
_IO_file_close_it
跟进该函数1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39int
_IO_new_file_close_it (FILE *fp)
{
int write_status;
if (!_IO_file_is_open (fp))
return EOF;
if ((fp->_flags & _IO_NO_WRITES) == 0
&& (fp->_flags & _IO_CURRENTLY_PUTTING) != 0)
write_status = _IO_do_flush (fp);
else
write_status = 0;
_IO_unsave_markers (fp);
int close_status = ((fp->_flags2 & _IO_FLAGS2_NOCLOSE) == 0
? _IO_SYSCLOSE (fp) : 0);
/* Free buffer. */
if (fp->_mode > 0)
{
if (_IO_have_wbackup (fp))
_IO_free_wbackup_area (fp);
_IO_wsetb (fp, NULL, NULL, 0);
_IO_wsetg (fp, NULL, NULL, NULL);
_IO_wsetp (fp, NULL, NULL);
}
_IO_setb (fp, NULL, NULL, 0);
_IO_setg (fp, NULL, NULL, NULL);
_IO_setp (fp, NULL, NULL);
_IO_un_link ((struct _IO_FILE_plus *) fp);
fp->_flags = _IO_MAGIC|CLOSED_FILEBUF_FLAGS;
fp->_fileno = -1;
fp->_offset = _IO_pos_BAD;
return close_status ? close_status : write_status;
}
libc_hidden_ver (_IO_new_file_close_it, _IO_file_close_it)
首先通过_fileno字段判断文件是否打开,然后检查_flags标志位,判断当前是否允许写入且处于输出模式,是则调用_IO_do_flush刷新输出缓冲区。查看_IO_do_flush,发现是宏定义
#define _IO_do_flush(_f) \
((_f)->_mode <= 0 \
? _IO_do_write(_f, (_f)->_IO_write_base, \
(_f)->_IO_write_ptr-(_f)->_IO_write_base) \
: _IO_wdo_write(_f, (_f)->_wide_data->_IO_write_base, \
((_f)->_wide_data->_IO_write_ptr \
- (_f)->_wide_data->_IO_write_base)))
可以看到主要调用_IO_do_write将输出缓冲区输出。此时原先在输出缓冲区中的值才会被写入文件,所以一般程序运行中没有close的话打开文件可能会发现还没有写进去就是这个原因。再之后对_markers标志位进行了处理。然后可以看到后面调用了_IO_SYSCLOSE函数,看到是fileops.c中的_IO_file_close
_IO_SYSCLOSE->_IO_file_close
1 | int |
#define close_not_cancel(fd) \
__close (fd)
实际就是通过系统调用__close关闭文件描述符。
返回之后,调用_IO_setb/_IO_setg/_IO_setp,其中_IO_setb是设置结构体的buf指针, _IO_setg是设置read相关的指针,_IO_setp是设置write相关的指针
_IO_setb (fp, NULL, NULL, 0);
_IO_setg (fp, NULL, NULL, NULL);
_IO_setp (fp, NULL, NULL);1
2
3
##### _IO_setb
进入_IO_setb,可以看到释放了缓冲区
void
_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)1
2
返回之后,再次调用了_IO_un_link
_IO_un_link ((struct _IO_FILE_plus *) fp);
fp->_flags = _IO_MAGIC|CLOSED_FILEBUF_FLAGS;
fp->_fileno = -1;
fp->_offset = _IO_pos_BAD;
return close_status ? close_status : write_status;1
2
3
4
返回到_IO_new_fclose函数,最后调用_IO_FINISH,进入到fileops.c中的_IO_new_file_finish
#### _IO_FINISH->_IO_new_file_finish
看到先检查文件是否仍打开,如果打开就刷新缓冲区并关闭,不过之前已经关闭过了,这里会直接进入genops.c中的_IO_default_finish
void
_IO_new_file_finish (FILE *fp, int dummy)
{
if (_IO_file_is_open (fp))
{
_IO_do_flush (fp);
if (!(fp->_flags & _IO_DELETE_DONT_CLOSE))
_IO_SYSCLOSE (fp);
}
_IO_default_finish (fp, 0);
}
libc_hidden_ver (_IO_new_file_finish, _IO_file_finish)1
2
3
##### _IO_default_finish
可以看到还是对缓冲区的释放,指针的释放等等
void
_IO_default_finish (FILE *fp, int dummy)
{
struct _IO_marker *mark;
if (fp->_IO_buf_base && !(fp->_flags & _IO_USER_BUF))
{
free (fp->_IO_buf_base);
fp->_IO_buf_base = fp->_IO_buf_end = NULL;
}
for (mark = fp->_markers; mark != NULL; mark = mark->_next)
mark->_sbuf = NULL;
if (fp->_IO_save_base)
{
free (fp->_IO_save_base);
fp->_IO_save_base = NULL;
}
_IO_un_link ((struct _IO_FILE_plus *) fp);
#ifdef _IO_MTSAFE_IO
if (fp->_lock != NULL)
_IO_lock_fini (*fp->_lock);
#endif
}
libc_hidden_def (_IO_default_finish)1
2
3
4
5
6
7
最后返回到_IO_new_fclose后,对fp指向的结构体进行了free,整个流程就结束了。
# 2.24 check
libc 2.24 之后专门添加了对虚表的检查,分别是/libio/libioP.h中的IO_validate_vtable 与 /libio/vtables.c中的_IO_vtable_check。
所有的 libio vtables 被放进了专用的只读的 __libc_IO_vtables 段,以使它们在内存中连续。在任何间接跳转之前,vtable 指针将根据段边界进行检查,如果指针不在这个段,则调用函数 _IO_vtable_check() 做进一步的检查,并且在必要时终止进程。
/* Perform vtable pointer validation. If validation fails, terminate
the process. */
static inline const struct _IO_jump_t *
IO_validate_vtable (const struct _IO_jump_t *vtable)
{
/* Fast path: The vtable pointer is within the __libc_IO_vtables
section. */
uintptr_t section_length = __stop___libc_IO_vtables - __start___libc_IO_vtables;
uintptr_t ptr = (uintptr_t) vtable;
uintptr_t offset = ptr - (uintptr_t) __start___libc_IO_vtables;
/* 对vtable指针范围进行检查,不满足则调用_IO_vtable_check进行检查 */
if (__glibc_unlikely (offset >= section_length))
/* The vtable pointer is not in the expected section. Use the
slow path, which will terminate the process if necessary. */
_IO_vtable_check ();
return vtable;
}
void attribute_hidden
_IO_vtable_check (void)
{
#ifdef SHARED
/* Honor the compatibility flag. */
void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables);
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (flag);
#endif
if (flag == &_IO_vtable_check)
return;
/* In case this libc copy is in a non-default namespace, we always
need to accept foreign vtables because there is always a
possibility that FILE * objects are passed across the linking
boundary. */
{
Dl_info di;
struct link_map *l;
if (!rtld_active ()
|| (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0
&& l->l_ns != LM_ID_BASE))
return;
}
#else /* !SHARED */
/* We cannot perform vtable validation in the static dlopen case
because FILE * handles might be passed back and forth across the
boundary. Therefore, we disable checking in this case. */
if (__dlopen != NULL)
return;
#endif
__libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");
}
`