_IO_FILE 是个比较神奇的东西,而且用起来比较方便,因此花点时间捋一下它的源码,方便后面的精进

这里使用glibc 2.23的源码进行调试,需要注意的是2.23版本之后的vtable添加了检查,这里调试的是没有检查的,加了debug symbol的glibc。源码与编译方式如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//gcc -Wl,-dynamic-linker /glibc/glibc-2.23/debug_x64/lib/ld-linux-x86-64.so.2 -o iofile -g iofile.c
#include<stdio.h>

/* Demo driver used to step through glibc's fopen/fread/fwrite/fclose.
   Returns 0 on success and 1 when a file cannot be opened, written or
   closed.  */
int main(void){
    /* "wb+" creates/truncates test1, so the fread below has nothing to
       read; it exists only to drive the stdio read path in the debugger.  */
    FILE *fp1 = fopen("test1", "wb+");
    if (fp1 == NULL) {
        perror("fopen test1");
        return 1;
    }

    char data1[20];
    /* The result is deliberately ignored: a short read is expected here.  */
    (void) fread(data1, 1, 20, fp1);

    FILE *fp2 = fopen("test2", "wb");
    if (fp2 == NULL) {
        perror("fopen test2");
        fclose(fp1);
        return 1;
    }

    int status = 0;

    /* Exactly 20 characters: the array holds no terminating NUL, which is
       fine because it is only passed to fwrite with an explicit length.  */
    char data2[20] = {"abcdefghij1234567890"};
    if (fwrite(data2, 1, 20, fp2) != 20) {
        perror("fwrite test2");
        status = 1;
    }

    if (fclose(fp1) != 0) {
        perror("fclose test1");
        status = 1;
    }
    /* fclose flushes the buffered data, so a write error can surface here.  */
    if (fclose(fp2) != 0) {
        perror("fclose test2");
        status = 1;
    }
    return status;
}

fopen

框架

malloc为结构体分配内存空间。

_IO_no_init 对FILE结构体进行初始化。

_IO_file_init将结构体链接进_IO_list_all链表。

_IO_file_fopen执行系统调用打开文件。

返回句柄。

细节

fopen->_IO_new_fopen

跟进去,发现是_IO_new_fopen函数,在libio/iofopen.c中。(这里展示的不是2.23的代码,所以与下面调试时的代码会有所偏差)

1
2
3
4
5
/* Entry point reached from fopen: forwards FILENAME and MODE to
   __fopen_internal, passing 1 as its third argument.  */
FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}

继续跟进__fopen_internal

__fopen_internal

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Allocate and initialise a new stream, then open FILENAME with MODE.
   Returns the result of __fopen_maybe_mmap on success; returns NULL when
   malloc fails or when _IO_file_fopen returns NULL.  */
FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
/* A single malloc'd object holds the _IO_FILE_plus, its lock (only when
   _IO_MTSAFE_IO is defined) and the wide-character data.  */
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

if (new_f == NULL)
return NULL;
#ifdef _IO_MTSAFE_IO
new_f->fp.file._lock = &new_f->lock;
#endif
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps); // Initialise the FILE structure inside new_f.
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps; // Set the vtable to _IO_file_jumps.
_IO_new_file_init_internal (&new_f->fp); // Link the FILE structure into _IO_list_all.
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);

/* The open failed: unlink the stream again and release the allocation.  */
_IO_un_link (&new_f->fp);
free (new_f);
return NULL;
}

可以看到先申请了一个locked_FILE 结构体,其内部有三个结构体成员,分别为fp(_IO_FILE_plus)、lock(_IO_lock_t)和wd(_IO_wide_data)
太大了就不贴出来了

1
2
3
4
5
6
7
8
9
pwndbg> print new_f
$3 = (struct locked_FILE *) 0x602010
pwndbg> x/20gx 0x602010-0x10
0x602000: 0x0000000000000000 0x0000000000000231
0x602010: 0x0000000000000000 0x0000000000000000
0x602020: 0x0000000000000000 0x0000000000000000
0x602030: 0x0000000000000000 0x0000000000000000
0x602040: 0x0000000000000000 0x0000000000000000
0x602050: 0x0000000000000000 0x0000000000000000

_IO_no_init

继续跟进,_IO_no_init 对file结构体进行初始化。这里2.23版本是这样的

1
2
3
4
5
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
#else
_IO_no_init (&new_f->fp.file, 1, 0, NULL, NULL);
#endif

进入函数内部,该函数位于/libio/genops.c中,开始时进入_IO_old_init (fp, flags),对fp结构体部分初始化,绝大多数为空,返回后主要对fp->_wide_data结构体进行了初始化。

返回后设置vtable为_IO_file_jumps。

_IO_file_init

继续运行,调用_IO_file_init (&new_f->fp),这里位于/libio/fileops.c;

将前面创建的file结构体放入_IO_list_all中。可以看到主体是调用的_IO_link_in(fp)

1
2
3
4
5
6
7
8
9
10
11
12
/* Put a newly created file stream into its initial "closed" state and
   link it into the stream list via _IO_link_in.  */
void
_IO_file_init (struct _IO_FILE_plus *fp)
{
/* POSIX.1 allows another file handle to be used to change the position
of our file descriptor. Hence we actually don't know the actual
position before we do the first fseek (and until a following fflush). */
fp->file._offset = _IO_pos_BAD;
fp->file._flags |= CLOSED_FILEBUF_FLAGS;

_IO_link_in (fp);
/* No descriptor is attached yet.  */
fp->file._fileno = -1;
}

跟进该函数,又回到了genops.c中,看到开始检查flag的标志位是否是_IO_LINKED,因为FILE结构体是通过_IO_list_all的单链表进行组织管理的,所以如果不在链表中便对其进行相应的处理,

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Insert FP at the head of the _IO_list_all list.  Does nothing when the
   _IO_LINKED flag is already set on the stream.  */
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
/* Mark the stream as linked before touching the list.  */
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
/* With _IO_MTSAFE_IO the list lock and the stream lock are held around
   the list update below.  */
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
/* The old list head becomes FP's successor; FP becomes the new head.  */
fp->file._chain = (FILE *) _IO_list_all;
_IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}

查看下_IO_list_all申请的结构体,即前面三个结构体中第一个结构体。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
pwndbg> print *_IO_list_all
$5 = {
file = {
_flags = -72538996,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd6540 <_IO_2_1_stderr_>,
_fileno = -1,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x6020f0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x602100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
},
vtable = 0x7ffff7dd46e0 <__GI__IO_file_jumps>
}

函数返回到__fopen_internal中,进入一个判断

1
2
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);

_IO_new_file_fopen

跟进判断函数,跳转到了/libio/fileops.c中的_IO_new_file_fopen函数

1
2
3
FILE *
_IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
int is32not64)

前面先检查文件是否打开,之后设置打开模式,最后调用 _IO_file_open

1
2
result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
is32not64);
_IO_file_open

跟进_IO_file_open函数,可以看到,调用了系统级函数__open打开文件,之后设置fp->_fileno为文件描述符,最后再次调用_IO_link_in确保设置进入_IO_list_all。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Open FILENAME with the given POSIX_MODE and PROT, attach the resulting
   descriptor to FP and link FP into the stream list.  Returns FP on
   success, or NULL when the open fails or the append-mode seek fails.  */
FILE *
_IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
/* O_LARGEFILE is added unless IS32NOT64 is non-zero.  */
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
fdesc = __open_nocancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
if (fdesc < 0)
return NULL;
fp->_fileno = fdesc;
/* Apply READ_WRITE to the stream flags, restricted to the read/write/
   append bits named in the mask.  */
_IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
/* For append mode, send the file offset to the end of the file. Don't
update the offset cache though, since the file handle is not active. */
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
/* A failed seek is tolerated only when errno is ESPIPE.  */
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
__close_nocancel (fdesc);
return NULL;
}
}
_IO_link_in ((struct _IO_FILE_plus *) fp);
return fp;
}

之后返回之后调用了__fopen_maybe_mmap,之后将fp指针返回,整个流程就结束了。

fread

框架

如果_IO_buf_base为空,调用_IO_doallocbuf(fp)初始化指针,建立输入缓冲区

根据输入缓冲区与目标缓冲区情况分别进行处理,底层调用read的系统调用来进行读取并复制

细节

进入之前再查看下FILE结构体fp指针的内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
pwndbg> print *fp
$6 = {
_flags = -72539008,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd6540 <_IO_2_1_stderr_>,
_fileno = 3,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x6020f0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x602100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
}

fread->_IO_fread

跟进fread函数,看到是位于/libio/fread.c中的_IO_fread,看到先计算了请求字节数,之后对fp加锁,执行_IO_sgetn,之后释放锁。

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Read up to COUNT items of SIZE bytes each from FP into BUF.
   Returns COUNT when every requested byte was read, otherwise the number
   of bytes read divided by SIZE; returns 0 when SIZE * COUNT is 0.  */
size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
/* The actual transfer is done by _IO_sgetn between acquiring and
   releasing the stream lock.  */
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}

_IO_sgetn ==> _IO_XSGETN

可以看到主体应该是_IO_sgetn函数,跟进该函数,发现主体为_IO_XSGETN函数

1
2
3
4
5
6
7
/* Read N bytes from FP into DATA by dispatching to _IO_XSGETN and
   returning its result unchanged.  */
size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)

查看定义

1
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)

继续跟进,进入/libio/fileops.c,该函数较长,做了不少事情。

1
2
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)

如果_IO_buf_base为空,调用_IO_doallocbuf(fp)初始化指针,建立输入缓冲区

1
2
3
4
5
6
7
8
9
10
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

第一步:_IO_doallocbuf

跟进_IO_doallocbuf,进入/libio/genops.c

1
2
3
4
5
6
7
8
9
10
11
/* Make sure FP has a buffer.  Nothing is done when _IO_buf_base is
   already set; otherwise a buffer is requested through _IO_DOALLOCATE,
   falling back to the one-byte _shortbuf.  */
void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)
return;
/* Try a real allocation unless the stream is unbuffered with
   _mode <= 0.  */
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
if (_IO_DOALLOCATE (fp) != EOF)
return;
/* Fallback: use the single byte of _shortbuf as the buffer.  */
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)

这里重新回顾下开始时的结构体状态,这些变量未被初始化

1
2
3
4
5
_IO_read_ptr = 0x0, 
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,

_IO_file_doallocate

因此开始检验是否被初始化,如果已经初始化就返回。检查标志位之后调用vtable中的_IO_file_doallocate,由注释可以看到这个函数主要用来分配输入缓冲区。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* Allocate a file buffer, or switch to unbuffered I/O.  Streams for
TTY devices default to line buffered.
Returns 1 once the buffer is installed, or EOF when malloc fails. */
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct stat64 st;

/* Default buffer size; may be lowered below from the stat result.  */
size = BUFSIZ;
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
}
#if defined _STATBUF_ST_BLKSIZE
/* Use the file's block size when it is positive and below BUFSIZ.  */
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);
if (__glibc_unlikely (p == NULL))
return EOF;
/* Install [p, p + size) as the stream's buffer.  */
_IO_setb (fp, p, p + size, 1);
return 1;
}
libc_hidden_def (_IO_file_doallocate)

首先调用_IO_SYSSTAT去获取文件信息,_IO_SYSSTAT函数是vtable中的 __stat函数,获取文件信息,这里是通过_fxstat64来获取,其内部是通过系统调用来实现,之后对size进行设置。

1
2
3
4
5
6
/* Fill ST (treated as a struct stat64) with information about FP's file
   descriptor by calling __fxstat64, and return that call's result.  */
int
_IO_file_stat (_IO_FILE *fp, void *st)
{
return __fxstat64 (_STAT_VER, fp->_fileno, (struct stat64 *) st);
}
libc_hidden_def (_IO_file_stat)

再之后通过malloc得到分配的缓冲区,再调用_IO_setb设置FILE缓冲区
跟进_IO_setb,设置了_IO_buf_base和_IO_buf_end,还有_flags

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Install [B, EB) as F's buffer.  A previous buffer is freed first unless
   _IO_USER_BUF is set.  A non-zero A clears _IO_USER_BUF; a zero A sets
   it.  */
void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
/* Release the old buffer only when _IO_USER_BUF is not set on it.  */
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)

之后逐步返回到_IO_file_xsgetn

可以看到_IO_buf_base与_IO_buf_end都被设置了,大小为0x1000

1
2
_IO_buf_base = 0x602240 "", 
_IO_buf_end = 0x603240 "",

第二步:
1
2
3
4
5
6
7
8
9
while (want > 0)    
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have) //如果缓冲区里有足够的字符,就直接将缓冲区里的字符复制到目标区
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
第三步:
1
2
3
4
5
6
7
8
   else
{
if (have > 0) //如果缓冲区字符不够,就将其先复制到目标区
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}
第四步:__underflow

因为是第一次读取数据,此时的fp->_IO_read_end以及fp->_IO_read_ptr都是0,因此会进入到__underflow

1
2
3
4
5
6
7
8
9
10
11
/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) //输入缓冲区不能满足需求,调用__underflow读入数据
{
if (__underflow (fp) == EOF)
break;

continue;
}

进入__underflow函数,位于/libio/genops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Refill path for a byte stream.  Returns the next unread byte (as an
   unsigned char value) when the read area still holds data, EOF on the
   failure paths below, and otherwise the result of _IO_UNDERFLOW.  */
int
__underflow (FILE *fp)
{
/* Fail when _IO_fwide (fp, -1) does not report -1 for this stream.  */
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;

if (fp->_mode == 0)
_IO_fwide (fp, -1);
/* A stream currently in put mode must be switched to get mode first.  */
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
/* Data is already available: return it without reading more.  */
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
/* Nothing buffered: dispatch to the stream's underflow handler.  */
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)

可以看到经过一些检查之后会调用_IO_UNDERFLOW,跟进,调用了/libio/fileops.c中的

1
2
int
_IO_new_file_underflow (FILE *fp)

检查FILE结构体的_flags标志位是否包含_IO_NO_READS,如果存在这个标志位则直接返回EOF,其中_IO_NO_READS标志位的定义是 #define _IO_NO_READS 4 /* Reading not allowed */。

1
2
3
4
5
6
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}

如果fp->_IO_buf_base为null,则调用_IO_doallocbuf分配输入缓冲区。和前面一样

1
2
3
4
5
6
7
8
9
10
11
12
13
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

接着初始化设置FILE结构体指针,将他们都设置成fp->_IO_buf_base

1
2
3
4
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

调用_IO_SYSREAD(vtable中的_IO_file_read函数),该函数最终执行系统调用read,读取文件数据,
数据读入到fp->_IO_buf_base中,读入大小为输入缓冲区的大小fp->_IO_buf_end - fp->_IO_buf_base。

1
2
count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);

设置输入缓冲区已有数据的size,即执行fp->_IO_read_end += count,使_IO_read_end指向已读入数据的末尾。
设置完之后通过while循环进入第二部分将输入缓冲区拷贝至目标缓冲区,流程结束,返回。

  • ray-cp大佬解释了下为什么最后在_IO_UNDERFLOW中又一次检查调用了_IO_doallocbuf分配输入缓冲区。因为虽然一般的输入底层都在调用__underflow函数,但是并非全部,scanf函数调用的是__uflow函数,其内部并未提前分配缓冲区,之后也调用了_IO_UNDERFLOW函数,因此需要在这里进行缓冲区的分配。

fwrite

框架

主体是_IO_new_file_xsputn函数,该函数调用_IO_new_file_overflow建立刷新缓冲区,
其中_IO_file_doallocate负责缓冲区的申请建立
最后调用_IO_default_xsputn将剩余输出至输出缓冲区

细节

fwrite->_IO_fwrite

跟进,进入_IO_fwrite函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Write COUNT items of SIZE bytes each from BUF to FP.
   Returns COUNT when everything was accepted (or _IO_sputn reported EOF,
   see below), otherwise the number of bytes written divided by SIZE;
   returns 0 when SIZE * COUNT is 0.  */
size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
size_t request = size * count;
size_t written = 0;
CHECK_FILE (fp, 0);
if (request == 0)
return 0;
_IO_acquire_lock (fp);
/* The write is attempted only when the vtable offset is non-zero or
   _IO_fwide (fp, -1) reports -1; otherwise WRITTEN stays 0.  */
if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
written = _IO_sputn (fp, (const char *) buf, request);
_IO_release_lock (fp);
/* We have written all of the input in case the return value indicates
this or EOF is returned. The latter is a special case where we
simply did not manage to flush the buffer. But the data is in the
buffer and therefore written as far as fwrite is concerned. */
if (written == request || written == EOF)
return count;
else
return written / size;
}
libc_hidden_def (_IO_fwrite)

_IO_sputn->_IO_new_file_xsputn

可以看到前面的流程和fread相似,之后调用了主体函数_IO_sputn,跟进,进入了/libio/fileops.c中的_IO_new_file_xsputn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
size_t
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
...
/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

这一段判断了缓冲区剩余的空间,存在了count中

1
2
3
4
5
6
7
8
9
/* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}

如果缓冲区还有剩余,则将数据拷贝至输出缓冲区。并计算是否还有目标输出剩余

1
2
3
4
5
6
7
8
 if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

_IO_OVERFLOW->_IO_new_file_overflow

如果还有剩余,则说明缓冲区未建立或已经满了,需要使用_IO_OVERFLOW刷新缓冲区。跟进该函数,是位于fileops.c中的_IO_new_file_overflow

1
2
int
_IO_new_file_overflow (_IO_FILE *f, int ch)

继续跟进,先检查是否有_IO_NO_WRITES标志位

1
2
3
4
5
6
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}

检查_IO_write_base是否为空或_IO_CURRENTLY_PUTTING标志位是否为0,若为空则分配输出缓冲区。(所以一般只要调用过stdout输出过该标志位就是1)
这里调用的是_IO_doallocbuf来分配,与fread中相同。

1
2
3
4
5
6
7
8
 if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}

跟进_IO_setg,是一句宏,将与read相关的三个指针赋值为_IO_buf_base

1
2
/* Set the three read-area pointers of FP in one expression:
   _IO_read_base = EB, _IO_read_ptr = G, _IO_read_end = EG.  */
#define _IO_setg(fp, eb, g, eg)  ((fp)->_IO_read_base = (eb),\
(fp)->_IO_read_ptr = (g), (fp)->_IO_read_end = (eg))

可以看下修改结果

1
2
3
4
5
6
7
8
_IO_read_ptr = 0x603480 "", 
_IO_read_end = 0x603480 "",
_IO_read_base = 0x603480 "",
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x603480 "",
_IO_buf_end = 0x604480 "",

再之后就是相关指针的设置,对读写相关的指针与flags等进行了赋值。

1
2
3
4
5
6
7
8
9
_flags = -72536956,   --> 0xfbad2c84
_IO_read_ptr = 0x603480 "",
_IO_read_end = 0x603480 "",
_IO_read_base = 0x603480 "",
_IO_write_base = 0x603480 "",
_IO_write_ptr = 0x603480 "",
_IO_write_end = 0x604480 "",
_IO_buf_base = 0x603480 "",
_IO_buf_end = 0x604480 "",

赋值之后就要开始调用_IO_do_write函数。

1
2
3
4
5
if ((f->_flags & _IO_UNBUFFERED)
|| ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
if (_IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base) == EOF)
return EOF;

_IO_do_write->_IO_new_do_write

跟进调用了fileops.c中的_IO_new_do_write

1
2
3
4
5
6
7
/* Write TO_DO bytes starting at DATA to FP.  Returns 0 when TO_DO is 0 or
   new_do_write reports exactly TO_DO bytes written, and EOF otherwise.  */
int
_IO_new_do_write (FILE *fp, const char *data, size_t to_do)
{
return (to_do == 0
|| (size_t) new_do_write (fp, data, to_do) == to_do) ? 0 : EOF;
}
libc_hidden_ver (_IO_new_do_write, _IO_do_write)

new_do_write

看到主要调用了new_do_write函数,位于该函数下方。

1
2
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)

看到刚开始进行了标志位的判断,然后看read_end与write_base是否存在偏移,有则调用_IO_SYSSEEK校正指针位置。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
 if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;

之后调用_IO_SYSWRITE输出输出缓冲区的内容。

1
count = _IO_SYSWRITE (fp, data, to_do);

_IO_SYSWRITE->_IO_new_file_write

跟进_IO_SYSWRITE查看输出缓冲区的内容是怎样被输出的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Write N bytes from DATA to F's file descriptor, repeating the write
   call until everything is written or a call returns a negative value.
   Returns the number of bytes actually written.  */
ssize_t
_IO_new_file_write (FILE *f, const void *data, ssize_t n)
{
ssize_t to_do = n;
while (to_do > 0)
{
/* Use the non-cancellable write when _IO_FLAGS2_NOTCANCEL is set.  */
ssize_t count = (__builtin_expect (f->_flags2
& _IO_FLAGS2_NOTCANCEL, 0)
? __write_nocancel (f->_fileno, data, to_do)
: __write (f->_fileno, data, to_do));
if (count < 0)
{
/* Record the failure on the stream and stop.  */
f->_flags |= _IO_ERR_SEEN;
break;
}
/* Advance past the bytes this call wrote.  */
to_do -= count;
data = (void *) ((char *) data + count);
}
/* N now becomes the number of bytes written.  */
n -= to_do;
/* Keep the cached offset in step, but only when it is non-negative.  */
if (f->_offset >= 0)
f->_offset += n;
return n;
}

可以看到执行了系统调用__write来将其输出。

返回之后调用_IO_setg刷新缓冲区指针并返回。

1
2
3
4
5
6
7
 _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}

返回到_IO_new_file_xsputn中继续往后走,计算了buf是否为比较大的block(0x1000),如果是则直接调用new_do_write进行输出

1
2
3
4
5
6
7
8
9
10
11
     /* Try to maintain alignment: write a whole number of blocks.  */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);

if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}

最后处理缓冲区剩余数据,将剩余数据输出至输出缓冲区

1
2
3
4
5
6
     /* Now write out the remainder.  Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}

关于这里只将数据输出到输出缓冲区而没有将其写至文件,其实是主函数会调用 _IO_flush_all_lockp()函数来清空缓冲区,该函数会在下面三种情况下被调用:

1:当 libc 执行 abort 流程时。

2:当执行 exit 函数时。

3:当执行流从 main 函数返回时

_IO_default_xsputn

这里调用的是_IO_default_xsputn,跟进该函数,跳转到genops.c中的_IO_default_xsputn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--;
}

这里按本次可拷贝的字节数count的大小分别处理:如果大于20则调用__mempcpy,否则使用for循环逐字节赋值。最后如果还有数据没有写完(more不为0),则调用_IO_OVERFLOW刷新缓冲区并输出下一个字符,之后返回,这个流程基本就结束了。最后看下此时的fp,bcdef…是data2中的数据。

1
2
3
4
5
6
7
8
_IO_read_ptr = 0x603480 "bcdefghij1234567890", 
_IO_read_end = 0x603480 "bcdefghij1234567890",
_IO_read_base = 0x603480 "bcdefghij1234567890",
_IO_write_base = 0x603480 "bcdefghij1234567890",
_IO_write_ptr = 0x603494 "",
_IO_write_end = 0x604480 "",
_IO_buf_base = 0x603480 "bcdefghij1234567890",
_IO_buf_end = 0x604480 "",

fclose

框架

主要是对链接入_IO_list_all中的FILE结构体,还有fread,fwrite建立的输入输出缓冲区进行释放处理。

细节

fclose->_IO_new_fclose

跟进fclose,进入了iofclose.c中的_IO_new_fclose函数,开始先对fp进行了检查

1
CHECK_FILE(fp, EOF);

之后第一步:unlink,将fp从_IO_list_all中脱下。
这里需要注意下,在后面的利用中,为了能够直接调用_IO_FINISH,会在这里修改标志位来绕过_IO_un_link与_IO_file_close_it

1
2
3
  /* First unlink the stream.  */
if (fp->_IO_file_flags & _IO_IS_FILEBUF)
_IO_un_link ((struct _IO_FILE_plus *) fp);

跟进该函数,是位于genops.c中的_IO_un_link

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Remove FP from the _IO_list_all list and clear its _IO_LINKED flag.
   Does nothing when _IO_LINKED is not set.  */
void
_IO_un_link (struct _IO_FILE_plus *fp)
{
if (fp->file._flags & _IO_LINKED)
{
FILE **f;
#ifdef _IO_MTSAFE_IO
/* With _IO_MTSAFE_IO the list lock and the stream lock are held around
   the list update below.  */
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
/* Three cases: empty list, FP is the head, or FP is further down.  */
if (_IO_list_all == NULL)
;
else if (fp == _IO_list_all)
_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
else
/* F walks the _chain link fields so the matching link can be
   overwritten in place.  */
for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
if (*f == (FILE *) fp)
{
*f = fp->file._chain;
break;
}
/* The flag is cleared even when FP was not found in the list.  */
fp->file._flags &= ~_IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}
libc_hidden_def (_IO_un_link)

可以看到,开始时先检查标志位_flags,之后判断_IO_list_all是否为空,不为空则看fp是否在表头,若不在表头则遍历_IO_list_all单链表进行寻找。最后对其标志位_flags进行修改,该标志位表明了是否位于_IO_list_all链表中。

返回之后,调用_IO_file_close_it函数关闭释放缓冲区

1
2
3
4
if (fp->_flags & _IO_IS_FILEBUF)
status = _IO_file_close_it (fp);
else
status = fp->_flags & _IO_ERR_SEEN ? -1 : 0;

_IO_file_close_it

跟进该函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/* Flush and close the file behind FP, release its buffers and reset the
   stream to the closed state.  Returns EOF when the file is not open;
   otherwise the close status when it is non-zero, else the flush
   status.  */
int
_IO_new_file_close_it (FILE *fp)
{
int write_status;
if (!_IO_file_is_open (fp))
return EOF;

/* Flush only when writes are allowed and the stream is currently
   putting.  */
if ((fp->_flags & _IO_NO_WRITES) == 0
&& (fp->_flags & _IO_CURRENTLY_PUTTING) != 0)
write_status = _IO_do_flush (fp);
else
write_status = 0;

_IO_unsave_markers (fp);

/* The descriptor is closed unless _IO_FLAGS2_NOCLOSE is set.  */
int close_status = ((fp->_flags2 & _IO_FLAGS2_NOCLOSE) == 0
? _IO_SYSCLOSE (fp) : 0);

/* Free buffer. */
if (fp->_mode > 0)
{
if (_IO_have_wbackup (fp))
_IO_free_wbackup_area (fp);
_IO_wsetb (fp, NULL, NULL, 0);
_IO_wsetg (fp, NULL, NULL, NULL);
_IO_wsetp (fp, NULL, NULL);
}
/* Reset the buffer, read-area and write-area pointers to NULL.  */
_IO_setb (fp, NULL, NULL, 0);
_IO_setg (fp, NULL, NULL, NULL);
_IO_setp (fp, NULL, NULL);

/* Unlink the stream and put the closed-state values back.  */
_IO_un_link ((struct _IO_FILE_plus *) fp);
fp->_flags = _IO_MAGIC|CLOSED_FILEBUF_FLAGS;
fp->_fileno = -1;
fp->_offset = _IO_pos_BAD;

return close_status ? close_status : write_status;
}
libc_hidden_ver (_IO_new_file_close_it, _IO_file_close_it)

首先通过_fileno字段判断了文件是否打开,然后对其标志位进行了判断,目的是判断是否处于输出状态,是则调用_IO_do_flush刷新缓冲区。查看_IO_do_flush,发现是宏定义

1
2
3
4
5
6
7
/* Flush the pending output of _f: when _mode <= 0 the byte write area
   [_IO_write_base, _IO_write_ptr) is passed to _IO_do_write, otherwise
   the corresponding area of _wide_data is passed to _IO_wdo_write.  */
#define _IO_do_flush(_f) \
((_f)->_mode <= 0 \
? _IO_do_write(_f, (_f)->_IO_write_base, \
(_f)->_IO_write_ptr-(_f)->_IO_write_base) \
: _IO_wdo_write(_f, (_f)->_wide_data->_IO_write_base, \
((_f)->_wide_data->_IO_write_ptr \
- (_f)->_wide_data->_IO_write_base)))

可以看到主要调用_IO_do_write将输出缓冲区输出。此时原先在输出缓冲区中的值才会被写入文件,所以一般程序运行中没有close的话打开文件可能会发现还没有写进去就是这个原因。再之后对_markers标志位进行了处理。然后可以看到后面调用了_IO_SYSCLOSE函数,看到是fileops.c中的_IO_file_close

_IO_SYSCLOSE->_IO_file_close

1
2
3
4
5
6
7
8
9
10
11
/* Close FP's file descriptor via close_not_cancel and return its
   result.  */
int
_IO_file_close (FILE *fp)
{
/* Cancelling close should be avoided if possible since it leaves an
unrecoverable state behind. */
return close_not_cancel (fp->_fileno);
}
libc_hidden_def (_IO_file_close)
```

看到主要调用了close_not_cancel,而它本身是个宏定义,

#define close_not_cancel(fd) \
__close (fd)

1
2
3
4

实际就是通过系统调用__close关闭

返回之后,调用_IO_setb/_IO_setg/_IO_setp,其中_IO_setb是设置结构体的buf指针, _IO_setg是设置read相关的指针,_IO_setp是设置write相关的指针

_IO_setb (fp, NULL, NULL, 0);
_IO_setg (fp, NULL, NULL, NULL);
_IO_setp (fp, NULL, NULL);

1
2
3

##### _IO_setb
进入_IO_setb,可以看到释放了缓冲区

/* Install [B, EB) as F's buffer.  A previous buffer is freed first unless
   _IO_USER_BUF is set.  A non-zero A clears _IO_USER_BUF; a zero A sets
   it.  */
void
_IO_setb (_IO_FILE *f, char *b, char *eb, int a)
{
  /* Release the old buffer only when _IO_USER_BUF is not set on it.  */
  if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
    free (f->_IO_buf_base);
  f->_IO_buf_base = b;
  f->_IO_buf_end = eb;
  if (a)
    f->_flags &= ~_IO_USER_BUF;
  else
    f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)

1
2

返回之后,再次调用了_IO_un_link

_IO_un_link ((struct _IO_FILE_plus *) fp);
fp->_flags = _IO_MAGIC|CLOSED_FILEBUF_FLAGS;
fp->_fileno = -1;
fp->_offset = _IO_pos_BAD;

return close_status ? close_status : write_status;

1
2
3
4
5
返回到_IO_new_fclose函数,最后调用_IO_FINISH,进入到fileops.c中的_IO_new_file_finish

#### _IO_FINISH->_IO_new_file_finish

看到先检查文件是否仍打开,如果打开就刷新缓冲区并关闭,不过之前已经关闭过了,这里会直接进入genops.c中的_IO_default_finish

/* Final teardown for a file stream: when the file is still open, flush it
   and (unless _IO_DELETE_DONT_CLOSE is set) close it, then hand over to
   _IO_default_finish.  DUMMY is not used.  */
void
_IO_new_file_finish (FILE *fp, int dummy)
{
if (_IO_file_is_open (fp))
{
_IO_do_flush (fp);
if (!(fp->_flags & _IO_DELETE_DONT_CLOSE))
_IO_SYSCLOSE (fp);
}
_IO_default_finish (fp, 0);
}
libc_hidden_ver (_IO_new_file_finish, _IO_file_finish)

1
2
3

##### _IO_default_finish
可以看到还是对缓冲区的释放,指针的释放等等

void
_IO_default_finish (FILE fp, int dummy)
{
struct _IO_marker
mark;
if (fp->_IO_buf_base && !(fp->_flags & _IO_USER_BUF))
{
free (fp->_IO_buf_base);
fp->_IO_buf_base = fp->_IO_buf_end = NULL;
}

for (mark = fp->_markers; mark != NULL; mark = mark->_next)
mark->_sbuf = NULL;

if (fp->_IO_save_base)
{
free (fp->_IO_save_base);
fp->_IO_save_base = NULL;
}

_IO_un_link ((struct _IO_FILE_plus *) fp);

#ifdef _IO_MTSAFE_IO
if (fp->_lock != NULL)
_IO_lock_fini (*fp->_lock);

#endif
}
libc_hidden_def (_IO_default_finish)

1
2
3
4
5
6
7
8
最后返回后对fp指针进行了free,就结束了。


# 2.24 check

libc 2.24 之后专门添加了对虚表的检查,分别是/libio/libioP.h中的IO_validate_vtable 与 /libio/vtables.c中的_IO_vtable_check。

所有的 libio vtables 被放进了专用的只读的 __libc_IO_vtables 段,以使它们在内存中连续。在任何间接跳转之前,vtable 指针将根据段边界进行检查,如果指针不在这个段,则调用函数 _IO_vtable_check() 做进一步的检查,并且在必要时终止进程。

/ Perform vtable pointer validation. If validation fails, terminate
the process.
/
static inline const struct _IO_jump_t
IO_validate_vtable (const struct _IO_jump_t
vtable)
{
/ Fast path: The vtable pointer is within the __libc_IO_vtables
section.
/
uintptr_t section_length = stop_libc_IO_vtables - start_libc_IO_vtables;
uintptr_t ptr = (uintptr_t) vtable;
uintptr_t offset = ptr - (uintptr_t) start_libc_IO_vtables;

/* 对vtable指针范围进行检查,不满足则调用_IO_vtable_check进行检查

if (__glibc_unlikely (offset >= section_length))
/ The vtable pointer is not in the expected section. Use the
slow path, which will terminate the process if necessary.
/
_IO_vtable_check ();
return vtable;
}

/* Slow path of vtable validation, reached when a vtable pointer lies
   outside the expected section.  Returns normally in the compatibility
   cases below; otherwise terminates the process via __libc_fatal.  */
void attribute_hidden
_IO_vtable_check (void)
{
#ifdef SHARED
  /* Honor the compatibility flag.  */
  void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables);
#ifdef PTR_DEMANGLE
  PTR_DEMANGLE (flag);
#endif
  if (flag == &_IO_vtable_check)
    return;

  /* In case this libc copy is in a non-default namespace, we always
     need to accept foreign vtables because there is always a
     possibility that FILE * objects are passed across the linking
     boundary.  */
  {
    Dl_info di;
    struct link_map *l;
    if (!rtld_active ()
        || (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0
            && l->l_ns != LM_ID_BASE))
      return;
  }

#else /* !SHARED */
  /* We cannot perform vtable validation in the static dlopen case
     because FILE * handles might be passed back and forth across the
     boundary.  Therefore, we disable checking in this case.  */
  if (__dlopen != NULL)
    return;
#endif

  __libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");
}

`