Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Guide: Notes about code of state-threads. ST代码分析. #15

Open
winlinvip opened this issue Oct 30, 2020 · 4 comments
Open

Guide: Notes about code of state-threads. ST代码分析. #15

winlinvip opened this issue Oct 30, 2020 · 4 comments

Comments

@winlinvip
Copy link
Member

代码分析备忘录。

@winlinvip
Copy link
Member Author

winlinvip commented Oct 30, 2020

_st_iterate_threads

分析如何迭代所有的coroutines。

_ST_THREADQ

这个是所有线程的列表,迭代所有的coroutines当然是要迭代这个双向链表,它会在创建线程时把线程加到链表中。

_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, int joinable, int stk_size)
{
#ifdef DEBUG
    _ST_ADD_THREADQ(thread);
#endif

这个ADD宏定义,详细实现如下图,简单来说就是把thread添加到双向链表_st_this_vp.thread_q中:

image

当我们迭代线程链表时,我们需要根据thread的成员tlink,来获取thread的指针:

    #define _ST_THREAD_THREADQ_PTR(_qp) \
        ((_st_thread_t *)((char *)(_qp) - offsetof(_st_thread_t, tlink)))

struct _st_thread {
    _st_clist_t tlink;          /* For putting on thread queue */

// https://stackoverflow.com/questions/7897877/how-does-the-c-offsetof-macro-work
#define offsetof(a,b) ((int)(&(((a*)(0))->b)))

(gdb) p ((int)(&(((_st_thread_t*)(0))->tlink)))
$38 = 72

// 展开上面的宏定义,可以获取到所有的coroutine的入口
(gdb) p *(_st_thread_t*)(((char*)_st_this_vp.thread_q.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))
$46 = {state = 1, flags = 2, start = 0x5a565e <_st_idle_thread_start>, arg = 0x0, retval = 0x0, stack = 0xc2e190, links = {
    next = 0x9d7ab0 <_st_this_vp+16>, prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {
    next = 0xc2e218, prev = 0x9d7ae0 <_st_this_vp+64>}, due = 0, left = 0x0, right = 0x0, heap_index = 0, 
  private_data = 0x7f4444cf2f80, term = 0x0, context = {{__jmpbuf = {4096, 139931188931920, 12653800, 12654984, 0, 0, 
        139931188931888, 5920469}, __mask_was_saved = 0, __saved_mask = {__val = {0 <repeats 16 times>}}}}}

coroutine RSP

上面我们可以拿到每个thread的jmp_buf,也就是保存的堆栈信息。我们可以拿到RSP地址:

(gdb) info frame
Stack level 1, frame at 0x7fffffffe560:
 called by frame at 0x7fffffffe580, caller of frame at 0x7fffffffe530

(gdb) p $rsp
$14 = (void *) 0x7fffffffe530

#elif defined(__amd64__) || defined(__x86_64__)
    #define JB_RBX  0
    #define JB_RBP  1
    #define JB_R12  2
    #define JB_R13  3
    #define JB_R14  4
    #define JB_R15  5
    #define JB_RSP  6
    #define JB_PC   7

(gdb) p/x (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).context
$49 = {{__jmpbuf = {0x1000, 0x7f4444cf2d50, 0xc114e8, 0xc11988, 0x0, 0x0, 0x7f4444cf2d30, 0x5a56d5}, __mask_was_saved = 0x0, 
    __saved_mask = {__val = {0x0 <repeats 16 times>}}}}

倒数第二个就是RSP的值了,注意第一个是idle coroutine,第二个是一个原生coroutine也就是main(它的start是0x00):

    /*
     * Initialize primordial thread
     */
    thread = (_st_thread_t *) calloc(1, sizeof(_st_thread_t) + (ST_KEYS_MAX * sizeof(void *)));
    if (!thread)
        return -1;
    thread->private_data = (void **) (thread + 1);
    thread->state = _ST_ST_RUNNING;
    thread->flags = _ST_FL_PRIMORDIAL;
    _ST_SET_CURRENT_THREAD(thread);
    _st_active_count++;
#ifdef DEBUG
    _ST_ADD_THREADQ(thread);
#endif

(gdb) p (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))     
$7 = {state = 5, flags = 5, start = 0x0, arg = 0x0, retval = 0x0, stack = 0x0, links = {next = 0x9d7ab0 <_st_this_vp+16>, 
    prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {next = 0x7f4444ce0e78, prev = 0x7f4444cf2e78}, 
  due = 1603639789498746, left = 0x7f43ba533e30, right = 0x7f43b493be30, heap_index = 44, private_data = 0xc2e320, term = 0x0, context = {{
      __jmpbuf = {12646464, 140721635898384, 12653800, 12654984, 0, 0, 140721635898352, 5925227}, __mask_was_saved = 0, __saved_mask = {__val = {
          0 <repeats 16 times>}}}}}

堆栈分析看后面的注释。

iterator coroutines

当能拿到coroutine的context时,就可以直接跳到这个coroutine执行:

/*
 * Debug helper that walks every thread linked on _ST_THREADQ by jumping
 * into each thread's saved context in turn and showing its stack.
 *
 * The function is entered repeatedly during one iteration: the
 * MD_LONGJMP at the bottom transfers control into the visited thread,
 * and the function is then entered again with `thread` still set
 * (presumably through ST_DEBUG_ITERATE_THREADS() in the context-switch
 * macro -- confirm against the macro definition). The static variables
 * carry the iteration state across those entries.
 */
void _st_iterate_threads(void)
{
    /* Thread currently being visited; NULL when no iteration is running. */
    static _st_thread_t *thread = NULL;
    /* orig_jb: context of the call that started the iteration.
     * save_jb: copy of the visited thread's context, taken before jumping. */
    static jmp_buf orig_jb, save_jb;
    _st_clist_t *q;
    
    /* Flag is off: nothing to do, unless a visit is still in progress,
     * in which case put the visited thread's context back and return to
     * the point where the iteration started. */
    if (!_st_iterate_threads_flag) {
        if (thread) {
            memcpy(thread->context, save_jb, sizeof(jmp_buf));
            MD_LONGJMP(orig_jb, 1);
        }
        return;
    }
    
    if (thread) {
        /* Re-entry during an iteration: restore the visited thread's
         * context from the copy taken before the jump, then show it. */
        memcpy(thread->context, save_jb, sizeof(jmp_buf));
        _st_show_thread_stack(thread, NULL);
    } else {
        /* First entry: MD_SETJMP returns 0 now and non-zero when one of
         * the MD_LONGJMP(orig_jb, 1) calls brings control back here,
         * which marks the end of the iteration. */
        if (MD_SETJMP(orig_jb)) {
            _st_iterate_threads_flag = 0;
            thread = NULL;
            _st_show_thread_stack(thread, "Iteration completed");
            return;
        }
        /* Start the walk from the thread that initiated the iteration. */
        thread = _ST_CURRENT_THREAD();
        _st_show_thread_stack(thread, "Iteration started");
    }
    
    /* Advance to the next entry of the thread queue, skipping the list
     * head itself, which is not embedded in any thread. */
    q = thread->tlink.next;
    if (q == &_ST_THREADQ)
        q = q->next;
    ST_ASSERT(q != &_ST_THREADQ);
    thread = _ST_THREAD_THREADQ_PTR(q);
    /* Wrapped around to the initiating thread: finish via orig_jb. */
    if (thread == _ST_CURRENT_THREAD())
        MD_LONGJMP(orig_jb, 1);
    /* Keep a copy of the target context so it can be restored on
     * re-entry, then jump into the target thread. */
    memcpy(save_jb, thread->context, sizeof(jmp_buf));
    MD_LONGJMP(thread->context, 1);
}

注意这个函数,使用的是裸MD_LONGJMP和MD_SETJMP,它的执行顺序很不直观,需要逐步分析:

// 实际上这个setjmp第一次执行是返回0的。
// 而longjmp回来到这里时返回的是1。
        if (MD_SETJMP(orig_jb)) {
            // 这里意味着是longjmp(orig_jb),迭代所有coroutine结束了。
            _st_iterate_threads_flag = 0;
            thread = NULL;
            _st_show_thread_stack(thread, "Iteration completed");
            return;
        }
       
       // 这里是setjmp结束,也就是开始迭代,thread保存为当前的coroutine,从它开始迭代。
        thread = _ST_CURRENT_THREAD();
        _st_show_thread_stack(thread, "Iteration started");

Remark:这就是longjmp(ctx, val)第二个参数的原因,实际上这个val是给setjmp用的。

Remark: 只有打开汇编模式,用si执行,才能看到longjmp到setjmp的这个地方来。

后面就开始迭代各个coroutine执行了:

    // 这个就是迭代所有的coroutine的链表,如前面的分析。
    q = thread->tlink.next;
    if (q == &_ST_THREADQ)
        q = q->next;
    ST_ASSERT(q != &_ST_THREADQ);
    thread = _ST_THREAD_THREADQ_PTR(q);

   // 如果是开始迭代的thread,并不是跳到thread的context,而是到这个迭代函数的context。
    if (thread == _ST_CURRENT_THREAD())
        MD_LONGJMP(orig_jb, 1); // 到上面的返回的地方了。
  
   // 把coroutine的context保存一份,后面有用。
    memcpy(save_jb, thread->context, sizeof(jmp_buf));
    MD_LONGJMP(thread->context, 1);

注意下面代码实际上是迭代开始之后才会执行,而开始迭代并不会执行,从执行顺序看正好是反的:

     // 还原thread的context,这时候是跳到了coroutine的堆栈,
    // 但是执行时还是会再次进入这个迭代,所以还原后就继续切下一个线程了,
    // 就相当于进了下coroutine的堆栈,然后又切走了。
    if (thread) {
        memcpy(thread->context, save_jb, sizeof(jmp_buf));
        _st_show_thread_stack(thread, NULL);

// 比如如果有个coroutine是调用了sleep而切走的,
// 那么它堆栈就是在_ST_SWITCH_CONTEXT(me);这个地方,
// 但这个是一段代码,从迭代切到这里后,又立刻回到了迭代函数中去了。
int st_usleep(st_utime_t usecs)
{
    _st_thread_t *me = _ST_CURRENT_THREAD();
    
    if (me->flags & _ST_FL_INTERRUPT) {
        me->flags &= ~_ST_FL_INTERRUPT;
        errno = EINTR;
        return -1;
    }
    
    if (usecs != ST_UTIME_NO_TIMEOUT) {
        me->state = _ST_ST_SLEEPING;
        _ST_ADD_SLEEPQ(me, usecs);
    } else
        me->state = _ST_ST_SUSPENDED;
    
    _ST_SWITCH_CONTEXT(me);

上面的SWITCH的宏实际上是一段代码:

    ST_SWITCH_OUT_CB(_thread);            \
    if (!MD_SETJMP((_thread)->context)) { \
        _st_vp_schedule();                  \
    }                                     \
    ST_DEBUG_ITERATE_THREADS();           \

同样的这个setjmp在sleep第一次调用时为0,所以进入schedule切走。
而被迭代函数longjmp回来时,又再次进入了迭代函数,再恢复了这个coroutine的context。
所以下次再执行时这个还是sleep的地方,并不会对coroutine有什么影响。

最后一段代码,是如果中途改变了flag,则直接恢复thread的context,并结束迭代:

    // _st_iterate_threads_flag=0, thread != NULL,只有手动设置flag=0才会实现,
    // 正常逻辑肯定是flag=0时,thread肯定NULL了。
    if (!_st_iterate_threads_flag) {
        if (thread) {
            memcpy(thread->context, save_jb, sizeof(jmp_buf));
            MD_LONGJMP(orig_jb, 1);
        }
        return;
    }

这个调用链条如下图所示:

image

这样也能看懂调度的逻辑:

image

GDB scripts

根据上面的分析,我们可以写一个GDB脚本gdb/srs.py,可以看当前或者coredump中有多少个coroutines:

(gdb) source gdb/srs.py 
(gdb) nn_coroutines 
this coroutine(&_st_this_thread->tlink) is: 0x7f43ba761e78
next is 0x7f43b92d9e78, total 500
next is 0x7f43b5c37e78, total 1000
next is 0x7f43bfd71e78, total 31500
next is 0x7f43bdad9e78, total 32000
next is 0x7f43bd8f3e78, total 32500
total coroutines: 32717

从上面可以看出,一共有3.2万个coroutine,如果使用mmap会出问题,可以用这个程序验证。参考SRS #509

Conclusion

  1. _st_this_vp.thread_q中有所有的coroutine,可以手动获取它们的堆栈信息。
  2. _st_iterate_threads迭代所有的coroutine,它本身直接longjmp到coroutine,获取堆栈后又再进入它还原coroutine。
  3. setjmp第一次调用返回0,而longjmp到setjmp的地方时返回值是1,只有通过si汇编单步调试才能看到。

@winlinvip
Copy link
Member Author

winlinvip commented Oct 30, 2020

_st_stack_new

coroutine的堆栈,是在调用函数st_thread_create时创建的,创建后顶部和尾部4k的区间mprotect设置为REDZONE,访问这个区间的内存时会直接报错,也就是堆栈消耗完了:

_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, int joinable, int stk_size)
{
    stack = _st_stack_new(stk_size);

_st_stack_t *_st_stack_new(int stack_size)
{
    ts->vaddr = _st_new_stk_segment(ts->vaddr_size);
#ifdef DEBUG
    mprotect(ts->vaddr, REDZONE, PROT_NONE);
    mprotect(ts->stk_top + extra, REDZONE, PROT_NONE);
#endif

可以通过/proc/<pid>/maps看到mmap分配的内存块:

[root@05ff04a933cd trunk]# cat /proc/5904/maps
7ffa1f4df000-7ffa1f4e0000 ---p 00000000 00:00 0  // 这个是不可访问的REDZONE,栈底4K内存。
7ffa1f4e0000-7ffa1f4f0000 rwxp 00000000 00:00 0 // 这个是coroutine的64KB堆栈。
7ffa1f4f0000-7ffa1f4f2000 ---p 00000000 00:00 0   // 这个是不可访问的REDZONE,栈顶4K内存。

但是在core中是看不到这块mmap的内存的:

(gdb) info proc m
Mapped address spaces:

          Start Addr           End Addr       Size     Offset objfile
      0x7ffa1f2ee000     0x7ffa1f310000    0x22000        0x0 /usr/lib64/ld-2.17.so
      0x7ffa1f50f000     0x7ffa1f510000     0x1000    0x21000 /usr/lib64/ld-2.17.so

7ffa1f2ee000-7ffa1f310000 r-xp 00000000 fe:01 1969247                    /usr/lib64/ld-2.17.so // 这个有
7ffa1f4df000-7ffa1f4e0000 ---p 00000000 00:00 0   // 这个没有
7ffa1f4e0000-7ffa1f4f0000 rwxp 00000000 00:00 0  // 这个没有
7ffa1f4f0000-7ffa1f4f2000 ---p 00000000 00:00 0  // 这个没有
7ffa1f50f000-7ffa1f510000 r-xp 00021000 fe:01 1969247                    /usr/lib64/ld-2.17.so // 这个有

内存分配后,会以下图方式布局,thread对象实际上是从里面分配的:

image

而实际分配内存是下面的函数,它可能从heap或者mmap分配:

static char *_st_new_stk_segment(int size)
{
#ifdef MALLOC_STACK
    void *vaddr = malloc(size);
#else
    vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, mmap_flags, zero_fd, 0);
#endif

mmap时,在coredump文件中,vaddr地址是不可访问,而在gdb运行时却不是:

// GDB调试时,可以看到vaddr是0x7f6f114dc000
[root@05ff04a933cd research]# gdb -p 2087
(gdb) p *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))->stack
$1 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f6f114dc000 "", vaddr_size = 73728, stk_size = 65536, stk_bottom = 0x7f6f114dd000 "", 
  stk_top = 0x7f6f114ed000 "", sp = 0x7f6f114ecd80}

// 生成core文件
[root@05ff04a933cd research]# kill -11 2087

// 这里vaddr和top都是不能访问,而bottom和sp没问题。
[root@05ff04a933cd trunk]# gdb objs/srs -c core 
(gdb) p *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink)))))->stack
$1 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f6f114dc000 <Address 0x7f6f114dc000 out of bounds>, vaddr_size = 73728, stk_size = 65536, 
  stk_bottom = 0x7f6f114dd000 "", stk_top = 0x7f6f114ed000 "", sp = 0x7f6f114ecd80}

实际上mmap的堆栈是不释放的,所以这个地方并不是内存破坏,而是就是这么显示的。

vaddr = 0x7f6f114dc000 <Address 0x7f6f114dc000 out of bounds>, 
vaddr_size = 73728, 
stk_size = 65536, 
stk_bottom = 0x7f6f114dd000 "", 
stk_top = 0x7f6f114ed000 "", 
sp = 0x7f6f114ecd80

coroutine stack

根据rsp和stack sp,就可以知道coroutine的堆栈信息,把堆栈打出来就可以看到调用过程了,比如第三个coroutine:

// RSP是倒数第二个:0x7f4444ce0b90
(gdb) p/x (*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).context.__jmpbuf
$16 = {0x1000, 0x7f4444ce0c00, 0x7ffc4f1eb360, 0xc11988, 0x0, 0x0, 0x7f4444ce0b90, 0x5a5287}

// SP就是coroutine的堆栈开始点。
(gdb) p/x *(*(_st_thread_t*)(((char*)_st_this_vp.thread_q.next.next.next) - ((int)(&(((_st_thread_t*)(0))->tlink))))).stack
$19 = {links = {next = 0x0, prev = 0x0}, vaddr = 0x7f4444cd0000, vaddr_size = 0x12000, stk_size = 0x10000, stk_bottom = 0x7f4444cd1000, 
  stk_top = 0x7f4444ce1000, sp = 0x7f4444ce0d80}

// 堆栈大小是496字节,也就是62个指针
(gdb) p 0x7f4444ce0d80-0x7f4444ce0b90
$20 = 496

// 可以看到这个是个listener,在accept时切走的
(gdb) x/62xa 0x7f4444ce0b90
0x7f4444ce0b90:	0x8e597a00	0xffffffffffffffff
0x7f4444ce0ba0:	0x1b3c4be30	0x7f4444ce0c30
0x7f4444ce0bb0:	0x7f43b3c4ac70	0x7f43b3cb79e0
0x7f4444ce0bc0:	0x7f4444ce0e30	0x7f4444ce0c30
0x7f4444ce0bd0:	0x100000001	0x4f9c54 <SrsSTCoroutine::start()+224>
0x7f4444ce0be0:	0x7f4444ce0e30	0x7f4444ce0c38
0x7f4444ce0bf0:	0x7f4444ce0c10	0x0
0x7f4444ce0c00:	0x7f4444ce0c40	0x5a776c <st_netfd_poll+62>
0x7f4444ce0c10:	0x7f4444ce0c40	0xffffffffffffffff
0x7f4444ce0c20:	0x144ce0c40	0xc2e450
0x7f4444ce0c30:	0x100000009	0x0
0x7f4444ce0c40:	0x7f4444ce0c90	0x5a783d <st_accept+83>
0x7f4444ce0c50:	0xffffffffffffffff	0x0
0x7f4444ce0c60:	0x0	0xc2e450
0x7f4444ce0c70:	0x947c3e18	0x0
0x7f4444ce0c80:	0x7f4444ce0cc0	0xffffffff004b8ba9
0x7f4444ce0c90:	0x7f4444ce0cc0	0x4aaa6c <srs_accept(void*, sockaddr*, int*, long)+48>
0x7f4444ce0ca0:	0xffffffffffffffff	0x0
0x7f4444ce0cb0:	0x0	0xc2e450
0x7f4444ce0cc0:	0x7f4444ce0cf0	0x589da0 <SrsTcpListener::cycle()+148>
0x7f4444ce0cd0:	0x0	0xc2e500
0x7f4444ce0ce0:	0xc786e0	0x0
0x7f4444ce0cf0:	0x7f4444ce0d20	0x4f9f96 <SrsSTCoroutine::cycle()+142>
0x7f4444ce0d00:	0x0	0xc2e5e0
0x7f4444ce0d10:	0x0	0x0
0x7f4444ce0d20:	0x7f4444ce0d50	0x4fa00b <SrsSTCoroutine::pfn(void*)+43>
0x7f4444ce0d30:	0x0	0xc2e5e0
0x7f4444ce0d40:	0x0	0xc2e5e0
0x7f4444ce0d50:	0x7f4444ce0d70	0x5a5a92 <_st_thread_main+40>
0x7f4444ce0d60:	0x0	0x7f4444ce0e30
0x7f4444ce0d70:	0x7ffc4f1eb2f0	0x5a6202 <st_thread_create+306>

// 对应代码如下:
srs_error_t SrsTcpListener::cycle()
{
    while (true) {
        srs_netfd_t fd = srs_accept(lfd, NULL, NULL, SRS_UTIME_NO_TIMEOUT);

coroutine frame

frame地址可以由jmpbuf算出:

Frame Address = RBP + 16 = jmpbuf[1] + 16

下面详细分析setjmp的情况。

coroutine在调度时,是调用的宏定义_ST_SWITCH_CONTEXT,也就是切到其他的coroutine:

int st_usleep(st_utime_t usecs)
{
    _st_thread_t *me = _ST_CURRENT_THREAD();
    
        me->state = _ST_ST_SLEEPING;
        _ST_ADD_SLEEPQ(me, usecs);
    
    _ST_SWITCH_CONTEXT(me);

这个宏定义展开如下:

#define _ST_SWITCH_CONTEXT(_thread)       \
    if (!MD_SETJMP((_thread)->context)) { \
        _st_vp_schedule();                  \
    }                                     \

实际上就是setjmp。用一个最简单的调用例子,来看ST的寄存器变化,以及RBP和frame的关系:

/* Minimal ST demo: initialize the library, then call st_usleep with
 * 1000 * 1000 microseconds on every pass of an endless loop. */
int main(int argc, char *argv[]) {
    st_init();
    while (1) {
        st_usleep(1000 * 1000);
    }
    return 0;
}

// 设置断点在st_usleep的入口
B+>│0x4028a2 <st_usleep>    push   %rbp 
       │0x4028a3 <st_usleep+1>  mov    %rsp,%rbp
       │0x4028a6 <st_usleep+4>  sub    $0x20,%rsp 

// 这时候RBP还是上一个函数(main)的堆栈开始地址。
(gdb) p	$rbp
$34 = (void *) 0x7fffffffe570

// 而fp是函数的堆栈开始的地方,fp到frame地址中间有16字节两个指针,
// 分别是main的rip和rbp,也就是从这个函数的stack中可以找到调用者的信息。
(gdb) p	$fp
$33 = (void *) 0x7fffffffe550
(gdb) x/2xa $fp 
0x7fffffffe550:	0x7fffffffe570	0x40113b <main(int, char**)+30>

// frame地址就是fp+16字节
(gdb) info frame
Stack level 0, frame at	0x7fffffffe560:
(gdb) p	$fp
$36 = (void *) 0x7fffffffe550

当然fp这个寄存器我们并没有保存,所以我们继续执行三条汇编,看下RBP和frame关系:

B+│0x4028a2 <st_usleep>    push   %rbp             // 保存RBP,上个函数的RBP
     │0x4028a3 <st_usleep+1>  mov    %rsp,%rbp // 将RSP赋值给RBP,现在RBP就是这个函数的RBP了
     │0x4028a6 <st_usleep+4>  sub    $0x20,%rsp  // 堆栈下移0x20字节(4个指针),为局部变量和参数副本预留空间,大小并不固定
   >│0x4028aa <st_usleep+8>  mov    %rdi,-0x18(%rbp) // 从这里开始初始化函数的输入参数,rdi就是第一个参数。

// RBP就是FP了,不过FP是调用函数时设置的,而RBP是函数的入口的两条汇编设置的。
(gdb) p	$rbp
$37 = (void *) 0x7fffffffe550
(gdb) p	$fp
$38 = (void *) 0x7fffffffe550
(gdb) info frame
Stack level 0, frame at	0x7fffffffe560:

Notes: Linux x86_64传参方法是:第一个参数rdi,第二个rsi,第三个是rdx,第四个是rcx,第五个是r8,第六个是r9,再往后就在rsp堆栈往上存储。
返回值是rax。

从上面可以看到,如果我们知道了RBP,就可以知道Frame的地址了:

Frame Address = RBP + 16 // Linux x86_64,其他平台需要调试看看。

我们继续执行到st_usleep开始切换上下文的地方:

int st_usleep(st_utime_t usecs)
{
    _st_thread_t *me = _ST_CURRENT_THREAD();
        me->state = _ST_ST_SLEEPING;
        _ST_ADD_SLEEPQ(me, usecs);
    
    _ST_SWITCH_CONTEXT(me);

(gdb) f
#0  st_usleep (usecs=1000000) at sync.c:131
131	    _ST_SWITCH_CONTEXT(me);

// 可以看到RBP和FP不会变化,是这个函数的堆栈开始的点,
// 而RSP堆栈指针会随着局部变量和返回值而变化。
(gdb) p $rsp
$39 = (void *) 0x7fffffffe530
(gdb) p $rbp
$40 = (void *) 0x7fffffffe550
(gdb) p $fp 
$43 = (void *) 0x7fffffffe550
(gdb) info frame
Stack level 1, frame at	0x7fffffffe560:

// 下面要单步指令执行si,进入到setjmp函数的汇编实现 _st_md_cxt_save
0x402959 <st_usleep+183>        callq  0x4070c0 <_st_md_cxt_save>
0x4070c0 <_st_md_cxt_save>      mov    %rbx,(%rdi)

// 进入函数_st_md_cxt_save后,可以看到RBP还是上个函数的RBP,而FP是由callq自动设置了。
// callq还自动把上个函数的rip,保存到了堆栈,RSP也降低了一个指针。
(gdb) p	$fp
$44 = (void *) 0x7fffffffe520
(gdb) p	$rbp
$45 = (void *) 0x7fffffffe550
(gdb) p	$rsp
$46 = (void *) 0x7fffffffe528
(gdb) info frame
Stack level 0, frame at	0x7fffffffe530:

// 如果查看堆栈的信息,可以看到上个函数的rip在当前RSP位置:
(gdb) x /1xa $rsp
0x7fffffffe528:	0x40295e <st_usleep+188>

总结下,在setjmp/_st_md_cxt_save这个函数中几个关键寄存器的保存:

  • RBX, RBP, R12~R15,直接保存到env(地址在rdi中),对应jmpbuf的0到5个元素。
  • RSP,由于callq自动把前个函数的rip保存到了堆栈,所以我们+8后就是上个函数的RSP。
  • PC,我们取RSP地址的内容,就是上个函数的RIP也就是PC了。

image

不过我们知道了frame地址,也无法直接切换过去,比如下面两个coroutine:

(gdb) info frame
Stack level 1, frame at 0x7ffff7fe9d60:
 rip = 0x4016d8 in _st_idle_thread_start (sched.c:231); saved rip 0x401a87
 called by frame at 0x7ffff7fe9d80, caller of frame at 0x7ffff7fe9d30
 source language c.
 Arglist at 0x7ffff7fe9d50, args: arg=0x0
 Locals at 0x7ffff7fe9d50, Previous frame's sp is 0x7ffff7fe9d60
 Saved registers:
  rbp at 0x7ffff7fe9d50, rip at 0x7ffff7fe9d58

(gdb) p/x _st_this_thread->context
$73 = {{__jmpbuf = {0x1000, 0x7ffff7fe9d50, 0x401030, 0x7fffffffe650, 0x0, 0x0, 0x7ffff7fe9d30, 0x4016ca}, __mask_was_saved = 0x0, __saved_mask = {
      __val = {0x0 <repeats 16 times>}}}}

(gdb) f
#1  0x00000000004016d8 in _st_idle_thread_start (arg=0x0) at sched.c:231
231	        _ST_SWITCH_CONTEXT(me);

(gdb) bt
#0  _st_iterate_threads () at sched.c:672
#1  0x00000000004016d8 in _st_idle_thread_start (arg=0x0) at sched.c:231
#2  0x0000000000401a87 in _st_thread_main () at sched.c:337
(gdb) info frame
Stack level 1, frame at 0x7fffffffe560:
 rip = 0x40296c in st_usleep (sync.c:131); saved rip 0x40113b
 called by frame at 0x7fffffffe580, caller of frame at 0x7fffffffe530
 source language c.
 Arglist at 0x7fffffffe550, args: usecs=1000000
 Locals at 0x7fffffffe550, Previous frame's sp is 0x7fffffffe560
 Saved registers:
  rbp at 0x7fffffffe550, rip at 0x7fffffffe558

(gdb) p/x _st_this_thread->context
$74 = {{__jmpbuf = {0x0, 0x7fffffffe550, 0x401030, 0x7fffffffe650, 0x0, 0x0, 0x7fffffffe530, 0x40295e}, __mask_was_saved = 0x0, __saved_mask = {
      __val = {0x0 <repeats 16 times>}}}}

(gdb) f
#1  0x000000000040296c in st_usleep (usecs=1000000) at sync.c:131
131	    _ST_SWITCH_CONTEXT(me);

(gdb) bt
#0  _st_iterate_threads () at sched.c:672
#1  0x000000000040296c in st_usleep (usecs=1000000) at sync.c:131
#2  0x000000000040113b in main (argc=1, argv=0x7fffffffe658) at st0.cpp:10

它们的frame地址是0x7ffff7fe9d60和0x7fffffffe560,但是却不能随意使用frame xxx切换,只能在当前backtrace中才能切换。

coroutine backtrace

当我们知道了线程的RBP,那么我们就可以知道整个堆栈,例如我们看当前线程的前一个线程:

// 先计算偏移量,从线程的链表获取到线程的偏移量
(gdb) p (int)(&(((_st_thread_t*)(0))->tlink))
$55 = 72

// 根据当前线程的链表,查找到前一个线程,其中RBP是139928880446560
p *(_st_thread_t*)((char*)_st_this_thread->tlink.prev-72)
__jmpbuf = {4096, 139928880446560, 139928887965408, 8313290, 2266154456, 2274288280, 139928880446528, 5925227}

// 查看RBP开始的两个指针,依次是调用者的RBP和返回地址(位于调用者函数内),
// 以此类推,可以知道整个调用链条。
(gdb) x /2xa 139928880446560
0x7f43bb367c60:	0x7f43bb367c80	0x4aa9c6 <srs_usleep(long)+24>
(gdb) x/2xa 0x7f43bb367c80
0x7f43bb367c80:	0x7f43bb367cf0	0x5919fd <SrsNgExec::cycle()+381>
(gdb) x/2xa 0x7f43bb367cf0
0x7f43bb367cf0:	0x7f43bb367d20	0x4f9f96 <SrsSTCoroutine::cycle()+142>
(gdb) x/2xa 0x7f43bb367d20
0x7f43bb367d20:	0x7f43bb367d50	0x4fa00b <SrsSTCoroutine::pfn(void*)+43>
(gdb) x/2xa 0x7f43bb367d50
0x7f43bb367d50:	0x7f43bb367d70	0x5a5a92 <_st_thread_main+40>

注意:如果是当前线程,这个RBP可能是不对的,因为jmpbuf保存的是之前的一个位置。当前coroutine直接用bt就可以看到堆栈。

discovery in coredump

第一个coroutine是idle,第二个就是main,也就是SrsServer的coroutine,我们可以看它的调用堆栈:

(gdb) p *(_st_thread_t*)((char*)_st_this_vp.thread_q.next.next-72)
$14 = {state = 5, flags = 5, start = 0x0, arg = 0x0, retval = 0x0, stack = 0x0, links = {next = 0x9d7ab0 <_st_this_vp+16>, 
    prev = 0x9d7ab0 <_st_this_vp+16>}, wait_links = {next = 0x0, prev = 0x0}, tlink = {next = 0x7f4444ce0e78, prev = 0x7f4444cf2e78}, 
  due = 1603639789498746, left = 0x7f43ba533e30, right = 0x7f43b493be30, heap_index = 44, private_data = 0xc2e320, term = 0x0, context = {{
      __jmpbuf = {12646464, 140721635898384, 12653800, 12654984, 0, 0, 140721635898352, 5925227}, __mask_was_saved = 0, __saved_mask = {__val = {
          0 <repeats 16 times>}}}}}

(gdb) x/2xa 140721635898384
0x7ffc4f1eb410:	0x7ffc4f1eb430	0x4aa9c6 <srs_usleep(long)+24>
(gdb) x/2xa 0x7ffc4f1eb430
0x7ffc4f1eb430:	0x7ffc4f1eb490	0x4bf2d8 <SrsServer::do_cycle()+408>

这里我们可以看到SrsServer对象了,可以找找它的this指针:

(gdb) x/4xa 0x7ffc4f1eb490
0x7ffc4f1eb490:	0x7ffc4f1eb4f0	0x4bec9b <SrsServer::cycle()+159>
0x7ffc4f1eb4a0:	0xc30518	0xc0f840

// 可以看到第四个指针就是this
(gdb) x/2xa 0xc0f840
0xc0f840:	0x6b1738 <_ZTV9SrsServer+344>	0xc0fad0

// 打印它的内容,是可以对上的:
(gdb) p *(SrsServer*)0xc0f840
$16 = {<ISrsReloadHandler> = {_vptr.ISrsReloadHandler = 0x6b1738 <vtable for SrsServer+344>}, <ISrsSourceHandler> = {
    _vptr.ISrsSourceHandler = 0x6b1970 <vtable for SrsServer+912>}, <IConnectionManager> = {
    _vptr.IConnectionManager = 0x6b19b0 <vtable for SrsServer+976>}, http_api_mux = 0xc0fad0, http_server = 0xc0fb60, http_heartbeat = 0xc0fcb0, 
  ingester = 0xc0f780, conn_manager = 0xc0f950, pid_fd = 8, conns = std::vector of length 71, capacity 256 = {0xc30d28, 0xcbbcb8, 0xc76858, 
    0xd4fb18, 0xcd3938, 0xcd4ae8, 0xd744518, 0x1032a528, 0x495c9e08, 0x47fccbe8, 0x67f14018, 0x70906728, 0x77ce0c38, 0x7b87deb8, 0x76f313b8, 
    0x7e8148b8, 0x814987c8, 0x8356dff8, 0x82ba8bd0, 0x83489cf8, 0x81a10aa8, 0x863debf8, 0x83b977d8, 0x797c59d8, 0x870b0b28, 0x8b5591e8, 
    0x8a8f60e8, 0x8c31b748, 0x8d6cf8a8, 0x8c6268a8, 0x8e0d2758, 0x8d951218, 0x8b42aed8, 0x8ae91d88, 0x889791b8, 0x911c76f8, 0x9118c468, 
    0x93a248a8, 0x917bb4a8, 0x924516a8, 0x8fb1d8c0, 0x9365bf18, 0x94002458, 0x92ad37d8, 0x90c0e068, 0x93817448, 0x921cf848, 0x929e3b78, 
    0x9039a6b8, 0x92d62518, 0x92bf2ed8, 0x939e5348, 0x91e52718, 0x92b2c198, 0x9470dca8, 0x92ac0b38, 0x94707a88, 0x9456f718, 0x93432b98, 
    0x93f4f5c8, 0x91bb6f98, 0x8d7b6788, 0x92d33778, 0x92d61cb8, 0x93a17a88, 0x91fb9188, 0x924c3e60, 0x91f63508, 0x8b72e2d8, 0x93242ca8, 
    0x947c3e18}, listeners = std::vector of length 3, capacity 4 = {0xc2e4b0, 0xc2e6d0, 0xc2e860}, signal_manager = 0xc0f810, handler = 0x0, 
  signal_reload = false, signal_persistence_config = false, signal_gmc_stop = false, signal_fast_quit = false, signal_gracefully_quit = false, 
  ppid = 1}

@winlinvip
Copy link
Member Author

winlinvip commented Dec 10, 2020

GDB nn_coroutines

SRS提供脚本nn_coroutines,可以看当前或者coredump中有多少个coroutines:

(gdb) source gdb/srs.py 
(gdb) nn_coroutines 
this coroutine(&_st_this_thread->tlink) is: 0x7f43ba761e78
next is 0x7f43b92d9e78, total 500
next is 0x7f43b5c37e78, total 1000
next is 0x7f43bfd71e78, total 31500
next is 0x7f43bdad9e78, total 32000
next is 0x7f43bd8f3e78, total 32500
total coroutines: 32717

@winlinvip
Copy link
Member Author

GDB show_coroutines

SRS提供脚本show_coroutines,可以看当前或者coredump中每个coroutine的调用函数:

(gdb) source gdb/srs.py 
(gdb) show_coroutines 
offset=72, _st_this_thread=0xdefa90, pthis-offset=0xdefa90
thread: 0xdefa90, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xe03e00, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xe25e20, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe37e30, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xe4a0f0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe5c2a0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe6e480, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xe80580, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xe925f0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xea6b80, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0xeb8bf0, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xee59d0, caller: 0x4c5c68 <srs_cond_timedwait(void*, long)+35>
thread: 0xf22c40, caller: 0x63b1b4 <st_cond_wait+31>
thread: 0xf34c50, caller: 0x4c5d37 <srs_usleep(long)+24>
thread: 0x7ffff7fc6e40, caller: 0x63bca4 <st_netfd_poll+62>
thread: 0xdee8b0, caller: 0x63a009 <_st_thread_main+40>

@winlinvip winlinvip pinned this issue Oct 17, 2021
@winlinvip winlinvip changed the title state-threads代码分析 Notes about code of state-threads. ST代码分析. Sep 5, 2022
@winlinvip winlinvip changed the title Notes about code of state-threads. ST代码分析. Guide: Notes about code of state-threads. ST代码分析. Sep 5, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant