A .so-based hot update scheme for C/C++ services

Keywords: Linux

For online services, a bug is often found in some piece of logic of service xxx and needs to be fixed urgently. A stateless service can simply be fixed and restarted. For a stateful service, however, a restart means losing in-memory state and dropping long-lived connections. For example, if a Warcraft server has to be restarted, every logged-in player is disconnected. For stateful services that cannot tolerate a restart, a hot update can be used to repair the faulty logic.

Its basic principle is simple:

  1. Suppose that the function requiring hot update is func_a
  2. While the process is running, it is triggered, through a signal or some other mechanism, to load a dynamic library.
  3. The dynamic library contains the definition of the repaired function, func_b.
  4. After loading the dynamic library, parse its symbol table and find the memory addresses of the function to be repaired, func_a, and of the repaired implementation, func_b.
  5. Use mprotect to change the permissions of the process's code segment and add write permission, so that the memory at func_a's address can be modified.
  6. Insert a piece of assembly code at the memory address of func_a so that any call to func_a jumps to func_b.
    // Roughly speaking, you can think of it like this
    func_a()
    {
        // Insert code
        func_b(); return;
            
        // Wrong logic
    }
    
  7. After the replacement, the beginning of func_a's code has been overwritten, and the new content is a jump to func_b. From then on, any logic that calls func_a jumps to the repaired func_b. The logic has been modified and the program has been hot-updated.

Let's begin to implement several important steps in the above process:

  • How to load a .so library at run time and look up symbols in its symbol table.
    Linux provides the following APIs:
    #include <dlfcn.h>
    ...
    void *dlopen(const char *__file, int __mode)
    void *dlsym(void *__restrict__ __handle, const char *__restrict__ __name)
    int dlclose(void *__handle)
    char *dlerror(void)
    
    For a simple example, package a function into a so library
    // Prints the given value on one line as "val : <val>" and returns 0.
    int print_age(int val)
    {
        std::cout << "val : ";
        std::cout << val;
        std::cout << std::endl;
        return 0;
    }
    
    /*
    g++ -fPIC -shared test_shared_so.cc -o test_shared.so
    */
    

    Add -fPIC when compiling to generate position-independent code. View the symbol table of the .so, as shown below:


     

Of course, this is the symbol table generated by g++, so the function name is mangled. If you want to see a clean print_age symbol, you can build with gcc instead (i.e. compile the function as C, or declare it extern "C").

Write a main function to load this so Library:

typedef int (*FUNC_PTR)(int);

// Loads ./test_shared.so at run time, looks up print_age by its mangled
// name and calls it once.
// Returns 0 on success and 1 when the library or the symbol cannot be found.
int main()
{
    //1. Call dlopen to load so Library
    char patch[] = "./test_shared.so";
    void *lib = dlopen(patch, RTLD_NOW);
    if (NULL == lib)
    {
        // dlerror() describes why the most recent dl* call failed
        const char *reason = dlerror();
        cout << "dlopen failed , patch " << patch
             << " : " << (reason ? reason : "unknown error") << endl;
        return 1;
    }

    // 2. Resolve the function; the name is the g++-mangled form of print_age(int)
    FUNC_PTR p_func = (FUNC_PTR)dlsym(lib, "_Z9print_agei");
    if (NULL == p_func)
    {
        const char *reason = dlerror();
        cout << "fix symbol failed"
             << " : " << (reason ? reason : "unknown error") << endl;
        dlclose(lib);
        return 1;
    }

    // 3. Execute function
    p_func(100);

    // 4. Release the library now that the function is no longer needed
    dlclose(lib);
    return 0;
}


g++ dlopen.cc -rdynamic -ldl
-rdynamic
 Instructs the linker to add all symbols (not just the external symbols already used by the program)
to the dynamic symbol table (i.e. the .dynsym table),
so that they can be used by dlopen() and related functions, which resolve symbols through the .dynsym table.

-ldl
 If your program uses dlopen, dlsym, dlclose or dlerror to explicitly load a dynamic library, you need to set the link option -ldl.

With dlopen and dlsym, a running process can load a dynamic library and resolve the symbols in it in order to call them.

  • How to obtain the writable permission of code segment

    #include <sys/mman.h>
    int mprotect(void *addr, size_t len, int prot);
    

    Specific usage:

    addr: The starting address of the region whose protection attributes are changed. addr must be the starting address of a memory page, i.e. an integer multiple of the page size (usually 4 KB == 4096 bytes).
    
    len: The length of the region whose protection attributes are changed (it is rounded up to whole pages, so a len less than 4096 is treated as 4096).
    
    prot: Can take the following values; use "|" to combine several attributes:
    1)PROT_READ: Memory segment readability;
    2)PROT_WRITE: The memory segment is writable;
    3)PROT_EXEC: Memory segment executable;
    4)PROT_NONE: The memory segment is not accessible.
    Return value: 0 on success; -1 on failure (and errno is set).
    
  • Get the start address of the memory page that contains the function's address

    // Get system memory pages
    // The general default page size is 4096
    size_t page = getpagesize();    
    

    Use getpagesize to compute the starting address of the memory page whose permissions must be changed, then pass that address to mprotect to add write permission to it.

    func_begin_addr = &need_fix_func;
    char * begin_page_addr = (char *)func_begin_addr - ((uint64_t)(char *)func_begin_addr % page );
    
    int ret = mprotect (begin_page_addr, (char *)old_func - align_point + inst_len,     PROT_READ | PROT_WRITE | PROT_EXEC)) ;
    if ( 0 != ret)
    {
        return -1;
    }
    
  • How to insert, at the start of the function to be repaired, assembly code that jumps to the new function

mov $new_func_entry, %rax # 48 b8 xx xx xx xx xx xx xx xx 
jmp %rax                  # ff e0
// Machine code of the two instructions above, split around the address operand:
//   prefix  = opcode bytes of "mov $new_func, %rax"; the address of new_func follows them
//   postfix = "jmp %rax"
char prefix[] = {'\x48', '\xb8'}; 
char postfix[] = {'\xff', '\xe0'};    

// Write the jump sequence at the beginning of the original function:
// prefix, then the address of the new function, then postfix
memcpy(old_func, prefix, sizeof(prefix));
memcpy((char *)old_func + sizeof(prefix), &new_func, sizeof(void *));
memcpy((char *)old_func + sizeof(prefix) + sizeof(void *), postfix, sizeof(postfix));

DEMO path:

$ tree -L 2
.
|-- hot_fix
|   |-- Makefile
|   |-- hot_fix.cc
|   |-- hot_fix.h
|   |-- hot_fix.o
|   |-- hot_fix_lib
|   `-- libhot_fix.a
`-- test_prj
    |-- Makefile
    |-- app.cc
    |-- app.h
    |-- fix_patch.cc
    |-- main
    |-- main.cc
    `-- patch.so

main.cc

#include <iostream>
#include "app.h"
#include "hot_fix.h"
using namespace std;

// Entry point of the demo service: installs the hot-fix signal handler and
// then runs the business loop. Returns 0 once business_logic() returns.
int main()
{
    // A failure here is not fatal for the service itself, but without the
    // handler the process can no longer be patched, so make it visible
    // instead of silently ignoring the return value.
    if (0 != init_hot_fix_signal())
    {
        cerr << "init_hot_fix_signal failed, hot update is unavailable" << endl;
    }

    business_logic();

    return 0;
}

app.cc

#include <iostream>
#include <unistd.h>
using namespace std;

// need fix here
// The function that needs fixing.
// Prints its own address followed by ten "before fix" progress lines.
// Always returns 0.
int need_fix_func()
{
    void *self_addr = (void *)&need_fix_func;
    std::cout << "before fix_func addr : " << self_addr << std::endl;

    const int total = 10;
    int round = 0;
    while (round < total)
    {
        std::cout << "before fix cur times " << round << std::endl;
        ++round;
    }
    return 0;
}

// Stand-in for the service's main loop: every two seconds it calls
// need_fix_func(). The loop has no exit, so the trailing return is never
// reached.
int business_logic()
{
    for (;;)
    {
        // do something
        sleep(2);
        need_fix_func();
    }
    return 0;
}

hot_fix.cc

#include <iostream>
#include <signal.h>
#include <dlfcn.h>
#include <errno.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include "hot_fix.h"
using namespace std;

// Redirects old_func to new_func by overwriting the first bytes of old_func
// with the machine code for
//     mov $new_func, %rax   (48 b8 <address of new_func>)
//     jmp %rax              (ff e0)
//
// new_func: address of the replacement implementation
// old_func: address of the function whose entry is overwritten
// Returns 0 on success and -1 when an argument is NULL or when changing the
// page protection fails. If only the final mprotect fails, the jump has
// already been written.
static int fix_func(const void* new_func, void *old_func) 
{
    cout << "begin fix func " << endl;

    // Patching address 0, or jumping to address 0, could only crash later
    if (NULL == new_func || NULL == old_func) {
        return -1;
    }

    //Jump instruction
    const unsigned char prefix[] = {0x48, 0xb8};   //MOV new_func %rax
    const unsigned char postfix[] = {0xff, 0xe0};  //JMP %rax
    const size_t inst_len = sizeof(prefix) + sizeof(void *) + sizeof(postfix);

    // Assemble the complete sequence in a local buffer first so that the
    // live code is overwritten by one copy instead of three partial writes
    unsigned char inst[sizeof(prefix) + sizeof(void *) + sizeof(postfix)];
    memcpy(inst, prefix, sizeof(prefix));
    memcpy(inst + sizeof(prefix), &new_func, sizeof(void *));
    memcpy(inst + sizeof(prefix) + sizeof(void *), postfix, sizeof(postfix));

    //Enable code writable permission
    // mprotect needs a page-aligned start, so round old_func down to its
    // page and extend the length to reach the end of the patched bytes
    const size_t page_size = (size_t)getpagesize();
    char *align_point = (char *)old_func - ((uintptr_t)old_func % page_size);
    const size_t protect_len = (size_t)((char *)old_func - align_point) + inst_len;
    if (0 != mprotect(align_point, protect_len, PROT_READ | PROT_WRITE | PROT_EXEC)) {
        return -1;
    }

    //Writes the jump instruction to the beginning of the original function
    memcpy(old_func, inst, inst_len);

    //Turn off code writable permissions
    if (0 != mprotect(align_point, protect_len, PROT_READ | PROT_EXEC)) {
        return -1;
    }
    return 0;
}

// Signal handler that applies the hot fix: loads the patch library, reads
// its exported "fix_table" and redirects fix_table->old_func to
// fix_table->new_func through fix_func().
//
// signum: number of the delivered signal (unused)
//
// NOTE(review): dlopen, dlsym and iostream output are not async-signal-safe.
// Calling them directly from a signal handler is tolerable for a demo only;
// a production version should merely record the request here and apply the
// patch from the main loop.
static void do_fix(int signum)
{
    (void)signum;
    cout << "do fix" << endl;

    //1. Call dlopen to load so Library
    char patch_patch[] = "../test_prj/patch.so";
    void *lib = dlopen(patch_patch, RTLD_NOW);
    if (NULL == lib)
    {
        // dlerror() describes why the most recent dl* call failed
        const char *reason = dlerror();
        cout << "dlopen failed , patch " << patch_patch
             << " : " << (reason ? reason : "unknown error") << endl;
        return;
    }

    // 2. Find the fix table exported by the patch library
    FIXTABLE *fix_item = (FIXTABLE *)dlsym(lib, "fix_table");
    if (NULL == fix_item) 
    {
        const char *reason = dlerror();
        cout << "fix symbol failed"
             << " : " << (reason ? reason : "unknown error") << endl;
        dlclose(lib);
        return;
    }

    // Nothing has been patched yet, so an incomplete table can be rejected
    // and the library unloaded safely
    if (NULL == fix_item->new_func || NULL == fix_item->old_func)
    {
        cout << "fix table is incomplete" << endl;
        dlclose(lib);
        return;
    }

    // 3. Execute update
    int ret = fix_func(fix_item->new_func, fix_item->old_func);
    cout << "fix result ret " << ret << endl;

    // lib is deliberately left open: once the jump is written the process
    // executes code that lives inside the library. This also holds when ret
    // is non-zero, because fix_func can fail after the jump was written.
    return;
}

// Registers do_fix as the handler for SIGUSR1.
// Returns 0 when the handler was installed and -1 when signal() reports
// SIG_ERR.
int init_hot_fix_signal() 
{
    const bool installed = (signal(SIGUSR1, do_fix) != SIG_ERR);
    return installed ? 0 : -1;
}

fix_patch.cc

#include <iostream>
#include "app.h"
#include "hot_fix.h"

using namespace std;

// Defines the function to be hot updated
// The hot-updated implementation.
// Prints the address of need_fix_func and its own address, a banner line,
// then three "after fix" progress lines. Always returns 0.
int fix_func()
{
    void *old_addr = (void *)&need_fix_func;
    void *new_addr = (void *)&fix_func;
    std::cout << "before fix_func addr : " << old_addr << std::endl;
    std::cout << "after  fix_func addr : " << new_addr << std::endl;

    std::cout << "load new fix function" << std::endl;
    // fix here
    const int total = 3;
    int round = 0;
    while (round < total)
    {
        std::cout << "after fix cur times " << round << std::endl;
        ++round;
    }
    return 0;
}

// Patch descriptor exported under the symbol name "fix_table".
// The first entry is the updated function (fix_func); the second is the
// function being replaced (need_fix_func).
// NOTE(review): this order assumes FIXTABLE declares new_func before
// old_func — confirm against hot_fix.h.
FIXTABLE fix_table = {(void *)&fix_func, (void *)&need_fix_func};

Execution results:
By triggering the signal, the process is updated without restarting:

kill -USR1 `ps -ef|grep main|grep -v grep|awk '{print $2}'`


Reference: https://www.jianshu.com/p/b7c7102119fa

Posted by djalecc on Thu, 04 Nov 2021 07:34:46 -0700