Accessing physical addresses through devmem

Keywords: Linux glibc

Catalog

1. Write in front

Recently, while debugging, I needed to access physical memory from user space, and found that the application layer can use the devmem tool to access physical addresses. Looking at its source code, it is simply an operation on /dev/mem: through mmap, a physical address can be mapped to a virtual address in user space, so device registers can be read and written from user space. For this reason, I want to understand the implementation of mmap in depth.

2.devmem use

The configuration of devmem can be found in the miscellaneous items of busybox.

CONFIG_USER_BUSYBOX_DEVMEM:                                       

devmem is a small program that reads and writes from physical     
memory using /dev/mem.                                           

Symbol: USER_BUSYBOX_DEVMEM [=y]                                  
Prompt: devmem                                                    
  Defined at ../user/busybox/busybox-1.23.2/miscutils/Kconfig:216 
  Depends on: USER_BUSYBOX_BUSYBOX                                
  Location:                                                       
    -> BusyBox (USER_BUSYBOX_BUSYBOX [=y])                        
      -> Miscellaneous Utilities 
# busybox devmem
BusyBox v1.23.2 (2018-08-02 11:08:33 CST) multi-call binary.

Usage: devmem ADDRESS [WIDTH [VALUE]]

Read/write from physical address

    ADDRESS Address to act upon
    WIDTH   Width (8/16/...)
    VALUE   Data to be written
parameter Detailed description
ADDRESS Physical Address for Read-Write Access
WIDTH Access width in bits (8, 16, or 32)
VALUE If omitted, a read operation is performed; if supplied, it is the data to be written (a write operation).

Basic test usage

# devmem 0x44e07134 16
0xFFEF
# devmem 0x44e07134 32
0xFFFFFFEF
# devmem 0x44e07134 8
0xEF

3. application layer

The interface is defined as follows:

#include <sys/mman.h>

void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

Detailed parameters are as follows:

parameter Detailed description
addr The virtual memory address that needs to be mapped; if it is NULL, the system will automatically select it. Return the address after the mapping is successful
length How much data need to be mapped
prot Describes memory protection methods for mapping areas, including PROT_EXEC, PROT_READ, PROT_WRITE and PROT_NONE.
flags Describe the characteristics of the mapping area, such as whether to share with other processes, whether to create anonymous mapping, whether to create private cow.
fd File descriptors to be mapped to memory
offset Offset of file mapping

Taking the implementation of devmem as an example,

If argv[3] exists, read and write permissions need to be mapped; if not, only read permissions need to be mapped.

    map_base = mmap(NULL,
            mapped_size,
            argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ,
            MAP_SHARED,
            fd,
            target & ~(off_t)(page_size - 1));

4. kernel layer

Because of limited space, the relationship between glibc and the system call is not covered here; we look directly at the kernel implementation of the system call.

arch/arm/include/uapi/asm/unistd.h

#define __NR_OABI_SYSCALL_BASE  0x900000

#if defined(__thumb__) || defined(__ARM_EABI__)
#define __NR_SYSCALL_BASE   0
#else
#define __NR_SYSCALL_BASE   __NR_OABI_SYSCALL_BASE
#endif

#define __NR_mmap           (__NR_SYSCALL_BASE+ 90)
#define __NR_munmap         (__NR_SYSCALL_BASE+ 91)

#define __NR_mmap2          (__NR_SYSCALL_BASE+192)

arch/arm/kernel/entry-common.S

/*=============================================================================
 * SWI handler
 *-----------------------------------------------------------------------------
 */

    .align  5
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
    v7m_exception_entry
#else
    sub sp, sp, #S_FRAME_SIZE
    stmia   sp, {r0 - r12}          @ Calling r0 - r12
 ARM(   add r8, sp, #S_PC       )
 ARM(   stmdb   r8, {sp, lr}^       )   @ Calling sp, lr
 THUMB( mov r8, sp          )
 THUMB( store_user_sp_lr r8, r10, S_SP  )   @ calling sp, lr
    mrs r8, spsr            @ called from non-FIQ mode, so ok.
    str lr, [sp, #S_PC]         @ Save calling PC
    str r8, [sp, #S_PSR]        @ Save CPSR
    str r0, [sp, #S_OLD_R0]     @ Save OLD_R0
#endif
    zero_fp

#ifdef CONFIG_ALIGNMENT_TRAP
    ldr ip, __cr_alignment
    ldr ip, [ip]
    mcr p15, 0, ip, c1, c0      @ update control register
#endif

    enable_irq
    ...
    
/*
 * Note: off_4k (r5) is always units of 4K.  If we can't do the requested
 * offset, we return EINVAL.
 */
sys_mmap2:
#if PAGE_SHIFT > 12
        tst r5, #PGOFF_MASK                     @ offset (r5, in 4K units) must align to a page
        moveq   r5, r5, lsr #PAGE_SHIFT - 12    @ convert 4K units to PAGE_SIZE units
        streq   r5, [sp, #4]                    @ store pgoff as the 6th syscall argument
        beq sys_mmap_pgoff
        mov r0, #-EINVAL                        @ unaligned offset: fail with EINVAL
        mov pc, lr
#else
        str r5, [sp, #4]                        @ PAGE_SIZE is 4K: pass the offset through as pgoff
        b   sys_mmap_pgoff
#endif
ENDPROC(sys_mmap2)

arch/arm/kernel/calls.S

/* 90 */    CALL(OBSOLETE(sys_old_mmap))    /* used by libc4 */
            CALL(sys_munmap)
            ... 
/* 190 */   CALL(sys_vfork)
            CALL(sys_getrlimit)
            CALL(sys_mmap2)

include/linux/syscalls.h

asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
            unsigned long prot, unsigned long flags,
            unsigned long fd, unsigned long pgoff);

Search for the mmap_pgoff function definition, located in mm/mmap.c, and omit some code that we don't care much about.

/*
 * Kernel entry point for the mmap2/mmap_pgoff system call: resolves fd to a
 * struct file (unless the mapping is anonymous) and forwards all arguments
 * to vm_mmap_pgoff().  (Excerpt: some checks elided by the article.)
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
        unsigned long, prot, unsigned long, flags,
        unsigned long, fd, unsigned long, pgoff)
{
    struct file *file = NULL;
    unsigned long retval = -EBADF;  /* default error: bad file descriptor */

    if (!(flags & MAP_ANONYMOUS)) { /* file-backed mapping: look up the fd */
        audit_mmap_fd(fd, flags);
        file = fget(fd);            /* takes a reference, dropped at out_fput */
        if (!file)
            goto out;
        if (is_file_hugepages(file))
            /* hugetlbfs files: round len up to the huge page size */
            len = ALIGN(len, huge_page_size(hstate_file(file)));
        retval = -EINVAL;
        /* MAP_HUGETLB only makes sense with a hugetlbfs-backed file */
        if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file)))
            goto out_fput;
    }
    ...
    
    /* these two flags are derived internally, not taken from userspace */
    flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

    retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
out_fput:
    if (file)
        fput(file);
out:
    return retval;
}

mm/util.c

/*
 * vm_mmap_pgoff - run the LSM security check, then perform the mapping
 * under the write side of mmap_sem.  Returns the mapped address on success
 * or a negative error code.
 */
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
    unsigned long len, unsigned long prot,
    unsigned long flag, unsigned long pgoff)
{
    unsigned long ret;
    struct mm_struct *mm = current->mm;
    unsigned long populate;         /* set by do_mmap_pgoff if pages must be faulted in now */

    ret = security_mmap_file(file, prot, flag); /* LSM hook: may veto the mapping */
    if (!ret) {
        down_write(&mm->mmap_sem);  /* mmap_sem serializes all VMA modifications */
        ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
                    &populate);
        up_write(&mm->mmap_sem);
        if (populate)               /* e.g. MAP_POPULATE/MAP_LOCKED: pre-fault the range */
            mm_populate(ret, populate);
    }
    return ret;
}

The vm_area_struct structure is used to describe the virtual memory area of a process, and is associated with the memory descriptor mm_struct of the process, which is managed by a linked list and a red-black tree.

/*
 * Core mmap implementation: find a free virtual address range, compute the
 * VMA flags from prot/flags, then build the mapping via mmap_region().
 * (Excerpt: validation and most of the file/anonymous cases are elided by
 * the article, so the braces below do not balance.)
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
            unsigned long len, unsigned long prot,
            unsigned long flags, unsigned long pgoff,
            unsigned long *populate)
{
    
    struct mm_struct * mm = current->mm;
    vm_flags_t vm_flags;

    *populate = 0;   
    
    //Search the process address space for a usable linear address interval, len specifies the length of the interval, and the non-empty addr parameter specifies which address to start the search.
    addr = get_unmapped_area(file, addr, len, pgoff, flags);
    
    /* combine caller protections, mapping flags and per-mm defaults */
    vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
            mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; 
            
    //File pointer is not empty. Mapping from file to virtual space is established. Access privileges are set according to flags flag.        
    if (file) {
        struct inode *inode = file_inode(file);
        
        switch (flags & MAP_TYPE) {
        case MAP_SHARED:
            vm_flags |= VM_SHARED | VM_MAYSHARE;
            break;
        ...
    } else {    //file pointer is empty, only create virtual space, not mapping.
        switch (flags & MAP_TYPE) {
        case MAP_SHARED:
            pgoff = 0;
            vm_flags |= VM_SHARED | VM_MAYSHARE;
            break;
        case MAP_PRIVATE:
            /* anonymous private mapping: pgoff mirrors the virtual page number */
            pgoff = addr >> PAGE_SHIFT;
            break;  
    }     
    
    //Create virtual spaces and map them.
    addr = mmap_region(file, addr, len, vm_flags, pgoff);
    
    return addr;
}
/*
 * mmap_region - create the vm_area_struct for [addr, addr+len), link it into
 * the mm, and let the backing file (if any) set up its part of the mapping
 * via f_op->mmap.  (Excerpt: accounting and linking code elided by the
 * article.)
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
        unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
{
    ...
    //Check whether the virtual space needs to be expanded
    if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
        unsigned long nr_pages;

        /*
         * MAP_FIXED may remove pages of mappings that intersects with
         * requested mapping. Account for the pages it would unmap.
         */
        if (!(vm_flags & MAP_FIXED))
            return -ENOMEM;

        nr_pages = count_vma_pages_range(mm, addr, addr + len);

        if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages))
            return -ENOMEM;
    }
    
    //Scanning the red-black tree related to the vm_area_struct structure of the current process address space to determine the location of the linear region, if a region is found, it shows that the virtual interval where addr is located has been used, indicating that it has been mapped; therefore, do_munmap needs to be called to revoke this region from the process address space.
munmap_back:
    if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) {
        if (do_munmap(mm, addr, len))
            return -ENOMEM;
        goto munmap_back;
    }    
    
    /* try to extend an adjacent compatible VMA instead of allocating one */
    vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
    if (vma)
        goto out;  
    
    //Assignment mapping virtual space
    vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
    if (!vma) {
        error = -ENOMEM;
        goto unacct_error;
    }

    /* initialize the new VMA describing this mapping */
    vma->vm_mm = mm;
    vma->vm_start = addr;
    vma->vm_end = addr + len;
    vma->vm_flags = vm_flags;
    vma->vm_page_prot = vm_get_page_prot(vm_flags);
    vma->vm_pgoff = pgoff;
    INIT_LIST_HEAD(&vma->anon_vma_chain); 
    
    if (file) {
        if (vm_flags & VM_DENYWRITE) {
            error = deny_write_access(file);
            if (error)
                goto free_vma;
        }
        vma->vm_file = get_file(file);
        /* driver/filesystem hook: for /dev/mem this calls mmap_mem() below */
        error = file->f_op->mmap(file, vma);
        if (error)
            goto unmap_and_free_vma;

        /* Can addr have changed??
         *
         * Answer: Yes, several device drivers can do it in their
         *         f_op->mmap method. -DaveM
         * Bug: If addr is changed, prev, rb_link, rb_parent should
         *      be updated for vma_link()
         */
        WARN_ON_ONCE(addr != vma->vm_start);

        addr = vma->vm_start;
        vm_flags = vma->vm_flags;
    } else if (vm_flags & VM_SHARED) {
        /* anonymous shared mapping: back it with a shmem file */
        error = shmem_zero_setup(vma);
        if (error)
            goto free_vma;
    }    
    
    ...
}

The file->f_op->mmap(file, vma) call in the mmap_region function corresponds to mmap_mem, located in drivers/char/mem.c. The code is as follows:

/*
 * file_operations for /dev/mem: an mmap on the device is dispatched to
 * mmap_mem() below.
 */
static const struct file_operations mem_fops = {
    .llseek     = memory_lseek,
    .read       = read_mem,
    .write      = write_mem,
    .mmap       = mmap_mem,
    .open       = open_mem,
    .get_unmapped_area = get_unmapped_area_mem,
};

/*
 * mmap handler for /dev/mem: validates the requested physical range, picks
 * suitable page protections, and maps the range straight into the caller's
 * VMA with remap_pfn_range().  vma->vm_pgoff is the physical page frame
 * number to map (the mmap offset shifted right by PAGE_SHIFT).
 */
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
{
    size_t size = vma->vm_end - vma->vm_start;  /* mapping length in bytes */

    /* arch hook: is [pgoff, pgoff + size) a physically addressable range? */
    if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
        return -EINVAL;

    /* reject private mappings on configurations that cannot support them */
    if (!private_mapping_ok(vma))
        return -ENOSYS;

    /* access policy check — may be restricted by CONFIG_STRICT_DEVMEM */
    if (!range_is_allowed(vma->vm_pgoff, size))
        return -EPERM;

    /* arch hook: last chance to veto or adjust the page protection */
    if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
                        &vma->vm_page_prot))
        return -EINVAL;

    /* choose caching attributes appropriate for this physical range */
    vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                         size,
                         vma->vm_page_prot);

    vma->vm_ops = &mmap_mem_ops;

    /* Remap-pfn-range will mark the range VM_IO */
    if (remap_pfn_range(vma,
                vma->vm_start,
                vma->vm_pgoff,
                size,
                vma->vm_page_prot)) {
        return -EAGAIN;
    }
    return 0;
}

The remap_pfn_range function establishes the page tables that map the virtual address range to the physical pages. Here vm_pgoff holds the physical page frame number (the physical address shifted right by PAGE_SHIFT) and vm_page_prot holds the page protection bits. These values are derived from mmap's parameters, and once the mapping is set up, the physical address can be accessed from the application layer.

Posted by lamurio on Mon, 28 Jan 2019 15:00:15 -0800