Catalog
1. Write in front
Recently, while debugging, I needed to access physical memory from user space and found that the application layer can use the devmem tool to access physical addresses. Looking at its source code, it is actually an operation on /dev/mem: through mmap, a physical address can be mapped into a user-space virtual address, so device registers can be read and written from user space. For this reason, I want to understand the implementation of mmap in depth.
2.devmem use
The configuration of devmem can be found in the miscellaneous items of busybox.
CONFIG_USER_BUSYBOX_DEVMEM: devmem is a small program that reads and writes from physical memory using /dev/mem. Symbol: USER_BUSYBOX_DEVMEM [=y] Prompt: devmem Defined at ../user/busybox/busybox-1.23.2/miscutils/Kconfig:216 Depends on: USER_BUSYBOX_BUSYBOX Location: -> BusyBox (USER_BUSYBOX_BUSYBOX [=y]) -> Miscellaneous Utilities
# busybox devmem BusyBox v1.23.2 (2018-08-02 11:08:33 CST) multi-call binary. Usage: devmem ADDRESS [WIDTH [VALUE]] Read/write from physical address ADDRESS Address to act upon WIDTH Width (8/16/...) VALUE Data to be written
parameter | Detailed description |
---|---|
ADDRESS | Physical Address for Read-Write Access |
WIDTH | Access data type |
VALUE | If omitted, the operation is a read; if present, it is the data to be written. |
Basic test usage
# devmem 0x44e07134 16 0xFFEF # devmem 0x44e07134 32 0xFFFFFFEF # devmem 0x44e07134 8 0xEF
3. application layer
The interface is defined as follows:
#include <sys/mman.h> void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); int munmap(void *addr, size_t length);
Detailed parameters are as follows:
parameter | Detailed description |
---|---|
addr | The virtual memory address that needs to be mapped; if it is NULL, the system will automatically select it. Return the address after the mapping is successful |
length | Length of the mapping in bytes |
prot | Describes memory protection methods for mapping areas, including PROT_EXEC, PROT_READ, PROT_WRITE and PROT_NONE. |
flags | Describe the characteristics of the mapping area, such as whether to share with other processes, whether to create anonymous mapping, whether to create private cow. |
fd | File descriptors to be mapped to memory |
offset | Offset of file mapping |
Taking the implementation of devmem as an example,
If argv[3] exists, read and write permissions need to be mapped; if not, only read permissions need to be mapped.
map_base = mmap(NULL, mapped_size, argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, target & ~(off_t)(page_size - 1));
4. kernel layer
For reasons of space, the relationship between glibc and the system call mechanism is not covered here; we go straight to the kernel-side implementation of the system call.
arch/arm/include/uapi/asm/unistd.h
#define __NR_OABI_SYSCALL_BASE 0x900000 #if defined(__thumb__) || defined(__ARM_EABI__) #define __NR_SYSCALL_BASE 0 #else #define __NR_SYSCALL_BASE __NR_OABI_SYSCALL_BASE #endif #define __NR_mmap (__NR_SYSCALL_BASE+ 90) #define __NR_munmap (__NR_SYSCALL_BASE+ 91) #define __NR_mmap2 (__NR_SYSCALL_BASE+192)
arch/arm/kernel/entry-common.S
/*============================================================================= * SWI handler *----------------------------------------------------------------------------- */ .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M v7m_exception_entry #else sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr mrs r8, spsr @ called from non-FIQ mode, so ok. str lr, [sp, #S_PC] @ Save calling PC str r8, [sp, #S_PSR] @ Save CPSR str r0, [sp, #S_OLD_R0] @ Save OLD_R0 #endif zero_fp #ifdef CONFIG_ALIGNMENT_TRAP ldr ip, __cr_alignment ldr ip, [ip] mcr p15, 0, ip, c1, c0 @ update control register #endif enable_irq ...
/* * Note: off_4k (r5) is always units of 4K. If we can't do the requested * offset, we return EINVAL. */ sys_mmap2: #if PAGE_SHIFT > 12 tst r5, #PGOFF_MASK moveq r5, r5, lsr #PAGE_SHIFT - 12 streq r5, [sp, #4] beq sys_mmap_pgoff mov r0, #-EINVAL mov pc, lr #else str r5, [sp, #4] b sys_mmap_pgoff #endif ENDPROC(sys_mmap2)
arch/arm/kernel/calls.S
/* 90 */ CALL(OBSOLETE(sys_old_mmap)) /* used by libc4 */ CALL(sys_munmap) ... /* 190 */ CALL(sys_vfork) CALL(sys_getrlimit) CALL(sys_mmap2)
include/linux/syscalls.h
asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff);
Search for the mmap_pgoff function definition, located in mm/mmap.c, and omit some code that we don't care much about.
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { struct file *file = NULL; unsigned long retval = -EBADF; if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); if (!file) goto out; if (is_file_hugepages(file)) len = ALIGN(len, huge_page_size(hstate_file(file))); retval = -EINVAL; if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) goto out_fput; } ... flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_fput: if (file) fput(file); out: return retval; }
mm/util.c
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, &populate); up_write(&mm->mmap_sem); if (populate) mm_populate(ret, populate); } return ret; }
The vm_area_struct structure is used to describe the virtual memory area of a process, and is associated with the memory descriptor mm_struct of the process, which is managed by a linked list and a red-black tree.
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate) { struct mm_struct * mm = current->mm; vm_flags_t vm_flags; *populate = 0; //Search the process address space for a usable linear address interval, len specifies the length of the interval, and the non-empty addr parameter specifies which address to start the search. addr = get_unmapped_area(file, addr, len, pgoff, flags); vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; //File pointer is not empty. Mapping from file to virtual space is established. Access privileges are set according to flags flag. if (file) { struct inode *inode = file_inode(file); switch (flags & MAP_TYPE) { case MAP_SHARED: vm_flags |= VM_SHARED | VM_MAYSHARE; break; ... } else { //file pointer is empty, only create virtual space, not mapping. switch (flags & MAP_TYPE) { case MAP_SHARED: pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; case MAP_PRIVATE: pgoff = addr >> PAGE_SHIFT; break; } //Create virtual spaces and map them. addr = mmap_region(file, addr, len, vm_flags, pgoff); return addr; }
unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff) { ... //Check whether the virtual space needs to be expanded if (!may_expand_vm(mm, len >> PAGE_SHIFT)) { unsigned long nr_pages; /* * MAP_FIXED may remove pages of mappings that intersects with * requested mapping. Account for the pages it would unmap. */ if (!(vm_flags & MAP_FIXED)) return -ENOMEM; nr_pages = count_vma_pages_range(mm, addr, addr + len); if (!may_expand_vm(mm, (len >> PAGE_SHIFT) - nr_pages)) return -ENOMEM; } //Scanning the red-black tree related to the vm_area_struct structure of the current process address space to determine the location of the linear region, if a region is found, it shows that the virtual interval where addr is located has been used, indicating that it has been mapped; therefore, do_munmap needs to be called to revoke this region from the process address space. munmap_back: if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { if (do_munmap(mm, addr, len)) return -ENOMEM; goto munmap_back; } vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL); if (vma) goto out; //Assignment mapping virtual space vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { error = -ENOMEM; goto unacct_error; } vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff; INIT_LIST_HEAD(&vma->anon_vma_chain); if (file) { if (vm_flags & VM_DENYWRITE) { error = deny_write_access(file); if (error) goto free_vma; } vma->vm_file = get_file(file); error = file->f_op->mmap(file, vma); if (error) goto unmap_and_free_vma; /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their * f_op->mmap method. 
-DaveM * Bug: If addr is changed, prev, rb_link, rb_parent should * be updated for vma_link() */ WARN_ON_ONCE(addr != vma->vm_start); addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) goto free_vma; } ... }
The file->f_op->mmap(file, vma) call in the mmap_region function corresponds to mmap_mem, located in drivers/char/mem.c. The code is as follows:
static const struct file_operations mem_fops = { .llseek = memory_lseek, .read = read_mem, .write = write_mem, .mmap = mmap_mem, .open = open_mem, .get_unmapped_area = get_unmapped_area_mem, }; static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; if (!private_mapping_ok(vma)) return -ENOSYS; if (!range_is_allowed(vma->vm_pgoff, size)) return -EPERM; if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, &vma->vm_page_prot)) return -EINVAL; vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); vma->vm_ops = &mmap_mem_ops; /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; }
The remap_pfn_range function builds the page tables that map the physical range to the virtual range. Here vm_pgoff holds the page frame number of the physical address to be mapped (the mmap offset shifted right by PAGE_SHIFT), and vm_page_prot holds the page protection bits. These correspond to mmap's parameters, and the application layer can now access the physical address through the returned mapping.