[LINUX] Also read the contents of arch/arm/kernel/swp_emulate.c


Why do you need swp emulation in the first place?

To summarize briefly

(1) Originally there was the SWP instruction, which exchanges data between a register and a memory location. (2) Once CPUs became multi-core, exclusive access had to be coordinated with the other CPUs (this is what LDREX/STREX are for). (3) Ideally one would simply tell the upper layers (user space) not to use SWP any more; since that could not be enforced, the kernel needed a mechanism to emulate SWP in software.

That is the gist of it.

Note that the xchg instruction for exchanging data also exists in other architectures such as x86.

Link to ARM documentation

SWP instruction

LDREX instruction / STREX instruction

1. Register SWP emulation in traps.c at startup

Request registration of hook function from swp_emulate.c to traps.c

At startup the call chain is late_initcall(swp_emulation_init) → swp_emulation_init() → register_undef_hook(), which registers swp_handler() as an undef hook.


 * Only emulate SWP/SWPB executed in ARM state/User mode.
 * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
static struct undef_hook swp_hook = {
    .instr_mask = 0x0fb00ff0,
    .instr_val  = 0x01000090,
    .cpsr_mask  = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
    .cpsr_val   = USR_MODE,
    .fn     = swp_handler ★★★★★ here

 * Register handler and create status file in /proc/cpu
 * Invoked as late_initcall, since not needed before init spawned.
static int __init swp_emulation_init(void)
    if (cpu_architecture() < CPU_ARCH_ARMv7)
        return 0;

    if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL,
        return -ENOMEM;
#endif /* CONFIG_PROC_FS */

    pr_notice("Registering SWP/SWPB emulation handler\n");
    register_undef_hook(&swp_hook);★★★★★ here

    return 0;

late_initcall(swp_emulation_init);★★★★★ here

traps.c registers the hook function.

The registration happens in arch/arm/kernel/traps.c.


void register_undef_hook(struct undef_hook *hook)
    unsigned long flags;

    raw_spin_lock_irqsave(&undef_lock, flags);
    list_add(&hook->node, &undef_hook);
    raw_spin_unlock_irqrestore(&undef_lock, flags);

2. Triggering the trap (up to the point where the handler is invoked)

Vector table → kernel

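Roughly speaking, when the CPU hits an undefined instruction it enters the kernel through the vector table, and the SWP emulation function registered as a hook ends up being called. (Note that the SWP hook above only matches user-mode instructions, cpsr_val = USR_MODE, so for SWP the relevant table entry is __und_usr; the __und_svc path is the one excerpted below.)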


 ARM(   swi SYS_ERROR0  )
 THUMB( svc #0      )
 THUMB( nop         )
    b   vector_und

 * Undef instr entry dispatcher
 * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
    vector_stub und, UND_MODE

    .long   __und_usr           @  0 (USR_26 / USR_32)
    .long   __und_invalid           @  1 (FIQ_26 / FIQ_32)
    .long   __und_invalid           @  2 (IRQ_26 / IRQ_32)
    .long   __und_svc           @  3 (SVC_26 / SVC_32)★★★★★★★★★★★ Here! !!
    .long   __und_invalid           @  4
    .long   __und_invalid           @  5
    .long   __und_invalid           @  6
    .long   __und_invalid           @  7
    .long   __und_invalid           @  8
    .long   __und_invalid           @  9
    .long   __und_invalid           @  a
    .long   __und_invalid           @  b
    .long   __und_invalid           @  c
    .long   __und_invalid           @  d
    .long   __und_invalid           @  e
    .long   __und_invalid           @  f

    .align  5


    .align  5
    @ If a kprobe is about to simulate a "stmdb sp..." instruction,
    @ it obviously needs free stack space which then will belong to
    @ the saved context.
    svc_entry MAX_STACK_SIZE
    @ call emulation code, which returns using r9 if it has emulated
    @ the instruction, or the more conventional lr if we are to treat
    @ this as a real undefined instruction
    @  r0 - instruction
    ldr r0, [r4, #-4]
    mov r1, #2
    ldrh    r0, [r4, #-2]           @ Thumb instruction at LR - 2
    cmp r0, #0xe800         @ 32-bit instruction if xx >= 0
    blo __und_svc_fault
    ldrh    r9, [r4]            @ bottom 16 bits
    add r4, r4, #2
    str r4, [sp, #S_PC]
    orr r0, r9, r0, lsl #16
    badr    r9, __und_svc_finish
    mov r2, r4
    bl  call_fpe

    mov r1, #4              @ PC correction to apply
    mov r0, sp              @ struct pt_regs *regs
    bl  __und_fault ★★★★★★★★★★★ Here! !!

    get_thread_info tsk
    ldr r5, [sp, #S_PSR]        @ Get SVC cpsr
    svc_exit r5             @ return from exception
 UNWIND(.fnend      )


    @ Correct the PC such that it is pointing at the instruction
    @ which caused the fault.  If the faulting instruction was ARM
    @ the PC will be pointing at the next instruction, and have to
    @ subtract 4.  Otherwise, it is Thumb, and the PC will be
    @ pointing at the second half of the Thumb instruction.  We
    @ have to subtract 2.
    ldr r2, [r0, #S_PC]
    sub r2, r2, r1
    str r2, [r0, #S_PC]
    b   do_undefinstr ★★★★★★★★★★★★★★★★★ Here! !!

kernel → hook function

Now do_undefinstr() has finally been reached. From here it calls a function that in turn invokes the hook function that was registered beforehand (a bit roundabout ...).


asmlinkage void do_undefinstr(struct pt_regs *regs)
    unsigned int instr;
    void __user *pc;

    pc = (void __user *)instruction_pointer(regs);

    if (processor_mode(regs) == SVC_MODE) {
            instr = __mem_to_opcode_arm(*(u32 *) pc);
    } else if (thumb_mode(regs)) {
        if (get_user(instr, (u16 __user *)pc))
            goto die_sig;
        instr = __mem_to_opcode_thumb16(instr);
        if (is_wide_instruction(instr)) {
            unsigned int instr2;
            if (get_user(instr2, (u16 __user *)pc+1))
                goto die_sig;
            instr2 = __mem_to_opcode_thumb16(instr2);
            instr = __opcode_thumb32_compose(instr, instr2);
    } else {
        if (get_user(instr, (u32 __user *)pc))
            goto die_sig;
        instr = __mem_to_opcode_arm(instr);

    if (call_undef_hook(regs, instr) == 0)★★★★★★★ here


call_undef_hook() then calls the function that was registered as a hook.


static nokprobe_inline
int call_undef_hook(struct pt_regs *regs, unsigned int instr)
    struct undef_hook *hook;
    unsigned long flags;
    int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL;

    raw_spin_lock_irqsave(&undef_lock, flags);
    list_for_each_entry(hook, &undef_hook, node)
        if ((instr & hook->instr_mask) == hook->instr_val &&
            (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val)
            fn = hook->fn;
    raw_spin_unlock_irqrestore(&undef_lock, flags);

    return fn ? fn(regs, instr) : 1;

3. The trap fires!

Now that the hook function has been called, we finally enter the emulation handler itself.


 * swp_handler logs the id of calling process, dissects the instruction, sanity
 * checks the memory location, calls emulate_swpX for the actual operation and
 * deals with fixup/error handling before returning
static int swp_handler(struct pt_regs *regs, unsigned int instr)
    unsigned int address, destreg, data, type;
    unsigned int res = 0;

    perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);

    res = arm_check_condition(instr, regs->ARM_cpsr);
    switch (res) {
        /* Condition failed - return to next instruction */
        regs->ARM_pc += 4;
        return 0;
        /* If unconditional encoding - not a SWP, undef */
        return -EFAULT;
        return -EINVAL;

    if (current->pid != previous_pid) {
        pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
             current->comm, (unsigned long)current->pid);
        previous_pid = current->pid;

    address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
    data    = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
    destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);

    type = instr & TYPE_SWPB;

    pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
         EXTRACT_REG_NUM(instr, RN_OFFSET), address,
         destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);

    /* Check access in reasonable access range for both SWP and SWPB */
    if (!access_ok((address & ~3), 4)) {
        pr_debug("SWP{B} emulation: access to %p not allowed!\n",
             (void *)address);
        res = -EFAULT;
    } else {
        res = emulate_swpX(address, &data, type);

    if (res == 0) {
         * On successful emulation, revert the adjustment to the PC
         * made in kernel/traps.c in order to resume execution at the
         * instruction following the SWP{B}.
        regs->ARM_pc += 4;
        regs->uregs[destreg] = data;
    } else if (res == -EFAULT) {
         * Memory errors do not mean emulation failed.
         * Set up signal info to return SEGV, then return OK
        set_segfault(regs, address);

    return 0;

3.1 arm_check_condition

It determines whether the instruction should actually be executed given the current state (the condition flags in CPSR).


 * Returns:
 * ARM_OPCODE_CONDTEST_FAIL   - if condition fails
 * ARM_OPCODE_CONDTEST_PASS   - if condition passes (including AL)
 * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional
 *                              opcode space from v5 onwards
 * Code that tests whether a conditional instruction would pass its condition
 * check should check that return value == ARM_OPCODE_CONDTEST_PASS.
 * Code that tests if a condition means that the instruction would be executed
 * (regardless of conditional or unconditional) should instead check that the
 * return value != ARM_OPCODE_CONDTEST_FAIL.

3.2 emulate_swpX()


static int emulate_swpX(unsigned int address, unsigned int *data,
            unsigned int type)
    unsigned int res = 0;

    // Unless the type is TYPE_SWPB (i.e. for word-sized SWP), an unaligned address is an error
    if ((type != TYPE_SWPB) && (address & 0x3)) {
        /* SWP to unaligned address not permitted */
        pr_debug("SWP instruction on unaligned pointer!\n");
        return -EFAULT;

    // Call __user_swp[b]_asm() while arbitrating with other accessors.
    // If res is -EAGAIN, start over.
    while (1) {
        unsigned long temp;
        unsigned int __ua_flags;

        __ua_flags = uaccess_save_and_enable();
        if (type == TYPE_SWPB)
            __user_swpb_asm(*data, address, res, temp);
            __user_swp_asm(*data, address, res, temp);

        if (likely(res != -EAGAIN) || signal_pending(current))


    // Increment the counter
    if (res == 0) {
        if (type == TYPE_SWPB)

    return res;

3.3 __user_swp[b]_asm()


 * Error-checking SWP macros implemented using ldrex{b}/strex{b}
/*
 * __user_swpX_asm(data, addr, res, temp, B) - one exchange attempt on [addr]
 * built from an ldrex/strex pair.
 *
 * B is a string literal pasted directly after "ldrex"/"strex" to select the
 * instruction variant (the wrappers in this file pass "" or "b").
 *
 * Operand numbering used in the template:
 *   %0 = res  (output, early-clobber)   %3 = addr (input register)
 *   %1 = data (read/write register)     %4 = -EAGAIN (immediate)
 *   %2 = temp (output, early-clobber)   %5 = -EFAULT (immediate)
 *
 * Sequence:
 *   0: ldrex  loads [addr] into temp.
 *   1: strex  stores data to [addr] and writes its status into res.
 *      If res == 0, data is overwritten with temp (the value that was loaded);
 *      otherwise res is set to -EAGAIN and data is left unchanged.
 *   2: common exit point.
 *   3: out-of-line code in .text.fixup: sets res to -EFAULT and branches
 *      back to 2.
 *
 * The __ex_table entries pair labels 0 and 1 (the ldrex and the strex) with
 * label 3; presumably the kernel's exception-fixup machinery uses them to
 * resume at 3 when either access faults - confirm against the fault handler.
 *
 * The macro itself does not loop: the caller is expected to inspect res.
 */
#define __user_swpX_asm(data, addr, res, temp, B)       \
    __asm__ __volatile__(                   \
    "0: ldrex"B"    %2, [%3]\n"         \
    "1: strex"B"    %0, %1, [%3]\n"         \
    "   cmp     %0, #0\n"           \
    "   moveq       %1, %2\n"           \
    "   movne       %0, %4\n"           \
    "2:\n"                          \
    "   .section     .text.fixup,\"ax\"\n"      \
    "   .align      2\n"                \
    "3: mov     %0, %5\n"           \
    "   b       2b\n"               \
    "   .previous\n"                    \
    "   .section     __ex_table,\"a\"\n"        \
    "   .align      3\n"                \
    "   .long       0b, 3b\n"           \
    "   .long       1b, 3b\n"           \
    "   .previous"                  \
    : "=&r" (res), "+r" (data), "=&r" (temp)        \
    : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)      \
    : "cc", "memory")

/*
 * Variant with an empty suffix: expands __user_swpX_asm so that the template
 * uses plain "ldrex"/"strex".
 */
#define __user_swp_asm(data, addr, res, temp) \
    __user_swpX_asm(data, addr, res, temp, "")
/*
 * Variant with the "b" suffix: expands __user_swpX_asm so that the template
 * uses "ldrexb"/"strexb".
 */
#define __user_swpb_asm(data, addr, res, temp) \
    __user_swpX_asm(data, addr, res, temp, "b")

Rewritten to make it easier to read, it looks like the following. It simply exchanges the data using ldrex and strex (I'm not sure about the second half, i.e. the fixup and exception-table part!). When -EAGAIN is returned, the caller emulate_swpX() retries until it succeeds (or a signal is pending).


0: ldrex"B"    %temp, [addr]       # Read the contents of [addr] into temp
1: strex"B"    %res, %data, [addr] # Write the contents of data to [addr]
                                   # res is 0 if the store succeeded, 1 if it did not
   cmp         %res, #0            # Check whether res is 0
   moveq       %data, %temp        # true  => copy temp (the old value) into data
   movne       %res, -EAGAIN       # false => res = -EAGAIN
   .section     .text.fixup,"ax"
   .align      2
3: mov         %res, -EFAULT
   b           2b
   .section     __ex_table,"a"
   .align      3
   .long       0b, 3b
   .long       1b, 3b
    : "=&r" (res), "+r" (data), "=&r" (temp)
    : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)
    : "cc", "memory"

That's all.

Recommended Posts

Also read the contents of arch / arm / kernel / swp_emulate.c
Read all the contents of proc / [pid]
Read the implementation of ARM global timer
Read arch / arm / oprofile / common.c
Simulation of the contents of the wallet
Understand the contents of sklearn's pipeline
Read all the contents of proc / [pid] ~ From setgroups to wchan ~
Read all the contents of proc / [pid] ~ From cwd to loginuid ~
Read all the contents of proc / [pid] ~ From map_files to numa_maps ~
See the contents of Kumantic Segumantion
Read all the contents of proc / [pid] ~ from attr to cpuset ~
I checked the contents of docker volume
I read the implementation of golang channel
[Bash] While read, pass the contents of the file to variables for each column
Get the contents of git diff from python
[Python] Read the source code of Bottle Part 2
I read the implementation of range (Objects / rangeobject.c)
The contents of the Python tutorial (Chapter 2) are itemized.
The contents of the Python tutorial (Chapter 8) are itemized.
The contents of the Python tutorial (Chapter 1) are itemized.
The contents of the Python tutorial (Chapter 10) are itemized.
Read the output of subprocess.Popen in real time
About the development contents of machine learning (Example)
Get the latest Linux kernel version with Arch Linux
The contents of the Python tutorial (Chapter 6) are itemized.
I read and implemented the Variants of UKR
The contents of the Python tutorial (Chapter 3) are itemized.