Kernel / VM Advent Calendar 2013 Day 3: Let's try the lock function called lockref of the Linux kernel

This diary is the third day article of Kernel / VM Advent Calendar 2013. This time, I would like to take a look at the lock mechanism called lockref, which was introduced in Linux 3.12. This is a function introduced in LWN's Introducing lock refs article.

According to this article, "In many cases, reference counters are defined as atomic_t types and you can manipulate variables without taking a lock, but with atomic_t, reference counters can be updated independently of other data structures in the structure. It works in some cases. If not, you need to lock the entire structure (super-translation). " And, in the article, using the dentry structure as an example, it is explained that spinlock will be a bottleneck, and the solution to that is lockref.

So if you look at include / linux / lockref.h, you will find the definition of the lockref structure.

 19 struct lockref {
 20         union {
 21 #ifdef CONFIG_CMPXCHG_LOCKREF
 22                 aligned_u64 lock_count;
 23 #endif
 24                 struct {
 25                         spinlock_t lock;
 26                         unsigned int count;
 27                 };
 28         };
 29 };

I first learned about aligned_u64 type φ (..) You can use this as a reference counter. The interface when using this data structure had the following 6 functions.

 31 extern void lockref_get(struct lockref *);
 32 extern int lockref_get_not_zero(struct lockref *);
 33 extern int lockref_get_or_lock(struct lockref *);
 34 extern int lockref_put_or_lock(struct lockref *);
 35 
 36 extern void lockref_mark_dead(struct lockref *);
 37 extern int lockref_get_not_dead(struct lockref *);

Looking at lib / lockref.c, comments were written for each function, so it is okay to use them properly by looking at the comments. think.

Let's use this code for a moment. It is a test code that increments the variable for counting with 3 patterns of lockref, spinlock, and atomic_t, which is decided at compile time.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/time.h>
#include <linux/debugfs.h>
#include <linux/string.h>

MODULE_DESCRIPTION("lock test module");
MODULE_AUTHOR("masami256");
MODULE_LICENSE("GPL");

struct testfunc_ops {
	void(*inc_count)(void);
	unsigned int (*get_count)(void);
};

struct locktest_data;

struct locktest_data testdata;

static s64 total_nanosec;

#define LOCKTEST_USE_LOCKREF 
//#define LOCKTEST_USE_ATOMIC_INC

#ifdef LOCKTEST_USE_LOCKREF
#define LOCKTEST_LOCK_NAME "lockref"
#include <linux/lockref.h>
struct locktest_data {
	struct lockref ld_lockref;
};

static void 
locktest_lockref_inc_count(void)
{
	lockref_get(&testdata.ld_lockref);
}

static unsigned int
locktest_lockref_get_count(void)
{
	return testdata.ld_lockref.count;
}

struct testfunc_ops testfunc = {
	.inc_count = locktest_lockref_inc_count,
	.get_count = locktest_lockref_get_count,
};

#elif defined(LOCKTEST_USE_ATOMIC_INC)
#include <linux/spinlock.h>
#define LOCKTEST_LOCK_NAME "atomic_t"
struct locktest_data {
	atomic_t count;
};

static void 
locktest_atomic_inc_count(void)
{
	atomic_inc(&testdata.count);
}

static unsigned int
locktest_atomic_get_count(void)
{
	return (unsigned int) atomic_read(&testdata.count);
}

struct testfunc_ops testfunc = {
	.inc_count = locktest_atomic_inc_count,
	.get_count = locktest_atomic_get_count,
};

#else // use spinlock
#define LOCKTEST_LOCK_NAME "spinlock"
#include <linux/spinlock.h>
struct locktest_data {
	unsigned int count;
	spinlock_t ld_lock;
};

static void
locktest_spinlock_inc_count(void)
{
	spin_lock(&testdata.ld_lock);
	testdata.count++;
	spin_unlock(&testdata.ld_lock);
}

static unsigned int
locktest_spinlock_get_count(void)
{
	return testdata.count;
}

struct testfunc_ops testfunc = {
	.inc_count = locktest_spinlock_inc_count,
	.get_count = locktest_spinlock_get_count,
};

#endif // LOCKTEST_USE_LOCKREF

#define LOOP_COUNT 1000000
static unsigned int expected_count;

static void
locktest_run_test(void *info)
{
	int i = 0;
	for (i = 0; i < LOOP_COUNT; i++)
		testfunc.inc_count();
}

static s64
locktest_get_current_time_as_ns(void)
{
	struct timeval v;
	do_gettimeofday(&v);

	return timeval_to_ns(&v);
}

static void
save_test_total_time(s64 start, s64 end)
{
	total_nanosec = end - start;
}

static int num_cpus;

static void
locktest_start_test(void)
{
	s64 start, end;

	start = locktest_get_current_time_as_ns();
	on_each_cpu(&locktest_run_test, NULL, 1);
	end = locktest_get_current_time_as_ns();

	save_test_total_time(start, end);
	printk(KERN_INFO "Test took %lld nanoseconds\n", total_nanosec);

	WARN_ON(unlikely(expected_count != testfunc.get_count()));
}

// debugfs operations.
static struct dentry *locktest_dir;
static struct dentry *locktest_tc;
static struct dentry *locktest_result;
static char locktest_data[8];
static char locktest_result_data[64];

static ssize_t
locktest_read_result(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	memset(locktest_result_data, 0, sizeof(locktest_result_data));
	sprintf(locktest_result_data, "%lld", total_nanosec);

	return simple_read_from_buffer(buf, len, ppos, locktest_result_data, sizeof(locktest_result_data));
}

static ssize_t
locktest_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	return simple_read_from_buffer(buf, len, ppos, locktest_data, sizeof(locktest_data));
}

static ssize_t
locktest_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	ssize_t s = simple_write_to_buffer(locktest_data, sizeof(locktest_data), ppos, buf, len);

	if (!strncmp(locktest_data, "run", 3))
		locktest_start_test();
	else if(!strncmp(locktest_data, "reset", 5))
		memset(&testdata, 0, sizeof(testdata));

	memset(locktest_data, 0x0, sizeof(locktest_data));
	return s;
}

static int
locktest_check_debugfs_func_result(const struct dentry *entry)
{
	if (!entry)
		return -1;

	if (ERR_PTR(-ENODEV) == entry)
		return -ENODEV;

	return 0;
}

static void
locktest_remove_debugfs_dir(void)
{
	if (locktest_dir)
		debugfs_remove_recursive(locktest_dir);
}

static int 
locktest_create_debugfs_directory(void)
{
	locktest_dir = debugfs_create_dir("locktest", NULL);
	return locktest_check_debugfs_func_result(locktest_dir);
}

struct file_operations locktest_fops = {
	.owner = THIS_MODULE,
	.read = locktest_read,
	.write = locktest_write,
};

struct file_operations locktest_result_fops = {
	.owner = THIS_MODULE,
	.read = locktest_read_result,
};

static int
locktest_create_file(void)
{
	int ret = 0;

	locktest_tc= debugfs_create_file("testcase", 0644, locktest_dir, &locktest_data, &locktest_fops);
	ret = locktest_check_debugfs_func_result(locktest_tc);
	if (ret)
		return ret;

	locktest_result = debugfs_create_file("test_result", 0644, locktest_dir, &locktest_result, &locktest_result_fops);
	ret = locktest_check_debugfs_func_result(locktest_tc);
	if (ret)
		return ret;

	return 0;
}

static int
locktest_init(void)
{
	int ret = 0;
	ret = locktest_create_debugfs_directory();
	if (ret)
		goto error_out;

	ret = locktest_create_file();
	if (ret)
		goto error_out;

	num_cpus = num_online_cpus();
	expected_count = num_cpus * LOOP_COUNT;

	printk(KERN_INFO "Use %s mechanism\n", LOCKTEST_LOCK_NAME);
	printk(KERN_INFO "cpus: %d\n", num_cpus);
	printk(KERN_INFO "module loaded\n");
	return 0;

error_out:
	locktest_remove_debugfs_dir();
	return ret;
}

static void
locktest_cleanup(void)
{
	locktest_remove_debugfs_dir();
	printk(KERN_INFO "module unloaded\n");
}

module_init(locktest_init);
module_exit(locktest_cleanup);

The content of the test is that the time (nanosecond) required to execute locktest_run_test () is measured by incrementing the counter in the global variable struct locktest_data testdata while looping 1000000 lower in each cpu. Make a set and execute it about 100 times with the following script.

#!/bin/bash

modfile=/home/masami/codes/locktest/locktest.ko
if [ ! -f $modfile ]; then
    echo $modfile is not found.
    exit -1
fi

insmod $modfile

debugfs_path=/sys/kernel/debug/locktest
for i in {0..100}
do
    echo run > $debugfs_path/testcase
    result=`cat $debugfs_path/test_result`
    echo $i,$result
    echo reset > $debugfs_path/testcase
done

rmmod locktest

exit 0

The environment I tried was x86_64, Arch Linux, and the kernel was 3.13-rc2. The CPU is i7 3770S (the number of cpu that can be seen from the kernel is 8), and the result is as ↓. result.png

Well, in the case of lockref, there is a lot of variation in speed. Spinlock is a stable time. atomic_inc () is still faster because it doesn't take a lock. I'm wondering if this test case isn't the use case that lockref expects.

Recommended Posts

Kernel / VM Advent Calendar 2013 Day 3: Let's try the lock function called lockref of the Linux kernel
Try the Linux kernel lockdown mechanism
A quick overview of the Linux kernel
Let's try Linux for the first time
Looking back on the transition of the Qiita Advent calendar