构建最小虚拟机

前言

ARM 架构上的 KVM(Kernel-based Virtual Machine)是一种基于 Linux 内核的虚拟化技术,它允许用户空间程序通过 /dev/kvm 设备上的 ioctl 系统调用使用 CPU 的硬件虚拟化扩展,从而创建并管理虚拟机。KVM 在 x86 平台上已经非常成熟,而在 ARM 架构上,KVM 也得到了广泛的支持和发展。

PS: 本文首发在幽兰 Wiki - 构建最小虚拟机

ARM 上的 KVM 特点

  • 硬件支持:ARM 处理器增加了对虚拟化的硬件支持,例如 ARMv8-A 架构中的虚拟化扩展(Virtualization Extensions),这使得在 ARM 上实现 KVM 成为可能。

  • 性能优势:KVM 提供了接近于裸机的性能,因为客户机指令借助硬件虚拟化扩展直接在物理 CPU 上执行,减少了纯软件模拟方案中的额外开销。
  • 兼容性:KVM 支持多种操作系统作为客户机运行,包括但不限于 Linux、Windows 和各种 Unix-like 操作系统。
  • 管理工具:KVM 可以与 QEMU(Quick EMUlator)配合使用,QEMU 提供了用户空间的组件来实现虚拟机的管理和设备模拟等功能。此外,像 libvirt 这样的工具提供了高级的虚拟机管理功能。

ARM KVM 的实现

在 ARM 平台上实现 KVM 虚拟化主要依赖以下几个组件:

  • Linux 内核:ARM 版本的 Linux 内核包含了对 KVM 的支持,允许用户空间应用程序直接访问虚拟化硬件资源。
  • QEMU:一个开源的机器模拟器,可以用来启动虚拟机,并为它们提供模拟的硬件环境。
  • libvirt:一个用于管理虚拟化的软件集合,它可以简化 KVM 的管理和部署。

ARM KVM 的应用场景

ARM KVM 主要应用于以下几个场景:

  • 数据中心:随着 ARM 服务器芯片的发展,越来越多的数据中心开始采用 ARM 架构的服务器,KVM 可以帮助这些服务器实现高效的虚拟化。
  • 嵌入式系统:ARM 芯片广泛应用于嵌入式系统中,KVM 可以提供一种灵活的方式来测试和开发嵌入式系统。
  • 云计算平台:ARM KVM 可以用于构建云计算平台,提供高性能的虚拟化服务。

基于 C 编写一个最小虚拟机

本文将尝试通过 C 语言编写一个最小虚拟机,它可以在幽兰代码本上运行 ARM AArch64 指令集的简单裸机程序。

环境搭建

幽兰本已经内置了 GCC 开发编译套件,因此不需要再额外搭建开发环境,我们直接开始编写。

实验代码

  1. 首先我们要包含必要的头文件:
 1// 包含基本 libc 库头文件
 2#include <err.h>
 3#include <stdint.h>
 4#include <stdio.h>
 5#include <string.h>
 6#include <stddef.h>
 7#include <unistd.h>
 8
 9// 包含 mmap 头文件,用于申请客户机内存
10#include <sys/mman.h>
11
12// 包含 IO 相关头文件,用于访问文件
13#include <fcntl.h>
14#include <sys/ioctl.h>
15
16// 包含 KVM 相关头文件,用于配置虚拟机
17#include <linux/kvm.h>
  1. 然后进行 KVM 相关的初始化:
 1int main(void)
 2{
 3    ...
 4
 5	/* 打开 kvm 文件,获取 fd */
 6    int kvmfd = open("/dev/kvm", O_RDWR);
 7    if (kvmfd == -1)
 8        err(1, "/dev/kvm");
 9    else
10        printf("[%d] Open kvm succesfuly, fd is %d\n", ++step, kvmfd);
11
12    /* 确保 KVM API 版本是 12 */
13    ret = ioctl(kvmfd, KVM_GET_API_VERSION, NULL);
14    if (ret < 0)
15        err(1, "KVM_GET_API_VERSION");
16    if (ret != 12)
17        errx(1, "KVM_GET_API_VERSION %d, expected 12", ret);
18    else
19        printf("[%d] Get kvm version %d\n", ++step, ret);
20
21    /* 1. 获取 VM 的 id */
22    int vmfd = ioctl(kvmfd, KVM_CREATE_VM, (unsigned long)0);
23    if (vmfd < 0)
24        err(1, "KVM_CREATE_VM");
25    else
26        printf("[%d] Create VM succesfuly, fd is %d\n", ++step, vmfd);
27	...
28}
  1. 然后就是配置 VM 及对应的 vCPU:
 1int main()
 2{
 3	...
 4	/* 2. 创建 vCPU */
 5    int vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, (unsigned long)0);
 6    if (vcpufd < 0)
 7        err(1, "KVM_CREATE_VCPU");
 8    else
 9        printf("[%d] Create vCPU succesfuly, fd is %d\n", ++step, vcpufd);
10
11    /* 3. 设置 vCPU 的类型,这里是 ARMv8 */
12    // sample code can check the qemu/target/arm/kvm64.c
13    memset(&init, 0, sizeof(init));
14    init.target = KVM_ARM_TARGET_GENERIC_V8;
15    ret = ioctl(vcpufd, KVM_ARM_VCPU_INIT, &init);
16    if (ret < 0)
17        err(1, "init vcpu type failed\n");
18    else
19        printf("[%d] Set vCPU type is Aarch64 (ARMv8)\n", ++step);
20
21    /* 4. 为 kvm_run 分配内存 */
22    mmap_size = ioctl(kvmfd, KVM_GET_VCPU_MMAP_SIZE, NULL);
23    if (mmap_size < 0)
24        err(1, "KVM_GET_VCPU_MMAP_SIZE");
25    if (mmap_size < sizeof(*run))
26        errx(1, "KVM_GET_VCPU_MMAP_SIZE unexpectedly small");
27    run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
28    if (run == MAP_FAILED)
29        err(1, "mmap vcpu");
30    else
31        printf("[%d] Init kvm_run successfuly!\n", ++step);
32}
  1. 编写一段简单的客户机程序,并拷贝到客户机内存:
 1const unsigned code[] = {
 2    // write "Hello" to port 0x996
 3    0xd28132c4, // mov    x4, #0x996                     // #2454
 4    0xd2800905, // mov    x5, #0x48                      // H
 5    0x39000085, // strb    w5, [x4]
 6    0xd2800ca5, // mov    x5, #0x65                      // e
 7    0x39000085, // strb    w5, [x4]
 8    0xd2800d85, // mov    x5, #0x6c                      // ll
 9    0x39000085, // strb    w5, [x4]
10    0x39000085, // strb    w5, [x4]
11    0xd2800de5, // mov    x5, #0x6f                      // o
12    0x39000085, // strb    w5, [x4]
13    0xd2800145, // mov    x5, #0xa                       // \n
14    0x39000085, // strb    w5, [x4]
15};
16#define MEM_SIZE  0x1000
17...
18int main()
19{
20	...
21    /* 5. 将程序拷贝到客户机内存 */
22    ram = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, 
23                MAP_SHARED | MAP_ANONYMOUS, -1, 0);
24    if (!ram)
25        err(1, "allocating guest memory");
26    memcpy(ram, code, sizeof(code));
27    printf("[%d] Load the vm running program to buffer 'ram'\n", ++step);
28	...
29}
  1. 初始化 userspace_memory_region 并设置 vCPU 寄存器
 1int main()
 2{
 3  ...
 4
 5    /* 6. 设置 the vm userspace memory region,并绑定 vmfd */
 6    struct kvm_userspace_memory_region region = {
 7        .slot = 0,
 8        .flags = 0,
 9        .memory_size = MEM_SIZE,
10        .guest_phys_addr = PHY_ADDR,
11        .userspace_addr = (unsigned long)ram,
12    };
13    ret = ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
14    if (ret < 0)
15        err(1, "KVM_SET_USER_MEMORY_REGION");
16    else
17        printf("[%d] Set the vm userspace program ram to vm fd handler\n", ++step);
18
19    /* 7. 设置 vCPU 的 PC 寄存器,指向客户机第一条指令的内存地址 */
20    reg.id = ARM64_CORE_REG(regs.pc);
21    reg.addr = (__u64)&guest_entry;
22    ret = ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
23    if (ret < 0)
24        err(1,"KVM_SET_ONE_REG failed (pc)");
25    else
26        printf("[%d] Set vCPU PC, is 0x%x\n", ++step, (unsigned)guest_entry);
27  ...
28}
  1. 处理 VM 运行时的逻辑,增加 MMIO 模拟:
 1#define PHY_ADDR  0x10000
 2int main()
 3{
 4	...
 5    /* 8. VM 运行时处理 */
 6    printf("[%d] Run vCPU and print message:\n", ++step);
 7
 8    while (1) {
 9        ret = ioctl(vcpufd, KVM_RUN, NULL);
10        if (ret < 0)
11            err(1, "KVM_RUN");
12
13        switch (run->exit_reason) {
14            case KVM_EXIT_MMIO:
15                if (run->mmio.is_write && run->mmio.len == 1) {
16                    printf("%c", run->mmio.data[0]);
17                }
18                if (run->mmio.data[0] == '\n')
19                    return 0;
20                else
21                    break;
22            case KVM_EXIT_FAIL_ENTRY:
23                errx(1, "KVM_EXIT_FAIL_ENTRY: hardware_entry_failure_reason = 0x%llx",
24                    (unsigned long long)run->fail_entry.hardware_entry_failure_reason);
25            case KVM_EXIT_INTERNAL_ERROR:
26                errx(1, "KVM_EXIT_INTERNAL_ERROR: suberror = 0x%x", run->internal.suberror);
27            default:
28                errx(1, "exit_reason = 0x%x", run->exit_reason);
29        }
30    }
31
32    return 0;
33}

运行结果如下:

 1geduer@ulan:~/gevico/kvm$ gcc main.c && ./a.out 
 2[1] Open kvm succesfuly, fd is 3
 3[2] Get kvm version 12
 4[3] Create VM succesfuly, fd is 4
 5[4] Create vCPU succesfuly, fd is 5
 6[5] Set vCPU type is Aarch64 (ARMv8)
 7[6] Init kvm_run successfuly!
 8[7] Load the vm running program to buffer 'ram'
 9[8] Set the vm userspace program ram to vm fd handler
10[9] Set vCPU PC, is 0x10000
11[10] Run vCPU and print message:
12Hello

完整代码

完整代码如下:

  1#include <err.h>
  2#include <stdint.h>
  3#include <stdio.h>
  4#include <string.h>
  5#include <stddef.h>
  6#include <unistd.h>
  7#include <sys/mman.h>
  8#include <fcntl.h>
  9#include <sys/ioctl.h>
 10#include <linux/kvm.h>
 11
 12#define MEM_SIZE  0x1000
 13#define PHY_ADDR  0x10000
 14
 15static __u64 __core_reg_id(__u64 offset)
 16{
 17    __u64 id = KVM_REG_ARM64 | KVM_REG_ARM_CORE | offset;
 18
 19    if (offset < KVM_REG_ARM_CORE_REG(fp_regs))
 20        id |= KVM_REG_SIZE_U64;
 21    else if (offset < KVM_REG_ARM_CORE_REG(fp_regs.fpsr))
 22        id |= KVM_REG_SIZE_U128;
 23    else
 24        id |= KVM_REG_SIZE_U32;
 25
 26    return id;
 27}
 28
 29#define ARM64_CORE_REG(x) __core_reg_id(KVM_REG_ARM_CORE_REG(x))
 30
 /*
  * Guest program: raw AArch64 instruction words that store the bytes of
  * "Hello\n" one at a time to address 0x996. Each word is one 32-bit
  * instruction, hence uint32_t. The listing is copied verbatim into guest
  * memory by main().
  */
 static const uint32_t code[] = {
     0xd28132c4, // mov    x4, #0x996                     // #2454
     0xd2800905, // mov    x5, #0x48                      // H
     0x39000085, // strb   w5, [x4]
     0xd2800ca5, // mov    x5, #0x65                      // e
     0x39000085, // strb   w5, [x4]
     0xd2800d85, // mov    x5, #0x6c                      // l (stored twice)
     0x39000085, // strb   w5, [x4]
     0x39000085, // strb   w5, [x4]
     0xd2800de5, // mov    x5, #0x6f                      // o
     0x39000085, // strb   w5, [x4]
     0xd2800145, // mov    x5, #0xa                       // \n
     0x39000085, // strb   w5, [x4]
 };
 46
 47int main(void)
 48{
 49    /* Initialize registers: instruction pointer for our code, addends, and
 50     * initial flags required by aarch64 architecture. */
 51    struct kvm_one_reg reg;
 52    struct kvm_vcpu_init init; //using init the vcpu type
 53    struct kvm_vcpu_init preferred;
 54    __u64 guest_entry = PHY_ADDR;
 55    int ret;
 56    int step = 0;
 57
 58    uint8_t *ram;
 59    size_t mmap_size;
 60    struct kvm_run *run;
 61
 62    int kvmfd = open("/dev/kvm", O_RDWR);
 63    if (kvmfd == -1)
 64        err(1, "/dev/kvm");
 65    else
 66        printf("[%d] Open kvm succesfuly, fd is %d\n", ++step, kvmfd);
 67
 68    /* Make sure we have the stable version of the API */
 69    ret = ioctl(kvmfd, KVM_GET_API_VERSION, NULL);
 70    if (ret < 0)
 71        err(1, "KVM_GET_API_VERSION");
 72    if (ret != 12)
 73        errx(1, "KVM_GET_API_VERSION %d, expected 12", ret);
 74    else
 75        printf("[%d] Get kvm version %d\n", ++step, ret);
 76
 77    /* 1. create vm and get the vm fd handler */
 78    int vmfd = ioctl(kvmfd, KVM_CREATE_VM, (unsigned long)0);
 79    if (vmfd < 0)
 80        err(1, "KVM_CREATE_VM");
 81    else
 82        printf("[%d] Create VM succesfuly, fd is %d\n", ++step, vmfd);
 83
 84    /* 2. create vcpu */
 85    int vcpufd = ioctl(vmfd, KVM_CREATE_VCPU, (unsigned long)0);
 86    if (vcpufd < 0)
 87        err(1, "KVM_CREATE_VCPU");
 88    else
 89        printf("[%d] Create vCPU succesfuly, fd is %d\n", ++step, vcpufd);
 90
 91    /* 3. arm64 type vcpu type init */
 92    // sample code can check the qemu/target/arm/kvm64.c
 93    memset(&init, 0, sizeof(init));
 94    init.target = KVM_ARM_TARGET_GENERIC_V8;
 95    ret = ioctl(vcpufd, KVM_ARM_VCPU_INIT, &init);
 96    if (ret < 0)
 97        err(1, "init vcpu type failed\n");
 98    else
 99        printf("[%d] Set vCPU type is Aarch64 (ARMv8)\n", ++step);
100
101    /* 4. Map the shared kvm_run structure and following data. */
102    mmap_size = ioctl(kvmfd, KVM_GET_VCPU_MMAP_SIZE, NULL);
103    if (mmap_size < 0)
104        err(1, "KVM_GET_VCPU_MMAP_SIZE");
105    if (mmap_size < sizeof(*run))
106        errx(1, "KVM_GET_VCPU_MMAP_SIZE unexpectedly small");
107    run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpufd, 0);
108    if (run == MAP_FAILED)
109        err(1, "mmap vcpu");
110    else
111        printf("[%d] Init kvm_run successfuly!\n", ++step);
112
113    /* 5. load the vm running program to buffer 'ram' */
114    ram = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, 
115                MAP_SHARED | MAP_ANONYMOUS, -1, 0);
116    if (!ram)
117        err(1, "allocating guest memory");
118    memcpy(ram, code, sizeof(code));
119    printf("[%d] Load the vm running program to buffer 'ram'\n", ++step);
120
121    /* 6. Set the vm userspace program ram to vm fd handler */
122    struct kvm_userspace_memory_region region = {
123        .slot = 0,
124        .flags = 0,
125        .memory_size = MEM_SIZE,
126        .guest_phys_addr = PHY_ADDR,
127        .userspace_addr = (unsigned long)ram,
128    };
129    ret = ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &region);
130    if (ret < 0)
131        err(1, "KVM_SET_USER_MEMORY_REGION");
132    else
133        printf("[%d] Set the vm userspace program ram to vm fd handler\n", ++step);
134
135    /* 7. Set PC */
136    reg.id = ARM64_CORE_REG(regs.pc);
137    reg.addr = (__u64)&guest_entry;
138    ret = ioctl(vcpufd, KVM_SET_ONE_REG, &reg);
139    if (ret < 0)
140        err(1,"KVM_SET_ONE_REG failed (pc)");
141    else
142        printf("[%d] Set vCPU PC, is 0x%x\n", ++step, (unsigned)guest_entry);
143
144    /* 8. Repeatedly run code and handle VM exits. */
145    printf("[%d] Run vCPU and print message:\n", ++step);
146
147    while (1) {
148        ret = ioctl(vcpufd, KVM_RUN, NULL);
149        if (ret < 0)
150            err(1, "KVM_RUN");
151
152        switch (run->exit_reason) {
153            case KVM_EXIT_MMIO:
154                if (run->mmio.is_write && run->mmio.len == 1) {
155                    printf("%c", run->mmio.data[0]);
156                }
157                if (run->mmio.data[0] == '\n')
158                    return 0;
159                else
160                    break;
161            case KVM_EXIT_FAIL_ENTRY:
162                errx(1, "KVM_EXIT_FAIL_ENTRY: hardware_entry_failure_reason = 0x%llx",
163                    (unsigned long long)run->fail_entry.hardware_entry_failure_reason);
164            case KVM_EXIT_INTERNAL_ERROR:
165                errx(1, "KVM_EXIT_INTERNAL_ERROR: suberror = 0x%x", run->internal.suberror);
166            default:
167                errx(1, "exit_reason = 0x%x", run->exit_reason);
168        }
169    }
170
171    return 0;
172}

参考资料:

[1] KVM API 手册

[2] KVM 示例教程

[3] ARM 简易 KVM 虚拟机