红帽, OpenShift 4.5 - an insider's view for technology partners support GPU sharing
阿里解决方案:
代码地址:
wget http://cgpu.oss-cn-hangzhou.aliyuncs.com/cgpu-0.8.tar.gz
tar -xvf cgpu-0.8.tar.gz
cd cgpu
ls
# cgpu-container-wrapper  cgpu-km.c  cgpu.o  cgpu-procfs.c  install.sh  Makefile  os-interface.c  README  uninstall.sh  upgrade.sh  version.h
github
git clone https://github.com/lvmxh/cgpu.git
cd cgpu
由于 NVIDIA 的驱动并不开源,阿里应该无法对驱动本身做源码级别的改造。
我们可以看到,cgpu.o 引用了 os_filp_open(从名字看,应是对内核 filp_open 的封装)。据此猜测阿里的做法是:打开暴露给用户态的 NVIDIA 显卡设备,并截获对该显卡的所有操作。
不过,我们可以导出 cgpu.o 的符号表,大致了解一下它里面都有些什么。
objdump -t cgpu.o
objdump -t cgpu.o

cgpu.o:     file format elf64-x86-64

SYMBOL TABLE:
0000000000000000 l    d  .text  0000000000000000 .text
0000000000000000 l    d  .text.unlikely  0000000000000000 .text.unlikely
0000000000000000 l    d  .rodata  0000000000000000 .rodata
0000000000000000 l    d  .rodata.str1.1  0000000000000000 .rodata.str1.1
0000000000000000 l    d  .rodata.str1.8  0000000000000000 .rodata.str1.8
0000000000000000 l    d  .data  0000000000000000 .data
0000000000000000 l    d  .bss  0000000000000000 .bss
0000000000000248 l       .rodata.str1.8  0000000000000000 .LC43
0000000000000000 l     F .text.unlikely  00000000000000e7 mbedtls_mpi_shift_r
0000000000000000 l    d  __mcount_loc  0000000000000000 __mcount_loc
0000000000000000 l    d  .comment  0000000000000000 .comment
0000000000000000 l    d  .note.GNU-stack  0000000000000000 .note.GNU-stack
00000000000045c0 g     F .text  000000000000025c cgpu_finalize
0000000000002920 g     F .text  0000000000000052 cgpu_clear_set_pfn
0000000000000000         *UND*  0000000000000000 os_kfree
0000000000000000         *UND*  0000000000000000 os_spin_lock
0000000000000000         *UND*  0000000000000000 os_writel
0000000000000000         *UND*  0000000000000000 os_vma_ops
0000000000000000         *UND*  0000000000000000 os_kmalloc
0000000000000000         *UND*  0000000000000000 os_wait_event_interruptible_timeout
0000000000003af0 g     F .text  0000000000000105 cgpu_km_unlocked_ioctl
0000000000000000         *UND*  0000000000000000 os_ioremap_nocache
0000000000000000         *UND*  0000000000000000 os_iounmap
0000000000003c00 g     F .text  00000000000000cf cgpu_km_compat_ioctl
0000000000000000         *UND*  0000000000000000 os_in_vma_range
0000000000000000         *UND*  0000000000000000 os_get_tgid
0000000000000000         *UND*  0000000000000000 os_memcpy
0000000000000000         *UND*  0000000000000000 os_unmap_range
0000000000000000         *UND*  0000000000000000 os_get_page_shift
0000000000004d20 g     F .text  0000000000000037 group_get_policy
0000000000000000         *UND*  0000000000000000 os_get_fops
0000000000000000         *UND*  0000000000000000 __fentry__
0000000000004e00 g     F .text  0000000000000086 inst_get_meminfo
0000000000000000         *UND*  0000000000000000 os_kmalloc_array
0000000000000000         *UND*  0000000000000000 os_kthread_run
0000000000000000         *UND*  0000000000000000 os_spin_unlock
0000000000000000         *UND*  0000000000000000 os_memset
0000000000000000         *UND*  0000000000000000 os_ioremap_cache
0000000000000000         *UND*  0000000000000000 os_filp_open
0000000000000000         *UND*  0000000000000000 __stack_chk_fail
0000000000004e90 g     F .text  0000000000001858 cgpu_ioctl
0000000000000000         *UND*  0000000000000000 os_kthread_stop
0000000000000000         *UND*  0000000000000000 put_spin_lock
0000000000000000         *UND*  0000000000000000 os_get_system_info
0000000000000000         *UND*  0000000000000000 os_cdev_put
0000000000004d60 g     F .text  0000000000000057 group_set_max_inst
0000000000000000         *UND*  0000000000000000 os_put_waitqueue_head
0000000000000000         *UND*  0000000000000000 os_virt_to_phys
00000000000028e0 g     F .text  000000000000003c check_permit
0000000000004840 g     F .text  000000000000001b inst_get_total_mem
0000000000000000         *UND*  0000000000000000 os_spin_lock_init
0000000000000000         *UND*  0000000000000000 os_pr_debug
0000000000004b70 g     F .text  0000000000000126 inst_get_node
0000000000003cd0 g     F .text  0000000000000047 cgpu_km_poll
0000000000003840 g     F .text  000000000000007f inst_vma_fault
0000000000000000         *UND*  0000000000000000 os_get_vm_file
0000000000000000         *UND*  0000000000000000 os_vm_insert_pfn
0000000000000000         *UND*  0000000000000000 os_get_zeroed_page
0000000000000000         *UND*  0000000000000000 os_get_inode
0000000000000000         *UND*  0000000000000000 os_copy_to_user
0000000000000000         *UND*  0000000000000000 os_file_inode
0000000000004cf0 g     F .text  000000000000002f group_set_policy
0000000000000000         *UND*  0000000000000000 os_cdev_get
0000000000000000         *UND*  0000000000000000 get_rdev
0000000000000000         *UND*  0000000000000000 os_set_pgoff
0000000000000000         *UND*  0000000000000000 cgpu_km_vma_fault
0000000000000000         *UND*  0000000000000000 os_filp_close
00000000000038c0 g     F .text  0000000000000225 cgpu_km_mmap
0000000000000000         *UND*  0000000000000000 os_follow_pfn
0000000000000000         *UND*  0000000000000000 os_free_page
0000000000000000         *UND*  0000000000000000 os_copy_from_user
0000000000004a20 g     F .text  0000000000000145 inst_set_weight
0000000000004dc0 g     F .text  0000000000000037 group_get_max_inst
00000000000049f0 g     F .text  0000000000000024 inst_get_weight
0000000000004860 g     F .text  0000000000000148 inst_set_total_mem
0000000000004380 g     F .text  000000000000023e cgpu_initialize
0000000000004ca0 g     F .text  000000000000004b inst_set_name
0000000000000000         *UND*  0000000000000000 os_file_op
0000000000002840 g     F .text  0000000000000097 get_system_info
0000000000002690 g     F .text  00000000000001a3 inst_get_bios
00000000000049b0 g     F .text  0000000000000037 inst_get_free_weight
00000000000040d0 g     F .text  00000000000002a2 cgpu_km_close
0000000000000000         *UND*  0000000000000000 os_kthread_should_stop
0000000000000000         *UND*  0000000000000000 os_ireadcount_inc
0000000000000000         *UND*  0000000000000000 os_get_minor
0000000000000000         *UND*  0000000000000000 os_printf
0000000000004820 g     F .text  0000000000000019 inst_get_minor
0000000000000000         *UND*  0000000000000000 os_set_filp
0000000000000000         *UND*  0000000000000000 get_spin_lock
0000000000000000         *UND*  0000000000000000 os_init_waitqueue_head
0000000000003d20 g     F .text  00000000000003a7 cgpu_km_open
0000000000000000         *UND*  0000000000000000 os_minor
0000000000000000         *UND*  0000000000000000 os_alloc_file_operations
0000000000000000         *UND*  0000000000000000 os_get_device_id
0000000000002640 g     F .text  000000000000004d _strstr