20150430 调试分析之 根据内核报错信息PC指针分析错误
2015-04-30 Lover雪儿
不知道大家写驱动的时候有没有发现:当驱动写错了、导致内核崩溃时,内核会打印出一大堆报错信息。
如果再回到程序里一行一行地检查代码,既耗费时间,而且有些逻辑上的错误也很难看出来。
那我们能不能在这一大堆报错信息中找出问题所在呢?
这里我们来模拟一个错误:沿用上一篇文章中的驱动程序 err_led.c,并故意把其中的代码改错。当然,大家用其他驱动代码做测试也可以。
博客地址: http://www.cnblogs.com/lihaiyan/p/4297923.html
1 40 static int key_open(struct inode *inode, struct file *file) 2 41 { 3 42 printk("<0>function open!\n\n"); 4 43 5 44 base_iomux = 0x43FAC000; 6 45 MUX_CTL &= ~(0x07 << 0); 7 46 MUX_CTL |= (0X05 << 0); //设置为ALT5 GPIO3_23 ERR_LED 8 47 9 48 //MUX_CTL 10 49 return 0; 11 50 }
直接让 base_iomux 等于实际的物理地址,肯定会报错:驱动中访问的必须是经过 ioremap 映射后的虚拟地址,而不能直接访问物理地址。
编译并加载驱动,然后打开设备,如我们所愿,成功地打印出了内核崩溃信息:
接下来我们来分析分析是否可以从这些信息中寻得蛛丝马迹呢?让我们一起见证奇迹的时刻.
1 root@EasyARM-iMX257 ~# echo 1 > /dev/err_led_dev 2 function open! 3 4 Unable to handle kernel paging request at virtual address 43fac060 5 //无法访问虚拟地址43fac060,因为驱动访问的都是虚拟地址,而恰恰43fac060这个地址是没有映射的,以无法访问 6 7 pgd = c3b8c000 8 [43fac060] *pgd=00000000 9 Internal error: Oops: 5 [#2] PREEMPT 10 Modules linked in: err_led mymsg gpio [ Tainted: G D (2.6.31-207-g7286c01 #692) 11 //发生错误时,系统加载的模块有err_led mymsg gpio 这几个 12 13 PC is at key_open+0x18/0x54 [err_led] 14 //PC就是发生错误的指令地址,发生错误的函数为 key_open,偏移0x18,其实这里已经很明显了 15 16 LR is at key_open+0x10/0x54 [err_led] 17 //LR寄存器的值 18 19 pc : [<bf010128>] lr : [<bf010120>] psr: 60000013 20 //发生错误时pc指针的值: bf010128 21 22 sp : c32c3e70 ip : c046708f fp : 00095ab0 23 r10: c3b9aae0 r9 : c320 r4 : 00000001 24 r3 : 00000000 r2 : 00000000 r1 : 43facfff r0 : 43fac000 25 //执行这条错误导致错误时各个寄存器的值 26 27 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user 28 Control: 0005317f Table: 83b8c000 DAC: 000000 limit = 0xc32c2270) 29 30 //发生错误时,堆栈的信息 31 32 Stack: (0xc32c2000 c00bb9d0 0000000b c3 c31790c8 00000000 c00bb7fc c380f0a0 c31b6398 c00b66b4 33 3ea0: c32c3ef8 c3b9aae0 c3885660 c32c3e0b76d4 34 3ec0: 00000000 c3885660 c32c3ef0 00000000 c32c3ef0 c00c4288 00000000 000001b6 35 3ee0: 00020241 00000000 00000000 00000000000b 36 3f00: c3b91005 c380f2200000 00005402 00000036 37 3f20: 00000000 c00c5698 c31498a0 fffffff7 be880704 c00c5d34 c32c3f84 00 c3879d60 00000003 c380f0a0 c31b6398 00000000 00020241 38 3f60: 000001b6 ffffff9c 00000000 c0029f24 c3b91000 00000003 00095ab0 c00b6444 39 3f80: 00000022 00000000 000001b6 000932ac 00000001 00000005 c0029f24 c32c2000 40 3fa0: 40138000 c0029da0 000001b6 000932000000 41 3fc0: 000001b6 000932ac 00000001 00000005 00000000 000933f8 40138000 00095ab0 42 3fe0: 000903ac be8803d8 00035c28 400d110x18/0x54 [err_led]) from [<c00bb9d0>] (chrdev_open+0x1d4/0x1f4) 43 [<c00bb9d0>] (chrdev_open+0x1d4/0x1f4) from [<c00b66b4>] (__dentry_open+0x18c/0x2ac) 44 [<c00b66b4>] (__dentry_open+0x18c/0x2ac) from [<c00b76d4>] 
(nameidata_to_filp+0x44/0x5c) 45 [<c00b76d4>] (nameidata_to_filp+0x44/0x5c) from [<c00c4288>] (do_filp_open+0x3e4/0x7e8) 46 [<c00c4288>] (do_filp_open+0x3e4/0x7e8) from [<c00b6444>] (do_sys_open+0x5c/0x114) 47 [114) from [<c0029da0>] (ret_fast_syscall+0x0/0x2c) 48 Code: e24dd004 eb40e05c e59f1030 e59f0030 (e5113f9f) 49 ---[ end trace c4bb5578ca399f8a ]--- 50 process '/sbin/getty -L ttymxc0 115200 vt100' (pid 1832) exited. Scheduling for restart. 51 starting pid 1833, tty '': '/sbin/getty -L ttymxc0 115200 vt100' 52 CC) 4.1.2 53 root filesystem built on Tue, 13 Aug 2013 02:31:56 -0700 54 Freescale Semiconductor, Inc. 55 56 57 //如果内核的配置选项选择了kernel 的FRAME_POINTER 的话 58 -->kernel hacking 59 - - → - - > kernel debuging (DEBUG_KERNEL) 60 //这儿会打印粗回溯信息,即一系列的内核指针调用信息. 61 Backtrace:
有的时候内核只会给出 PC 的地址值,而不会指出它位于哪个函数里面,这时就需要我们自己根据地址去查找:
1.根据上面的PC值,找到出错误的地址: System.map
先判断是否属于内核的地址: 看源代码目录下的linux-2.6.31/System.map
1 root@Lover雪:/home/study/nfs_home/system/linux-2.6.31# vi System.map 2 1 c0004000 A swapper_pg_dir 3 2 c0008000 T __init_begin 4 3 c0008000 T _sinittext 5 4 c0008000 T stext 6 5 c0008000 T _stext 7 6 c0008034 t __enable_mmu 8 .......... 9 32128 c04a0d64 b ratelimit.21298 10 32129 c04a0d68 b pipe_version_lock 11 32130 c04a0d68 b pipe_version_rpc_waitqueue 12 32131 c04a0db4 b rsc_table 13 32132 c04a1db4 b rsi_table 14 32133 c04a1eb8 B krb5_seq_lock 15 32134 c04a1ec0 b i.21559 16 32135 c04a1ec8 b wireless_nlevent_queue 17 32136 c04a1ed4 B __bss_stop 18 32137 c04a1ed4 B _end
可以发现内核的地址范围为 c0004000 ~ c04a1ed4,如果 PC 值落在这个范围内,则是内核本身的代码出错。
如果不属于 System.map 里的范围,则它属于通过 insmod 加载的模块。
因为我们的 PC 值为 bf010128,不在上述范围内,所以肯定不属于内核本身。
2.假设它是加载的驱动程序引入的错误,怎么确定是哪一个驱动程序呢?
/proc/kallsyms
先看看加载的驱动程序的函数的地址范围.cat /proc/kallsyms > 1.txt
在开发板上运行cat /proc/kallsyms > 1.txt 在1.txt中查看PC = bf010128 相近的值 28484 bf0100ec t key_read [err_led] 28485 bf0100ec t $a [err_led] 28486 bf01010c t $d [err_led] 28487 bf010110 t key_open [err_led] 28488 bf010110 t $a [err_led] 28489 bf010154 t $d [err_led] 28490 bf010164 t $a [err_led] 28491 bf010248 t $d [err_led] 28492 bf01024c t key_irq_exit [err_led] 28493 bf01024c t $a [err_led] 28494 bf0102ac t $d [err_led]
可以发现,这里列出了我们这个驱动模块中所有函数的地址。
找到与 PC 值最接近并且不大于它的符号地址(bf010110),可以发现,出错的位置在 key_open 函数里面。
t: 静态函数
T: 全局函数
然后用这个函数的起始地址加上偏移量(bf010110 + 0x18 = bf010128,正好等于 PC 值),就找到了出错的位置。
3.找到出错的驱动程序后,对 err_led.ko 进行反汇编
1 root@Lover雪:/home/study/nfs_home/module/36_my_proc_prink/test# 2 arm-none-linux-gnueabi-objdump -D err_led.ko > err_led.txt 3 root@Lover雪:/home/study/nfs_home/module/36_my_proc_prink/test# 4 vi err_led.txt 5 下面就是我们err_led.ko 的反汇编代码: 6 再反汇编代码中找到key_open函数然后在加上偏移0x18 7 8 84 00000110 <key_open>: 9 85 110: e52de004 str lr, [sp, #-4]! 10 86 114: e59f0038 ldr r0, [pc, #56] ; 154 <.text+0x154> 11 87 118: e24dd004 sub sp, sp, #4 ; 0x4 12 88 11c: ebfffffe bl 0 <printk> 13 89 120: e59f1030 ldr r1, [pc, #48] ; 158 <.text+0x158> 14 90 124: e59f0030 ldr r0, [pc, #48] ; 15c <.text+0x15c> 15 91 128: e5113f9f ldr r3, [r1, #-3999] 16 92 12c: e3c33007 bic r3, r3, #7 ; 0x7 //R3 &= ~(0x07) 17 93 130: e5013f9f str r3, [r1, #-3999] 18 94 134: e5112f9f ldr r2, [r1, #-3999] 19 95 138: e59f3020 ldr r3, [pc, #32] ; 160 <.text+0x160> 20 96 13c: e3822005 orr r2, r2, #5 ; 0x5 //R3 |= 0x05 21 97 140: e5830000 str r0, [r3] 22 98 144: e3a00000 mov r0, #0 ; 0x0
如上面代码所示,key_open 的起始地址 0x110 加上偏移 0x18 即 0x128,很容易就找到了出错的指令: ldr r3, [r1, #-3999]。
分析汇编代码:
接下来就是考察汇编功底的时刻了,
根据汇编代码来推出C语言代码:
从上面的汇编代码可以推出 R3 &= ~(0x07) 和 R2 |= 0x05(注意 orr 指令操作的是 r2),再与程序中的代码匹配:
1 40 static int key_open(struct inode *inode, struct file *file) 2 41 { 3 42 printk("<0>function open!\n\n"); 4 43 5 44 base_iomux = 0x43FAC000; 6 45 MUX_CTL &= ~(0x07 << 0); 7 46 MUX_CTL |= (0X05 << 0); //设置为ALT5 GPIO3_23 ERR_LED 8 47 9 48 //MUX_CTL 10 49 return 0; 11 50 }
可以发现,两者完全对应。所以我们可以确定是访问 MUX_CTL 的地方出了错。
而 MUX_CTL 的地址是由 base_iomux 加上偏移计算出来的,自然就可以推断出是 base_iomux 的值有问题。
附上驱动程序:err_led.c
1 #include<linux/cdev.h> 2 #include<linux/module.h> 3 #include<linux/types.h> 4 #include<linux/fs.h> 5 #include<linux/errno.h> 6 #include<linux/mm.h> 7 #include<linux/sched.h> 8 #include<linux/init.h> 9 #include<asm/io.h> 10 #include<asm/system.h> 11 #include<asm/uaccess.h> 12 #include<linux/device.h> 13 #include <linux/delay.h> 14 15 #define Driver_NAME "err_led_dev" 16 #define DEVICE_NAME "err_led_dev" 17 18 static int major = 0; 19 20 //auto to create device node 21 static struct class *drv_class = NULL; 22 static struct class_device *drv_class_dev = NULL; 23 24 //寄存器基址; 25 static unsigned long base_iomux; //iomux基址 0X 43FA C000 - 0X 43FA FFFF 26 static unsigned long base_gpio3; //gpio3 0X 53FA 4000 - 0X 53FA 7FFF 27 // MUX_CTL模式选择 配置寄存器 28 #define MUX_CTL (*(volatile unsigned long *)(base_iomux + 0x0060)) 29 // PAD_CTL GPIO常用功能设置 30 #define PAD_CTL (*(volatile unsigned long *)(base_iomux + 0x0270)) 31 // GPIO DR 数据寄存器 DR 32 #define DR_GPIO3 (*(volatile unsigned long *)(base_gpio3 + 0x0000)) 33 // GPIO GDIR 方向控制寄存器 GDIR 34 #define GDIR_GPIO3 (*(volatile unsigned long *)(base_gpio3 + 0x0004)) 35 36 37 extern int printk(const char *fmt, ...); 38 39 static int key_open(struct inode *inode, struct file *file) 40 { 41 printk("<0>function open!\n\n"); 42 43 base_iomux = 0x43FAC000; 44 MUX_CTL &= ~(0x07 << 0); 45 MUX_CTL |= (0X05 << 0); //设置为ALT5 GPIO3_23 ERR_LED 46 47 //MUX_CTL 48 return 0; 49 } 50 51 static int key_read(struct file *filp, char __user *buff, size_t count, loff_t *offp) 52 { 53 static int cnt = 0; 54 printk("enter key_open %d \n",cnt); 55 return 0; 56 } 57 58 static ssize_t key_write(struct file *file, const char __user *buf, size_t count, loff_t * ppos) 59 { 60 printk("<0>function write!\n\n"); 61 return 1; 62 } 63 64 static int key_release(struct inode *inode, struct file *filp) 65 { 66 printk("<0>function write!\n\n"); 67 return 0; 68 } 69 70 static int key_ioctl(struct inode *inode,struct file *flip,unsigned int command,unsigned long arg) 71 { 72 
printk("<0>function ioctl!\n\n"); 73 return 0; 74 } 75 static struct file_operations key_fops = { 76 .owner = THIS_MODULE, /* 这是一个宏,推向编译模块时自动创建的__this_module变量 */ 77 .open = key_open, 78 .read = key_read, 79 .write = key_write, 80 .release= key_release, 81 .ioctl = key_ioctl, 82 }; 83 84 void gpio_addr(void){ 85 printk("<0>addr base_iomux : %x \n",base_iomux); 86 printk("<0>addr base_gpio3 : %x \n",base_gpio3); 87 printk("<0>addr MUX_CTL : %x \n",&MUX_CTL); 88 printk("<0>addr PAD_CTL : %x \n",&PAD_CTL); 89 printk("<0>addr GDIR_GPIO3 : %x \n",&GDIR_GPIO3); 90 printk("<0>addr DR_GPIO3 : %x \n",&DR_GPIO3); 91 } 92 93 void led_on_off(void){ 94 ssleep(1); 95 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 96 ssleep(1); 97 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 98 ssleep(1); 99 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 100 ssleep(1); 101 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 102 ssleep(1); 103 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 104 ssleep(1); 105 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 106 ssleep(1); 107 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 108 ssleep(1); 109 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 110 ssleep(1); 111 DR_GPIO3 |= (0x01 << 23); //将GPIO2_23置1 112 } 113 114 static int __init key_irq_init(void) 115 { 116 printk("<0>\nHello,this is %s module!\n\n",Driver_NAME); 117 //register and mknod 118 major = register_chrdev(0,Driver_NAME,&key_fops); 119 drv_class = class_create(THIS_MODULE,Driver_NAME); 120 drv_class_dev = device_create(drv_class,NULL,MKDEV(major,0),NULL,DEVICE_NAME); /*/dev/key_query*/ 121 122 //IO端口申请 ioremap 可以直接通过指针来访问这些地址 123 base_iomux = ioremap(0x43FAC000,0xFFF); 124 base_gpio3 = ioremap(0x53FA4000,0xFFF); 125 126 //MUX_CTL 127 MUX_CTL &= ~(0x07 << 0); 128 MUX_CTL |= (0X05 << 0); //设置为ALT5 GPIO3_23 ERR_LED 129 //PAD_CTL 130 PAD_CTL &= ~(0x01<<13 | 0x01<<3 | 0x03<<1 | 0x01<<0); //1.8v 不需要上拉下拉 CMOS输出 slew rate 131 //GDIR_GPIO3 配置为输出模式 132 GDIR_GPIO3 &= ~(0x01 << 23); 133 GDIR_GPIO3 |= (0x01 << 23); //配置为输出模式 134 135 //DR_GPIO3 配置为输出0 点亮ERR_LED 
136 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 137 DR_GPIO3 &= ~(0x01 << 23); //将GPIO2_23清零 138 gpio_addr(); 139 led_on_off(); 140 return 0; 141 } 142 143 static void __exit key_irq_exit(void) 144 { 145 gpio_addr(); 146 printk("<0>\nGoodbye,%s!\n\n",Driver_NAME); 147 led_on_off(); 148 149 unregister_chrdev(major,Driver_NAME); 150 device_unregister(drv_class_dev); 151 class_destroy(drv_class); 152 153 //释放IO端口 154 iounmap(base_iomux); 155 iounmap(base_gpio3); 156 } 157 158 159 /* 这两行指定驱动程序的初始化函数和卸载函数 */ 160 module_init(key_irq_init); 161 module_exit(key_irq_exit); 162 163 /* 描述驱动程序的一些信息,不是必须的 */ 164 MODULE_AUTHOR("Lover雪儿"); 165 MODULE_VERSION("0.1.0"); 166 MODULE_DESCRIPTION("IMX257 key Driver"); 167 MODULE_LICENSE("GPL");