C语言函数调用分析总结-文章-基础课-C语言基础

C语言函数调用分析总结

时间:04-15 10:43 阅读:1101次

*温馨提示：点击图片可以放大观看高清大图

简介：C语言是一门强大的语言，特别是在嵌入式开发过程中，有时需要通过反汇编来分析代码中存在的问题。函数是C语言中的难点，函数的调用过程也是很多人不能理解的，很多人即使知道也是一知半解。对C语言的函数调用有了比较清晰的认识，就能够更清晰地分析代码中存在的问题。我也是看了很多的资料，然后自己写了一段小代码作为分析的测试代码。

我的测试环境：Fedora14

Gcc版本：gcc-4.5.1

内核版本：2.6.38.1

首先记住在X86体系里很多的寄存器都有特殊的用途，其中ESP表示当前函数堆栈的栈顶指针，而EBP则表示当前函数堆栈的基地址。EBP是栈基址的指针，永远指向栈底（高地址），ESP是栈指针，永远指向栈顶（低地址）。

我的代码如下：

#include<stdio.h>

/*
 * Returns a + b - c, where c is a local variable fixed at -2
 * (so the result equals a + b + 2).
 * Both arguments are passed by value.
 */
int pluss_a_and_b(int a,int b)

{

int c = -2; /* local variable; the disassembly in this article stores it at -0x4(%ebp) */

return (a + b - c);

}

/*
 * Swaps the two integers pointed to by a and b, then returns
 * pluss_a_and_b() applied to the values they held before the swap.
 * Both pointers are dereferenced unconditionally, so they must be valid.
 */
int call_plus(int *a,int *b)

{

int c = *a; /* original value of *a */

int d = *b; /* original value of *b */

*a = d;

*b = c;

/* c and d still hold the pre-swap values */
return pluss_a_and_b(c,d);

}

/*
 * Test driver: calls call_plus() with the addresses of two locals so that
 * the generated prologue, argument passing and call/return sequence can be
 * inspected in the disassembly.
 */
int main()

{

int c = 10;

int d = 20;

/* g receives the return value but is never read afterwards */
int g = call_plus(&c,&d);

return 0;

}

对上面的代码进行编译和反汇编：

[gong@Gong-Computer deeplearn]$ gcc -g testcall.c -o testcall

[gong@Gong-Computer deeplearn]$ objdump -S -d testcall > testcall_s

然后对反汇编的代码进行分析：

... 8048393: c3 ret 08048394 <pluss_a_and_b>: #include<stdio.h> int pluss_a_and_b(int a,int b) { 8048394: 55 push %ebp 8048395: 89 e5 mov %esp,%ebp 8048397: 83 ec 10 sub $0x10,%esp int c = -2; 804839a: c7 45 fc fe ff ff ff movl $0xfffffffe,-0x4(%ebp) return (a + b - c); 80483a1: 8b 45 0c mov 0xc(%ebp),%eax 80483a4: 8b 55 08 mov 0x8(%ebp),%edx 80483a7: 8d 04 02 lea (%edx,%eax,1),%eax 80483aa: 2b 45 fc sub -0x4(%ebp),%eax } 80483ad: c9 leave 80483ae: c3 ret 080483af <call_plus>: int call_plus(int *a,int *b) { 80483af: 55 push %ebp 80483b0: 89 e5 mov %esp,%ebp 80483b2: 83 ec 18 sub $0x18,%esp int c = *a; 80483b5: 8b 45 08 mov 0x8(%ebp),%eax 80483b8: 8b 00 mov (%eax),%eax 80483ba: 89 45 fc mov %eax,-0x4(%ebp) int d = *b; 80483bd: 8b 45 0c mov 0xc(%ebp),%eax 80483c0: 8b 00 mov (%eax),%eax 80483c2: 89 45 f8 mov %eax,-0x8(%ebp) *a = d; 80483c5: 8b 45 08 mov 0x8(%ebp),%eax 80483c8: 8b 55 f8 mov -0x8(%ebp),%edx 80483cb: 89 10 mov %edx,(%eax) *b = c; 80483cd: 8b 45 0c mov 0xc(%ebp),%eax 80483d0: 8b 55 fc mov -0x4(%ebp),%edx 80483d3: 89 10 mov %edx,(%eax) return pluss_a_and_b(c,d); 80483d5: 8b 45 f8 mov -0x8(%ebp),%eax 80483d8: 89 44 24 04 mov %eax,0x4(%esp) 80483dc: 8b 45 fc mov -0x4(%ebp),%eax 80483df: 89 04 24 mov %eax,(%esp) 80483e2: e8 ad ff ff ff call 8048394 <pluss_a_and_b> } 80483e7: c9 leave 80483e8: c3 ret 080483e9 <main>: int main() { 80483e9: 55 push %ebp 80483ea: 89 e5 mov %esp,%ebp 80483ec: 83 ec 18 sub $0x18,%esp int c = 10; 80483ef: c7 45 f8 0a 00 00 00 movl $0xa,-0x8(%ebp) int d = 20; 80483f6: c7 45 f4 14 00 00 00 movl $0x14,-0xc(%ebp) int g = call_plus(&c,&d); 80483fd: 8d 45 f4 lea -0xc(%ebp),%eax 8048400: 89 44 24 04 mov %eax,0x4(%esp) 8048404: 8d 45 f8 lea -0x8(%ebp),%eax 8048407: 89 04 24 mov %eax,(%esp) 804840a: e8 a0 ff ff ff call 80483af <call_plus> 804840f: 89 45 fc mov %eax,-0x4(%ebp) return 0; 8048412: b8 00 00 00 00 mov $0x0,%eax } 8048417: c9 leave 8048418: c3 ret 8048419: 90 nop 804841a: 90 nop ...

首先，C语言的入口都是从main函数开始的，但是从反汇编代码中可以发现并不是只有自己设计的代码，还存在很多关于初始化等操作。这主要是因为C语言的运行需要一些基本的环境和C-RunTime的一些基本函数。因此main 函数只是我们C语言的入口，但并不是一个程序的开始。因此main函数也需要堆栈的控制，也需要压栈出栈等操作。

需要注意的是：

指令call用来调用一个函数或过程，这时下一条指令的地址（返回地址）被压入堆栈中，以备返回时能恢复执行下一条指令。在32位X86上返回地址占4个字节，因此ESP=ESP-4。通过下面的汇编代码就可知道函数的返回地址。

80483e2: e8 ad ff ff ff call 8048394 <pluss_a_and_b>

}

80483e7: c9 leave

可以知道指令call后的返回地址就是80483e7。而8048394则说明被调用函数的起始地址，这些数字可能在不同的系统中存在差别。

RET指令用来从一个函数或过程返回，之前CALL保存的下一条指令地址会从栈内弹出到EIP寄存器中，程序转到CALL之后的下一条指令处继续执行。

下面简单的介绍几个代码:

80483e9: 55 push %ebp

80483ea: 89 e5 mov %esp,%ebp

80483ec: 83 ec 18 sub $0x18,%esp

首先push %ebp，是将调用函数的栈帧基地址压入栈中，也就是保存调用函数的栈帧EBP。将其指向的地址压入堆栈中。mov %esp,%ebp则是将ESP和EBP指向同一个地址，作为被调用函数的栈帧基地址。sub $0x18,%esp则是修改ESP的值，与EBP构成当前被调用函数的栈帧空间。

从图中可以看出每个函数的栈空间都是相互独立的，但是每一个栈空间的基本结构都是相同的：都是该函数的EBP指针，然后是局部变量空间，然后是往下一个函数的传递参数空间，返回的EBP地址。这样就能实现不同函数的调用，而传递参数是采用基于EBP指针的相对位置实现的，并没有使用绝对地址。

由此可以知道栈空间的分布是由函数的调用情况决定的，当调用层次过多时就会导致栈溢出错误，因此不能一味地使用迭代和递归。

关于函数调用的返回都是采用EAX寄存器实现的，但是当返回的是结构体以及联合体时返回就不能采用EAX实现了，基本的实现方法也是基于堆栈的。

#include<stdio.h>

/*
 * Aggregate with four members of different types, used as a by-value
 * function return type in this example.
 * In the disassembly shown in this article it is copied as five 32-bit
 * words (offsets 0x0 through 0x10).
 */
typedef struct {

double d;

float f;

int i;

char c;

}return_value;

/*
 * Fills a local return_value with fixed sample data and returns it by value.
 */
return_value my_test_of_return()

{

return_value rv;

rv.d = 12.56;

rv.f = 3.1; /* double constant converted to float on assignment */

rv.i = 10;

rv.c = 'a';

return rv;

}

/*
 * Test driver: stores the struct returned by my_test_of_return() in a
 * local so that the struct-return sequence can be inspected in the
 * disassembly. The local is not used afterwards.
 */
int main()

{

return_value local = my_test_of_return();

return 0;

}

编译以及反汇编以后得到如下的结果：

[gong@Gong-Computer deeplearn]$ gcc -g structpass.c -o structpass

[gong@Gong-Computer deeplearn]$ objdump -S -d structpass > structpass_s

... 08048394 <my_test_of_return>: char c; }return_value; return_value my_test_of_return() { 8048394: 55 push %ebp 8048395: 89 e5 mov %esp,%ebp 8048397: 83 ec 20 sub $0x20,%esp 804839a: 8b 45 08 mov 0x8(%ebp),%eax return_value rv; rv.d = 12.56; 804839d: dd 05 d8 84 04 08 fldl 0x80484d8 80483a3: dd 5d e8 fstpl -0x18(%ebp) rv.f = 3.1; 80483a6: ba 66 66 46 40 mov $0x40466666,%edx 80483ab: 89 55 f0 mov %edx,-0x10(%ebp) rv.i = 10; 80483ae: c7 45 f4 0a 00 00 00 movl $0xa,-0xc(%ebp) rv.c = 'a'; 80483b5: c6 45 f8 61 movb $0x61,-0x8(%ebp) return rv; 80483b9: 8b 55 e8 mov -0x18(%ebp),%edx 80483bc: 89 10 mov %edx,(%eax) 80483be: 8b 55 ec mov -0x14(%ebp),%edx 80483c1: 89 50 04 mov %edx,0x4(%eax) 80483c4: 8b 55 f0 mov -0x10(%ebp),%edx 80483c7: 89 50 08 mov %edx,0x8(%eax) 80483ca: 8b 55 f4 mov -0xc(%ebp),%edx 80483cd: 89 50 0c mov %edx,0xc(%eax) 80483d0: 8b 55 f8 mov -0x8(%ebp),%edx 80483d3: 89 50 10 mov %edx,0x10(%eax) } 80483d6: c9 leave 80483d7: c2 04 00 ret $0x4 080483da <main>: int main() { 80483da: 8d 4c 24 04 lea 0x4(%esp),%ecx 80483de: 83 e4 f8 and $0xfffffff8,%esp 80483e1: ff 71 fc pushl -0x4(%ecx) 80483e4: 55 push %ebp 80483e5: 89 e5 mov %esp,%ebp 80483e7: 51 push %ecx 80483e8: 83 ec 2c sub $0x2c,%esp return_value local = my_test_of_return(); 80483eb: 8d 45 e0 lea -0x20(%ebp),%eax 80483ee: 89 04 24 mov %eax,(%esp) 80483f1: e8 9e ff ff ff call 8048394 <my_test_of_return> 80483f6: 83 ec 04 sub $0x4,%esp return 0; 80483f9: b8 00 00 00 00 mov $0x0,%eax } 80483fe: 8b 4d fc mov -0x4(%ebp),%ecx 8048401: c9 leave 8048402: 8d 61 fc lea -0x4(%ecx),%esp ...

从上面的结果可以知道，返回的过程并不是一次通过EAX返回的：调用者先把接收结果的变量地址（lea -0x20(%ebp),%eax）作为隐含参数放到栈上，被调用函数从0x8(%ebp)取出该地址，再把结构体一个字一个字地复制过去，实现结果的返回。因此这也是我们需要注意的地方。

同样对于结构体的传递方式也是采用堆栈的方式进行传递，基本的参看下面的分析。参数也是依据堆栈中的位置进行控制的。

代码：

#include<stdio.h>

/*
 * Aggregate with four members of different types, used here both as a
 * by-value function parameter and as a by-value return type.
 */
typedef struct {

double d;

float f;

int i;

char c;

}return_value;

/*
 * Takes a return_value by value, copies each member into a new local
 * struct, and returns that struct by value.
 */
return_value my_test_pass(return_value pass)

{

return_value rv;

/* member-by-member copy of the argument */
rv.d = pass.d;

rv.f = pass.f;

rv.i = pass.i;

rv.c = pass.c;

return rv;

}

/*
 * Fills a local return_value with fixed sample data and returns it by value.
 */
return_value my_test_of_return()

{

return_value rv;

rv.d = 12.56;

rv.f = 3.1; /* double constant converted to float on assignment */

rv.i = 10;

rv.c = 'a';

return rv;

}

/*
 * Test driver: obtains a struct from my_test_of_return(), then passes it
 * by value to my_test_pass(), so that both struct return and struct
 * argument passing can be inspected in the disassembly.
 */
int main()

{

return_value local = my_test_of_return();

/* local1 receives the returned copy but is never read afterwards */
return_value local1 = my_test_pass(local);

return 0;

}

编译和反汇编过程：

[gong@Gong-Computer deeplearn]$ gcc -g structpass.c -o structpass

[gong@Gong-Computer deeplearn]$ objdump -S -d structpass > structpass_s

... int main() { 804841d: 8d 4c 24 04 lea 0x4(%esp),%ecx 8048421: 83 e4 f8 and $0xfffffff8,%esp 8048424: ff 71 fc pushl -0x4(%ecx) 8048427: 55 push %ebp 8048428: 89 e5 mov %esp,%ebp 804842a: 51 push %ecx 804842b: 83 ec 4c sub $0x4c,%esp return_value local = my_test_of_return(); 804842e: 8d 45 e0 lea -0x20(%ebp),%eax 8048431: 89 04 24 mov %eax,(%esp) 8048434: e8 9e ff ff ff call 80483d7 <my_test_of_return> 8048439: 83 ec 04 sub $0x4,%esp return_value local1 = my_test_pass(local); 804843c: 8d 45 c8 lea -0x38(%ebp),%eax 804843f: 8b 55 e0 mov -0x20(%ebp),%edx 8048442: 89 54 24 04 mov %edx,0x4(%esp) 8048446: 8b 55 e4 mov -0x1c(%ebp),%edx 8048449: 89 54 24 08 mov %edx,0x8(%esp) 804844d: 8b 55 e8 mov -0x18(%ebp),%edx 8048450: 89 54 24 0c mov %edx,0xc(%esp) 8048454: 8b 55 ec mov -0x14(%ebp),%edx 8048457: 89 54 24 10 mov %edx,0x10(%esp) 804845b: 8b 55 f0 mov -0x10(%ebp),%edx 804845e: 89 54 24 14 mov %edx,0x14(%esp) 8048462: 89 04 24 mov %eax,(%esp) 8048465: e8 2a ff ff ff call 8048394 <my_test_pass> 804846a: 83 ec 04 sub $0x4,%esp return 0; 804846d: b8 00 00 00 00 mov $0x0,%eax } ...

由上面的反汇编代码可以知道结构体的传递参数是依据堆栈实现的。这也说明了多参数的传递过程并不是按着固定的模式实现的，这也是我们需要注意的问题。参数的传递需要根据实际情况分析。

总结：

函数的调用是有一定的方式的，各个函数都有一定的堆栈空间，而且每一个堆栈空间的分布情况也是类似的，但是大小要根据实际的情况分析。一般一个函数的堆栈空间中包含下面几个部分：1、栈帧（用来表示该堆栈空间的栈底，也就是指开始的地址EBP），局部变量的空间，下一个被调用函数的参数传递，最后是返回地址（实质上也是一个EBP）。就是依据EBP和相对位置就能知道每一个函数的基本分布，而ESP就能知道堆栈空间的大小。

被调用参数的获取主要是依据EBP指针的相对位置获得，因为被调用函数的堆栈空间的上一个堆栈空间就是调用函数的堆栈空间。根据函数的栈帧指针（EBP）和相对位置（如上面代码中的0x8(%ebp)、0xc(%ebp)等）找到对应的参数，而局部变量则位于-0x4(%ebp)、-0x8(%ebp)等位置。但是相对位置也是不固定的，这需要考虑结构体的对齐等方式，具体的要在实际中计算。

返回值一般都是采用EAX返回的，但是对于结构体等则是采用堆栈的方式一个元素一个元素的返回的，但是还是运用了EAX的特性。

函数调用的分布大概如下：