C++ class memory layout
最近在用 eBPF uprobe 探针获取 C++ 程序运行时的内存,这需要对 C++(特别是 C++ class 继承)的内存布局有所了解。这篇文章通过测试代码和内存地址分析,验证 C++ class 在多继承、虚继承场景下的内存布局。
C++ 虚函数表简介
C++ 中当一个类在实现的时候,如果存在一个或以上的虚函数,那么这个类便会包含一张虚函数表。而当一个子类继承并重写(override)了基类的虚函数时,它也会有自己的一张虚函数表。当我们在设计类的时候,如果把某个函数设置成虚函数,也就表明我们希望子类在继承的时候能够有自己的实现方式;如果我们明确这个类不会被继承,那么就不应该有虚函数的出现。
类 A 的实现如下:
// Example polymorphic class used to illustrate a vtable.
// It declares two virtual functions, two non-virtual functions and two
// private int data members.
class A {
public:
    virtual void vfunc1();
    virtual void vfunc2();
    void func1();
    void func2();

private:
    // Two distinctly named members; repeating one name in the same class
    // is a redeclaration error.
    int m_data1;
    int m_data2;
};
类 A 的虚函数表的内存布局如下图所示:
如果有如下代码所示的继承关系:
// Root of the A <- B <- C single-inheritance chain.
// It declares two virtual functions, two non-virtual functions and two
// private int data members.
class A {
public:
    virtual void vfunc1();
    virtual void vfunc2();
    void func1();
    void func2();

private:
    // Two distinctly named members; repeating one name in the same class
    // is a redeclaration error.
    int m_data1;
    int m_data2;
};
// Middle class of the A <- B <- C chain; derives publicly from A.
class B : public A {
public:
// Same name and signature as the virtual A::vfunc1, so this overrides it.
virtual void vfunc1();
// Non-virtual: hides A::func2 instead of overriding it.
void func2();
private:
// Data member added by B on top of the members inherited from A.
int m_data3;
};
// Most-derived class of the A <- B <- C chain; derives publicly from B.
class C : public B {
public:
// Overrides vfunc1 again (virtual in A, already overridden in B).
virtual void vfunc1();
// Non-virtual: hides B::func2 and A::func2 instead of overriding them.
void func2();
private:
// C's own m_data1 is a separate member from the private A::m_data1;
// it only shares the name.
int m_data1, m_data4;
};
类 A B C 的虚函数表的内存布局如下所示:
- 对于非虚函数,三个类中虽然都有一个叫 func2 的函数,但他们彼此互不关联,因此都是各自独立的,不存在重写(override)一说,子类的 func2 只是隐藏了基类的同名函数;在调用的时候也不需要进行查表的操作,直接调用即可。
- 由于子类B和子类C都是继承于基类A,因此他们都会存在一个虚指针用于指向虚函数表。注意,假如子类B和子类C没有重写任何虚函数,那么它们虚函数表中保存的函数地址将与基类A的完全相同(在 GCC/Clang 等主流实现中,每个类通常仍各有一张虚函数表,因为表中还关联着各自的类型信息)。但是,上面的代码设计时子类B和子类C中都有一个虚函数 vfunc1,因此他们就需要各自产生一张虚函数表,并用各自的虚指针指向该表。由于子类B和子类C都对 vfunc1 作了重写,因此 vfunc1 共有三种不同的实现(A、B、C 各一份),函数地址也各不相同,在使用的时候需要从各自类的虚函数表中去查找对应的 vfunc1 地址。
- 对于虚函数 vfunc2,两个子类都没有进行重写操作,所以基类A、子类B和子类C将共用一个 vfunc2,该虚函数的地址会分别保存在三个类的虚函数表中,但他们的地址是相同的。
- 从上图可以发现,在类对象的头部存放着一个虚指针,该虚指针指向了各自类所维护的虚函数表,再通过查找虚函数表中的地址来找到对应的虚函数。
- 对于类中的数据而言,子类中都会包含父类的信息。如上例中的子类C,它自己拥有一个变量 m_data1,似乎是和基类中的 m_data1 重名了,但其实他们并不存在联系,从存放的位置便可知晓。
C++ class 内存布局
普通继承 class 的内存布局
如下代码所示类继承关系:
// Base class of the single-inheritance layout experiment.
// Data members are deliberately spread over several access sections, on both
// sides of the member functions, so that their addresses can be compared.
// The declaration order of the data members must not be changed.
class Polygon
{
protected:
    int width, height;
    long init_area;

private:
    int _private = 111;

public:
    // Stores both dimensions and records their product in init_area.
    void set_values(int a, int b)
    {
        width = a;
        height = b;
        init_area = static_cast<long>(width) * static_cast<long>(height);
    }

    // Virtual, so the class carries a vtable pointer; the base version returns 0.
    virtual int area()
    {
        return 0;
    }

    // Returns the address of a data member selected by index:
    //   0 width, 1 height, 2 init_area, 3 _private, 4 _private_2, 5 test2.
    // Any other index yields a null pointer.
    void *mem_addr(int i)
    {
        switch (i)
        {
        case 0:
            return &width;
        case 1:
            return &height;
        case 2:
            return &init_area;
        case 3:
            return &_private;
        case 4:
            return &_private_2;
        case 5:
            return &test2;
        default:
            return nullptr;
        }
    }

private:
    int _private_2 = 222;

protected:
    int test2 = 333;
};
// Derived class of the single-inheritance experiment; adds two int members
// (test3, _private_3) after the Polygon subobject.
class Rectangle : public Polygon
{
public:
    // Same signature as the virtual Polygon::area; prints via foo() and then
    // returns width * height.
    int area()
    {
        foo();
        return width * height;
    }

    // Prints the value obtained by casting &Rectangle::mem_addr_rect to void*.
    // NOTE(review): converting a pointer-to-member-function to void* is not
    // standard C++ and relies on a compiler extension — confirm for the
    // toolchain in use.
    void foo()
    {
        printf("Rectangle->mem_addr_rect addr: %p\n", (void *)(&Rectangle::mem_addr_rect));
    }

    // Returns the address of a data member selected by index:
    //   0 test3, 1 _private_3. Any other index yields a null pointer.
    void *mem_addr_rect(int i)
    {
        switch (i)
        {
        case 0:
            return &test3;
        case 1:
            return &_private_3;
        default:
            return nullptr;
        }
    }

protected:
    int test3;

private:
    int _private_3 = 0;
};
// Second, independent polymorphic base used by the multiple-inheritance
// experiment: two public ints plus one virtual function.
class Base
{
public:
    int a;
    int b;

    // Virtual, so Base carries its own vtable pointer.
    virtual void function_1()
    {
        std::cout << "function 1" << std::endl;
    }

    // Returns the address of a data member selected by index:
    //   0 a, 1 b. Any other index yields a null pointer.
    void *mem_addr_base(int i)
    {
        switch (i)
        {
        case 0:
            return &a;
        case 1:
            return &b;
        default:
            return nullptr;
        }
    }
};
// Multiple-inheritance experiment: derives from Rectangle first and Base
// second, then adds two int members of its own.
class MultiRectangle : public Rectangle, public Base
{
public:
    // Returns the address of a data member selected by index:
    //   0 test4, 1 _private_4. Any other index yields a null pointer.
    // Note that _private_4 is declared before test4 below.
    void *mem_addr_multi(int i)
    {
        switch (i)
        {
        case 0:
            return &test4;
        case 1:
            return &_private_4;
        default:
            return nullptr;
        }
    }

private:
    int _private_4 = 0;

protected:
    int test4;
};
通过代码探测类的内存布局,测试代码如下所示:
// Single-inheritance probe: print the address of a Rectangle object, of its
// Polygon base subobject, and of every data member reachable through the
// mem_addr / mem_addr_rect helpers.
Rectangle rect;
// Access the object through a base-class pointer.
Polygon *ppoly = &rect;
ppoly->set_values(4, 5);
// area() is virtual in Polygon, so this call is dispatched through the vtable.
ppoly->area();
std::cout << "***********************************" << std::endl;
std::cout << "[single inherit memory layout]" << std::endl;
std::cout << "Rectangle addr: " << &rect << std::endl;
std::cout << "Polygon addr: " << (Polygon *)(&rect) << std::endl;
std::cout << "Rectangle.width addr: " << rect.mem_addr(0) << std::endl;
std::cout << "Rectangle.height addr: " << rect.mem_addr(1) << std::endl;
std::cout << "Rectangle.init_area addr: " << rect.mem_addr(2) << std::endl;
std::cout << "Rectangle._private addr: " << rect.mem_addr(3) << std::endl;
std::cout << "Rectangle._private_2 addr: " << rect.mem_addr(4) << std::endl;
std::cout << "Rectangle.test2 addr: " << rect.mem_addr(5) << std::endl;
std::cout << "Rectangle.test3 addr: " << rect.mem_addr_rect(0) << std::endl;
std::cout << "Rectangle._private_3 addr: " << rect.mem_addr_rect(1) << std::endl;
// Multiple-inheritance probe: print the addresses of a MultiRectangle object,
// of each base subobject, of every data member, and of the member functions.
//
// NOTE(review): the "vptr function" lines reinterpret the first word of a
// subobject as a pointer to a table and print that table's first word. This
// assumes the vtable pointer sits at offset 0 and that a pointer fits in
// `long` — confirm for the target ABI.
// NOTE(review): the "[FUNC]" lines cast a pointer-to-member-function to
// void*, which is not standard C++ and relies on a compiler extension —
// confirm for the toolchain in use.
MultiRectangle multi_rect;
std::cout << "***********************************" << std::endl;
std::cout << "[multiple inherit memory layout]" << std::endl;
std::cout << "MultiRectangle addr: " << &multi_rect << std::endl;
std::cout << "Rectangle addr: " << (Rectangle *)(&multi_rect) << std::endl;
std::cout << "Base addr: " << (Base *)(&multi_rect) << std::endl;
std::cout << "MultiRectangle->Rectangle vptr addr: " << (long *)((Rectangle *)(&multi_rect)) << std::endl;
std::cout << "MultiRectangle->Rectangle vptr function: " << (void *)(*(long *)*((long *)((Rectangle *)(&multi_rect)))) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon vptr addr: " << (long *)((Polygon *)(&multi_rect)) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon vptr function: " << (void *)(*(long *)*((long *)((Polygon *)(&multi_rect)))) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->width addr: " << multi_rect.mem_addr(0) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->height addr: " << multi_rect.mem_addr(1) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->init_area addr: " << multi_rect.mem_addr(2) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->_private addr: " << multi_rect.mem_addr(3) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->_private_2 addr: " << multi_rect.mem_addr(4) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->test2 addr: " << multi_rect.mem_addr(5) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->set_values[FUNC] addr: " << (void *)(&Polygon::set_values) << std::endl;
std::cout << "MultiRectangle->Rectangle->Polygon->area addr[FUNC]: " << (void *)(&Polygon::area) << std::endl;
std::cout << "MultiRectangle->Rectangle->test3 addr: " << multi_rect.mem_addr_rect(0) << std::endl;
std::cout << "MultiRectangle->Rectangle->_private_3 addr: " << multi_rect.mem_addr_rect(1) << std::endl;
std::cout << "MultiRectangle->Rectangle->mem_addr_rect[FUNC] addr: " << (void *)(&Rectangle::mem_addr_rect) << std::endl;
std::cout << "MultiRectangle->Rectangle->area[FUNC] addr: " << (void *)(&Rectangle::area) << std::endl;
std::cout << "MultiRectangle->Rectangle->foo[FUNC] addr: " << (void *)(&Rectangle::foo) << std::endl;
std::cout << "MultiRectangle->mem_addr_multi[FUNC] addr: " << (void *)(&MultiRectangle::mem_addr_multi) << std::endl;
std::cout << "MultiRectangle->Base vptr addr: " << (Base *)(&multi_rect) << std::endl;
std::cout << "MultiRectangle->Base vptr function: " << (void *)(*(long *)*((long *)((Base *)(&multi_rect)))) << std::endl;
// Inspect the bytes that follow Rectangle::_private_3. Standard C++ has no
// arithmetic on void*, so the offsets are applied through a char* and the
// result is converted back to void* for printing (a char* would be streamed
// as a C string).
// NOTE(review): the +4 / +8 offsets are layout guesses for this ABI — confirm.
char *private_3_bytes = static_cast<char *>(multi_rect.mem_addr_rect(1));
std::cout << "????? Base: " << static_cast<void *>(private_3_bytes + 4) << std::endl;
std::cout << "????? Base: " << *((int *)(private_3_bytes + 4)) << std::endl;
std::cout << "????? Base: " << (void *)(*(long *)*((long *)(private_3_bytes + 8))) << std::endl;
std::cout << "MultiRectangle->Base->a addr: " << multi_rect.mem_addr_base(0) << std::endl;
std::cout << "MultiRectangle->Base->b addr: " << multi_rect.mem_addr_base(1) << std::endl;
std::cout << "MultiRectangle->Base->function_1[FUNC] addr: " << (void *)(&Base::function_1) << std::endl;
std::cout << "MultiRectangle->Base->mem_addr_base[FUNC] addr: " << (void *)(&Base::mem_addr_base) << std::endl;
std::cout << "MultiRectangle vptr addr: " << (long *)(&multi_rect) << std::endl;
std::cout << "MultiRectangle vptr function: " << (void *)(*(long *)*((long *)(&multi_rect))) << std::endl;
// mem_addr_multi(1) is &_private_4 and mem_addr_multi(0) is &test4, so each
// label is paired with the index of the member it names.
std::cout << "MultiRectangle->_private_4 addr: " << multi_rect.mem_addr_multi(1) << std::endl;
std::cout << "MultiRectangle->test4 addr: " << multi_rect.mem_addr_multi(0) << std::endl;
std::cout << "MultiRectangle->mem_addr_multi[FUNC] addr: " << (void *)(&MultiRectangle::mem_addr_multi) << std::endl;
测试结果分析如下图所示:
虚继承 class 的内存布局
如下代码所示类虚继承关系:
// Common base of the virtual-inheritance experiment: one public int and two
// virtual functions.
class B
{
public:
    int ib;

    // Stores i in ib; i defaults to 1.
    B(int i = 1) : ib(i) {}

    virtual void f()
    {
        cout << "B::f()" << endl;
    }

    virtual void Bf()
    {
        cout << "B::Bf()" << endl;
    }
};
// First intermediate class: inherits B virtually, adds one public int and
// three virtual functions (f has the same signature as B::f).
class B1 : virtual public B
{
public:
    int ib1;

    // Stores i in ib1; i defaults to 100.
    B1(int i = 100) : ib1(i) {}

    virtual void f()
    {
        cout << "B1::f()" << endl;
    }

    virtual void f1()
    {
        cout << "B1::f1()" << endl;
    }

    virtual void Bf1()
    {
        cout << "B1::Bf1()" << endl;
    }
};
// Second intermediate class: inherits B virtually, adds one public int and
// three virtual functions (f has the same signature as B::f).
class B2 : virtual public B
{
public:
    int ib2;

    // Stores i in ib2; i defaults to 1000.
    B2(int i = 1000) : ib2(i) {}

    virtual void f()
    {
        cout << "B2::f()" << endl;
    }

    virtual void f2()
    {
        cout << "B2::f2()" << endl;
    }

    virtual void Bf2()
    {
        cout << "B2::Bf2()" << endl;
    }
};
// Most-derived class: inherits B1 first and B2 second (both of which inherit
// B virtually), adds one public int, redeclares f, f1 and f2, and introduces
// a new virtual function Df.
class D : public B1, public B2
{
public:
    int id;

    // Stores i in id; i defaults to 10000.
    D(int i = 10000) : id(i) {}

    virtual void f()
    {
        cout << "D::f()" << endl;
    }

    virtual void f1()
    {
        cout << "D::f1()" << endl;
    }

    virtual void f2()
    {
        cout << "D::f2()" << endl;
    }

    virtual void Df()
    {
        cout << "D::Df()" << endl;
    }
};
通过代码探测类的内存布局,测试代码如下所示:
// Virtual-inheritance probe: print the address of a D object, of its B1, B2
// and B subobjects, of each public data member, and of the member functions.
// NOTE(review): the "[FUNC]" lines cast a pointer-to-member-function to
// void*, which is not standard C++ and relies on a compiler extension —
// confirm for the toolchain in use.
D d;
std::cout << "***********************************" << std::endl;
std::cout << "[multiple virtual inherit memory layout]" << std::endl;
cout << "D->vptr addr: " << &d << endl;
cout << "D->B1->vptr addr: " << (B1 *)(&d) << endl;
cout << "D->B1->ib1 addr: " << &d.ib1 << endl;
cout << "D->B1->f[FUNC] addr: " << (void *)&B1::f << endl;
cout << "D->B1->f1[FUNC] addr: " << (void *)&B1::f1 << endl;
cout << "D->B1->Bf1[FUNC] addr: " << (void *)&B1::Bf1 << endl;
cout << "D->B2->vptr addr: " << (B2 *)(&d) << endl;
cout << "D->B2->ib2 addr: " << &d.ib2 << endl;
cout << "D->B2->f[FUNC] addr: " << (void *)&B2::f << endl;
// These two labels name the functions whose addresses are printed
// (B2::f2 and B2::Bf2).
cout << "D->B2->f2[FUNC] addr: " << (void *)&B2::f2 << endl;
cout << "D->B2->Bf2[FUNC] addr: " << (void *)&B2::Bf2 << endl;
cout << "D->id addr: " << &d.id << endl;
cout << "D->f[FUNC] addr: " << (void *)&D::f << endl;
cout << "D->f1[FUNC] addr: " << (void *)&D::f1 << endl;
cout << "D->f2[FUNC] addr: " << (void *)&D::f2 << endl;
cout << "D->Df[FUNC] addr: " << (void *)&D::Df << endl;
cout << "D->B1->B vptr addr: " << (B *)(&d) << endl;
cout << "D->B1->B->ib addr: " << &d.ib << endl;
cout << "D->B1->B->f[FUNC] addr: " << (void *)&B::f << endl;
cout << "D->B1->B->Bf[FUNC] addr: " << (void *)&B::Bf << endl;
测试结果分析如下图所示:
总结
- class member 是按照定义的顺序排列的;
- 对于含有虚函数的 class,对象首地址处存放的是虚函数表指针即 vptr,在 64 位平台上占 8 字节;
- 接着 vptr 就是 class 数据成员,按照各自 size 做地址偏移;
Public discussion