4

I used to think that the pointer to the Virtual Function Table (VFT, also Virtual Method Table, VMT) is the very first 32-bit word of the object binary representation.

But now I see a VFT pointer whose index is 13 (!!!!), that is, offset=0x34. (I write "index" because the code to invoke the Qt function o.metaObject() is ((func***)o)[13][0](o)). OMG, what is going on? Why is the VFT pointer located there rather than at offset 0?

EDIT (after complaints that the question is unclear):

Each object with virtual functions has a pointer to the Virtual Function Table. Usually, this is the very first 32-bit value in the object's binary representation (and may be accessed as ((void**)objAddr)[0]). But in the example below the offset of VMT pointer is not 0! (Function names may be demangled by c++filt; for readability, the class names have been shortened to Abc and Xyz):

.text:02EF171C _ZN3XyzC2EP7QObject ; constructor Xyz::Xyz(QObject*), r0 = objAddr, r1 = QObject addr
.text:02EF171C                 PUSH.W          {R4-R8,LR}
.text:02EF1720                 MOV             R4, R0
.text:02EF1722                 LDR             R5, =(_GLOBAL_OFFSET_TABLE_ - 0x02EF1730)
.text:02EF1724                 MOV             R7, R1
.text:02EF1726                 BL.W            _ZN4AbcdC2EP7QObject ; superclass_constructor(objAddr)
.text:02EF172A ; ---------------------------------------------------------------------------
.text:02EF172A                 LDR             R3, =(_ZTVN3XyzE_ptr - 0x27E4BE0) ; vtable for Xyz
.text:02EF172C                 ADD             R5, PC ; _GLOBAL_OFFSET_TABLE_
.text:02EF172E                 MOV             R6, R4
.text:02EF1730                 MOV             R1, R7
.text:02EF1732                 LDR             R3, [R5,R3] ; _ZTVN3XyzE_ptr ; pointer to vtable for Xyz
.text:02EF1734                 ADDS            R3, #8 ; *_ptr points to the (-2)nd element of VMT
.text:02EF1736                 STR.W           R3, [R6],#0x34 ; OOPS! the offset is 0x34 !!!

I want to be able to locate the pointer to VMT for any object, but as the example above shows, the pointer to VMT is not necessarily ((void**)objAddr)[0].

So the question is:

1) Why is the VMT pointer in the middle of the object's binary representation? There must be something specific about this place.

2) how do I find out where the VMT pointer actually is? (Ideally, at run-time given the object address. I have the code to tell a valid address from an invalid one. I'm interested in GCC for Android/ARM, although techniques for different platforms may turn out to be applicable.)

PS the code to detect a valid address on Android is:

#include <unistd.h>
#include <fcntl.h>
/*
 * Probe whether the len bytes starting at p can be read by this process,
 * without risking a fault: the buffer is handed to write() and the kernel's
 * verdict is used as the answer.
 *
 * p    - address to probe (NULL is reported as invalid without probing)
 * len  - number of bytes that must be readable; negative values are rejected
 *
 * Returns 1 if the whole range was accepted by write(), 0 otherwise.
 * A return of 0 also covers "could not probe" (the device failed to open),
 * so callers must treat 0 as "not known to be valid".
 */
int isValidPtr(const void*p, int len) {
    if (!p || len < 0) { return 0; }
    int fd = open("/dev/random", O_WRONLY); // does not work with /dev/null !!!
    if (fd < 0) { return 0; } /* cannot probe: do not call write()/close() on -1 */
    /* A short write is treated as failure too: the range may have become
     * unreadable part-way through, which must not be reported as valid. */
    ssize_t written = write(fd, p, (size_t)len);
    close(fd);
    return written == (ssize_t)len;
}

UPDATE

In the following example, the VMT offset is 0:

// Plain base class: two int data members and no virtual functions.
class Base {
public:
  int x,y;
};
// Derived adds one more int and is the first class in this hierarchy to
// declare virtual functions (Base has none).
// NOTE(review): the surrounding text reports the vtable pointer at offset 0
// of Derived and a Base* -> Derived* conversion of "SUBS R0, #4"; that is an
// observation about this particular GCC/ARM build, not something these
// declarations guarantee.
class Derived: public Base {
public:
  int z;
  Derived();
  virtual int func();
  virtual int func2();
};

Coercion from Base* to Derived* compiles to: SUBS R0, #4

// Demonstrates the Base* -> Derived* conversion: b is cast down to Derived*
// and the resulting object is passed by reference to addDerived(), whose
// result is returned unchanged.
// addDerived() is declared elsewhere and is not shown in this snippet.
int test3(Base*b) {
    Derived*d = (Derived*)b; // C-style downcast; the compiler adjusts the pointer value here
    int r = addDerived(*d);
    return r;
}

 ; test3(Base *)
 _Z5test3P4Base
 CBZ             R0, loc_1C7A
 SUBS            R0, #4
 B.W             _Z10addDerivedR7Derived ;

UPDATE2

I tried

// Experiment: a class with virtual functions (Derived) embedded as a data
// member after 13 unsigned ints of padding, with a user-declared constructor.
struct Cls2 {
    unsigned x[13]; // 13 unsigned values placed before the member object
    Derived d;      // member object of a class that has virtual functions
    Cls2();         // declared here, defined out of line (definition not shown)
};

and here's the disassembly:

.text:00001CE2 _ZN4Cls2C2Ev ; Cls2::Cls2(void)
.text:00001CE2                 PUSH            {R4,LR}
.text:00001CE4                 MOV             R4, R0
.text:00001CE6                 ADD.W           R0, R0, #0x34
.text:00001CEA                 BL              _ZN7DerivedC2Ev ; Derived::Derived(void)
.text:00001CEE                 MOV             R0, R4
.text:00001CF0                 POP             {R4,PC}

That is, the VFT pointer of Cls2::d will indeed be at offset 0x34, but there's no STR.W R3,[R6],#0x34, so it is not #2 suggested by Willem Hengeveld.

BUT if we comment out the constructor,

// Same experiment as before, but with the user-declared constructor removed
// so that the compiler generates construction of Cls2 itself.
struct Cls2 {
    unsigned x[13]; // 13 unsigned values placed before the member object
    Derived d;      // member object of a class that has virtual functions
//    Cls2();       // intentionally disabled for this variant of the experiment
};

in

// Creates a Cls2 on the stack and calls func2() on its Derived member,
// returning that call's result. Used to see what code the compiler emits
// to construct Cls2 when no constructor is declared for it.
int testCls2() {
    Cls2 c;
    return c.d.func2();
}

we get

.text:00001C9E _Z8testCls2v
.text:00001C9E var_18          = -0x18
.text:00001C9E                 PUSH            {LR}
.text:00001CA0                 SUB             SP, SP, #0x4C
.text:00001CA2                 ADD             R0, SP, #0x50+var_18
.text:00001CA4                 BL              _ZN7DerivedC2Ev ; Derived::Derived(void)
.text:00001CA8                 ADD             R0, SP, #0x50+var_18
.text:00001CAA                 BL              _ZN7Derived5func2Ev ; Derived::func2(void)
.text:00001CAE                 ADD             SP, SP, #0x4C
.text:00001CB0                 POP             {PC}

which is very similar to the original code, BUT in my case the pointer to the vtable for Xyz is written from within Xyz::Xyz() and not from the enclosing function.

2 Answers

5

I can think of 2 cases where a VMT is not in the first word of an object:

  • using multiple inheritance
  • when an object has a member variable which has virtual methods

multiple inheritance

// First polymorphic base: twelve 32-bit data words plus one virtual method.
struct base1 {
    uint32_t x[12];
    virtual void m1() { }
};


// Second polymorphic base: no data members, one virtual method.
struct base2 {
    virtual void m2() { }
};

// Multiple inheritance from two polymorphic bases, listed base1 first.
// The answer text reports that the vtable pointer belonging to the base2
// part ends up at offset 0x34 in cls.
struct cls : base1, base2 {
};

now the VMT of base2 is at offset 0x34

virtual member

// Composition case: thirteen 32-bit data words followed by a member object
// of a class that has virtual methods. The answer text reports that the
// member's vtable pointer is at offset 0x34 here as well.
struct cls2 {
    uint32_t x[13];
    base2   b;      // member whose class (base2) declares a virtual method
};

now also the VMT of base2 is at offset 0x34

Willem Hengeveld
  • 1,829
  • 11
  • 11
1

The code to detect and print virtual function table pointers is:

/*
 * Check that s points to a readable, NUL-terminated, non-empty string made
 * only of [0-9a-zA-Z_], at most 512 characters long.
 *
 * The memory behind s is not trusted: it is probed with isValidPtr() in
 * 16-byte chunks, and every chunk is probed BEFORE any byte in it is read.
 * As a consequence a string is rejected when the 16-byte chunk containing
 * its terminator is not fully readable (conservative, but never faults).
 *
 * Returns 1 for an acceptable identifier, 0 otherwise (including s == NULL,
 * the empty string, an unreadable chunk, a non-identifier character, or no
 * terminator within the length limit).
 */
int isIdentifier(const char* s) {
    enum { CHUNK = 0x10, MAX_LEN = 512 };
    for (int i = 0; i <= MAX_LEN; i++) {
        if (i % CHUNK == 0) {
            /* At the length limit only the terminator byte itself is needed. */
            int need = (i == MAX_LEN) ? 1 : CHUNK;
            /* Probe the chunk that starts at s+i, not the start of the string. */
            if (!isValidPtr(s + i, need)) { return 0; }
        }
        unsigned char c = (unsigned char)s[i];
        if (!c) { return i > 0; } /* terminator: valid only if at least one char preceded it */
        int isIdentChar = ('0' <= c && c <= '9')
                       || ('a' <= c && c <= 'z')
                       || ('A' <= c && c <= 'Z')
                       || ('_' == c);
        if (!isIdentChar) { return 0; }
    }
    return 0; /* no terminator within MAX_LEN characters */
}

/*
 * Decide whether the word stored at addr looks like a C++ vtable pointer
 * (Itanium C++ ABI layout, as used by GCC), probing every address with
 * isValidPtr() before it is dereferenced.
 *
 * addr - address of the candidate slot inside some object
 *
 * Returns the address of the mangled class name taken from the type_info
 * that the vtable refers to (prefix it with _Z to demangle with c++filt),
 * or 0 if any step of the check fails.
 *
 * Walk performed:
 *   *addr                 -> candidate vtable address (must be 4-byte aligned)
 *   vtable[-1]            -> type_info object for the class
 *   type_info[0]          -> vtable of the type_info object itself
 *   type_info[1]          -> mangled class name (the return value)
 *   type_info vtable[-1]  -> type_info describing the type_info's own class,
 *                            whose name must start with "N10__cxxabiv"
 */
char* isVftPtr(void*addr) {
    enum { WORD = sizeof(void *) };

    if (!isValidPtr(addr, WORD)) { return 0; }
    void **vmt = *(void ***)addr;
    if (((unsigned long)vmt & 3) != 0) { return 0; }   /* vtables are word aligned */
    if (!isValidPtr(vmt, WORD)) { return 0; }          /* also rejects a NULL slot */
    if (!isValidPtr(vmt - 2, 0x20)) { return 0; }      /* header words before the first slot */

    /* Both words of the type_info object are read below (vptr and name),
     * so both must be readable - not just the first one. */
    char **ptypeinfo = (char **)vmt[-1];
    if (!isValidPtr(ptypeinfo, 2 * WORD)) { return 0; }

    char ***tiVmt = (char ***)ptypeinfo[0];
    if (!isValidPtr(tiVmt - 1, 2 * WORD)) { return 0; }

    char **metaTypeinfo = tiVmt[-1];
    if (!isValidPtr(metaTypeinfo, 2 * WORD)) { return 0; }

    char *metaName = metaTypeinfo[1];
    if (!isValidPtr(metaName, 0x20)) { return 0; }

    if (!isIdentifier(ptypeinfo[1])) { return 0; }

    /* Accept only when the type_info object is an instance of one of the
     * ABI's own type_info classes (namespace __cxxabiv1...). */
    return !strncmp(metaName, "N10__cxxabiv", 12) ? ptypeinfo[1] : 0;
}
// Usage example: printVfts("pThis", pThis, -8, 0x400)
/*
 * Scan the byte range [addr+from, addr+upto) one pointer-sized slot at a
 * time and log every slot that isVftPtr() recognizes as a vtable pointer.
 *
 * tag  - label printed in the opening and closing log lines
 * addr - base address of the object being inspected
 * from - byte offset (may be negative) of the first slot to examine
 * upto - byte offset one past the last slot to examine
 *
 * Offsets are logged relative to addr; a slot below addr shows up as a
 * large unsigned value (e.g. -8 prints as 0xfffffff8).
 */
void printVfts(const char*tag, void* addr, int from, int upto) {
    char *base = (char *)addr;
    void **start = (void **)(base + from);
    void **end = (void **)(base + upto);
    DLOG("{ %s ====== printVfts %p (%p..%p)", tag, addr, (void *)start, (void *)end);
    /* Begin at start (not addr) so that the from offset is honoured. */
    for (void **p = start; p < end; p++) {
        char *name = isVftPtr(p);
        if (name) {
            DLOG("vft at %p [off=0x%x] _Z%s", (void *)p, (unsigned)((char *)p - base), name);
        }
    }
    DLOG("} %s ====== printVfts %p", tag, addr);
}

The code worked on Android/ARM.

The function isValidPtr() is given in the question, the logging macro is given below:

#include <android/log.h>
// Debug-level logging: forwards printf-style arguments to
// __android_log_print() with priority ANDROID_LOG_DEBUG and tag "~~~~~~".
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG  , "~~~~~~", __VA_ARGS__)
// DLOG expands to exactly the same call as LOGD; it is the name used by
// printVfts().
#define DLOG(...) __android_log_print(ANDROID_LOG_DEBUG  , "~~~~~~", __VA_ARGS__)

And, finally: printVfts() showed that there is another VFT pointer at offset 0.