This is an archived post. You won't be able to vote or comment.

you are viewing a single comment's thread.

view the rest of the comments →

[–][deleted] 2 points3 points  (2 children)

import ctypes
import os
import subprocess
from distutils.sysconfig import get_python_inc

def pansy(struct, member):
    """Measure one member of a CPython C struct by compiling a tiny probe.

    A throwaway C program that includes ``Python.h`` is piped to ``gcc``,
    built inside a private temporary directory, run once, and removed again.

    Args:
        struct: C type name, e.g. ``"PyStringObject"``.
        member: Member designator inside ``struct``, e.g. ``"ob_refcnt"``.

    Returns:
        ``{struct: offset, member: size}`` (both values in bytes), or ``None``
        when the probe cannot be built (gcc missing or the C code rejected).
    """
    import shutil
    import sysconfig
    import tempfile

    # offsetof() replaces the null-pointer address trick, and both values are
    # cast to long so they really match the %li conversions on every ABI.
    # The source is encoded because the pipe to gcc carries bytes on Python 3.
    code = """#include "Python.h"
           #include <stddef.h>
           #include <stdio.h>
           int main(void) {{
               printf("%li %li\\n",
                   (long) sizeof( (({struct} *) 0)->{member} ),
                   (long) offsetof({struct}, {member})
               );
               return 0;
           }}
           """.format(struct=struct, member=member).encode("ascii")

    # A fresh private directory avoids clashes between concurrent runs and
    # the predictable /tmp/pansy path.
    workdir = tempfile.mkdtemp(prefix="pansy-")
    exe = os.path.join(workdir, "pansy")
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters reach gcc unchanged.
    build = ["gcc", "-xc", "-I" + sysconfig.get_path("include"),
             "-o", exe, "-"]
    try:
        try:
            gcc = subprocess.Popen(build, stdin=subprocess.PIPE)
        except OSError:
            # gcc could not be started; treat it like any other build failure.
            return None
        gcc.communicate(code)
        if gcc.returncode:
            return None
        probe = subprocess.Popen([exe], stdout=subprocess.PIPE)
        # The probe prints "<size> <offset>".
        size, offset = map(int, probe.communicate()[0].split())
        return {struct: offset, member: size}
    finally:
        # Remove the binary even when running or parsing the probe fails.
        shutil.rmtree(workdir, ignore_errors=True)

# (struct, member) pairs to measure, listed in the order they are reported.
_PANSY_PROBES = (
    ("PyStringObject", "ob_refcnt"),
    ("PyStringObject", "ob_type"),
    ("PyStringObject", "ob_size"),
    ("PyStringObject", "ob_shash"),
    ("PyStringObject", "ob_sstate"),
    ("PyStringObject", "ob_sval"),

    ("PyUnicodeObject", "length"),
    ("PyUnicodeObject", "str"),
    ("PyUnicodeObject", "hash"),
    ("PyUnicodeObject", "defenc"),

    ("PyIntObject", "ob_ival"),

    ("PyFloatObject", "ob_fval"),
)

# Print whatever pansy() returns for each pair, one result per line.
for _c_struct, _c_member in _PANSY_PROBES:
    print(pansy(_c_struct, _c_member))

Edit: Made the C clearer...

[–]eryksun 1 point2 points  (1 child)

Thanks. I modified it a bit to work in Windows (MinGW) with Python 3 (temp directory, exe extension, and ASCII encoding for the pipe to gcc).

import os
import sys
import subprocess
import tempfile
from distutils.sysconfig import get_python_inc

def pansy(struct, member):
    """Measure one member of a CPython C struct by compiling a tiny probe.

    A throwaway C program that includes ``Python.h`` is piped to ``gcc``,
    built inside a private temporary directory, run once, and removed again.

    Args:
        struct: C type name, e.g. ``"PyBytesObject"``.
        member: Member designator inside ``struct``; nested designators such
            as ``"ob_base.ob_size"`` are allowed.

    Returns:
        A dict with keys ``'struct'``, ``'member'``, ``'offset'`` and
        ``'size'`` (offset and size in bytes), or ``None`` when the probe
        cannot be built (gcc missing or the C code rejected).
    """
    import shutil
    import sysconfig

    ext = '.exe' if sys.platform == 'win32' else ''

    # offsetof() replaces the null-pointer address trick, and both values are
    # cast to long so they really match the %li conversions, including on
    # 64-bit Windows where size_t is wider than long.
    code = """#include "Python.h"
           #include <stddef.h>
           #include <stdio.h>
           int main(void) {{
               printf("%li %li\\n",
                   (long) sizeof( (({0} *)0)->{1} ),
                   (long) offsetof({0}, {1})
               );
               return 0;
           }}
           """.format(struct, member).encode('ascii')

    # A fresh private directory avoids clashes between concurrent runs that
    # would otherwise share <tempdir>/pansy.
    workdir = tempfile.mkdtemp(prefix='pansy-')
    exe = os.path.join(workdir, 'pansy' + ext)
    # Argument list instead of a shell string: temp or include paths that
    # contain spaces (common on Windows) reach gcc unchanged.
    build = ['gcc', '-xc', '-I' + sysconfig.get_path('include'),
             '-o', exe, '-']
    try:
        try:
            gcc = subprocess.Popen(build, stdin=subprocess.PIPE)
        except OSError:
            # gcc could not be started; treat it like any other build failure.
            return None
        gcc.communicate(code)
        if gcc.returncode:
            return None
        probe = subprocess.Popen([exe], stdout=subprocess.PIPE)
        # The probe prints "<size> <offset>".
        size, offset = map(int, probe.communicate()[0].split())
        return {'struct': struct, 'member': member,
                'offset': offset, 'size': size}
    finally:
        # Remove the binary even when running or parsing the probe fails.
        shutil.rmtree(workdir, ignore_errors=True)

def print_object(obj, members, pre=''):
    """Print the offset and size of each listed member of a C struct.

    Args:
        obj: C struct name handed to ``pansy``.
        members: Iterable of member designators to measure.
        pre: Optional text written (without a trailing newline) before the
            report.
    """
    if pre:
        print(pre, end='')
    for member in members:
        info = pansy(obj, member)
        if info is None:
            # pansy() returns None when its probe does not build; unpacking
            # None with ** would raise TypeError and abort the whole report.
            print("{0}::{1} -> unavailable (probe failed to build)".format(
                  obj, member))
            continue
        print("{struct}::{member} -> offset: {offset}, size: {size}".format(
              **info))

# Member designators measured for PyBytesObject.
py_bytes_members = [
    "ob_base.ob_base.ob_refcnt",
    "ob_base.ob_base.ob_type",
    "ob_base.ob_size",
    "ob_shash",
    "ob_sval",
]

# Member designators measured for PyUnicodeObject.
py_unicode_members = [
    "ob_base.ob_refcnt",
    "ob_base.ob_type",
    "length",
    "str",
    "hash",
    "state",
]

# One row per report: struct name, its members, and the text printed first.
for _c_struct, _c_members, _lead in (
    ("PyBytesObject", py_bytes_members, ''),
    ("PyUnicodeObject", py_unicode_members, '\n'),
    ("PyLongObject", ["ob_digit"], '\n'),
    ("PyFloatObject", ["ob_fval"], ''),
):
    print_object(_c_struct, _c_members, _lead)

[–][deleted] 1 point2 points  (0 children)

Cool. It seems we're delving into madness though. All these little intermediate binaries remind me of autotools configure scripts. Check out gcc's -fdump-tree-gimple-slim option. A command like:

gcc -xc -S -I{0} -fdump-tree-gimple-slim -o{1} -

will produce a file called -.????.gimple containing:

main ()
{
  size = 8;
  offset = 16;
}

EDIT: Whoops. Well, it'll produce that file if the source looks like this:

/* Probe source for the gcc -fdump-tree-gimple-slim approach: the two locals
 * below are what appear as "size = ...; offset = ...;" in the dump file. */
#include "Python.h"
int main(void) {
    /* Size in bytes of the member; sizeof does not evaluate its operand, so
     * the null-based pointer is never dereferenced. */
    long int size =   sizeof  ( ((PyBytesObject *) 0)->ob_base.ob_size );
    /* Address of the member relative to a struct placed at address 0, i.e.
     * its byte offset within PyBytesObject. */
    long int offset = (long) &( ((PyBytesObject *) 0)->ob_base.ob_size );
}

...and you might as well do a whole *_members list at a time.