保护技术开发05 - 实现Dex文件运行时自篡改

一种比较常规的反静态分析的方法,大概可以分为修改数据和修改指令两部分

第一步就是定位Dex在内存中的位置,所以我们来实现定位到关键数据的过程

最近Android Studio升级到了3.0,对NDK开发的支持做的还不错,直接在入口添加C++支持即可配置好NDK开发环境

我们先实现一个JNI_OnLoad,来实现自动执行而不是在Java层调用篡改Dex数据或指令的函数

/*
 * Native library entry point. Doing the work here makes the tampering run
 * automatically when the library is loaded, with no explicit call from the
 * Java layer.
 *
 * Returns JNI_VERSION_1_6 on success, or -1 when no JNIEnv can be obtained
 * for that JNI version.
 */
extern "C"
jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved)
{
    LOGE("Call JNI_OnLoad()");

    // The env pointer itself is not used; the lookup only confirms that
    // JNI 1.6 is available before any work is done.
    JNIEnv *jniEnv = NULL;
    const bool envAvailable = (vm->GetEnv((void **) &jniEnv, JNI_VERSION_1_6) == JNI_OK);
    if (!envAvailable) {
        return -1;
    }

    myModifyDexInst();
    return JNI_VERSION_1_6;
}

先定位Dex映射到内存中的位置。这里之所以用循环依次尝试`-1`、`-2`两个后缀,是因为应用重新安装后dalvik-cache中文件名的后缀可能发生变化(对比下文两次日志中的demo-1.apk和demo-2.apk),只写死一个后缀就会踩坑

/*
 * Locates this app's optimized DEX mapping in the current process and logs
 * its base address and size.
 *
 * The dalvik-cache file name carries a numeric suffix ("-1", "-2") that is
 * not known in advance, so both candidates are probed in order and the
 * first one found in /proc/self/maps wins.
 */
static void myModifyDexInst()
{
    uint64_t moduleBase = 0;
    size_t moduleSize = 0;
    char filename[1024];

    for (int i = 1; i < 3; i++)
    {
        // Bounded formatting: never writes past the end of filename.
        snprintf(filename, sizeof(filename),
                 "/data/dalvik-cache/data@app@com.wnagzihxa1n.demo-%d.apk@classes.dex", i);
        getModuleInfo(filename, &moduleBase, &moduleSize);
        if (moduleBase != 0)
        {
            break;
        }
    }

    if (moduleBase == 0)
    {
        // filename holds the last candidate that was tried.
        LOGE("Can't locate the module %s\n", filename);
        return;
    }

    // Print the full 64-bit value instead of truncating it to unsigned int.
    LOGE("moduleBase is : 0x%llx, moduleSize is : 0x%zx\n",
         (unsigned long long) moduleBase, moduleSize);
}

通过读取/proc/pid/maps文件进行Dex映射数据的读取,获取映射的区段并计算整个长度

/*
 * Looks up the first line of /proc/self/maps that contains moduleName and
 * reports the address range of that mapping.
 *
 * moduleName  substring to search for in each maps line (typically a path).
 * moduleBase  out: start address of the matching mapping.
 * moduleSize  out: end address minus start address of that mapping.
 *
 * Both outputs are left untouched when the maps file cannot be opened, when
 * no line matches, or when a matching line does not start with a
 * well-formed "<hex>-<hex>" range. Callers should pre-initialize them.
 */
static void getModuleInfo(const char* moduleName, uint64_t* moduleBase, size_t* moduleSize)
{
    FILE* fp_dex = fopen("/proc/self/maps", "r");
    if (fp_dex == NULL)
    {
        // Nothing to read and nothing to close: fclose(NULL) is undefined.
        LOGE("Can't open /proc/self/maps\n");
        return;
    }

    char line[1024];
    while (fgets(line, sizeof(line), fp_dex))
    {
        if (strstr(line, moduleName) == NULL)
        {
            continue;
        }
        LOGE("Current line : %s\n", line);

        // Each line starts with "<start>-<end> "; parse both numbers in
        // place and validate the separator instead of trusting strtok.
        char* afterStart = NULL;
        const unsigned long long start = strtoull(line, &afterStart, 16);
        if (afterStart == line || *afterStart != '-')
        {
            continue;
        }

        const char* endText = afterStart + 1;
        char* afterEnd = NULL;
        const unsigned long long end = strtoull(endText, &afterEnd, 16);
        if (afterEnd == endText || end < start)
        {
            continue;
        }

        // Log the address text exactly as it appears in the maps line.
        LOGE("baseStart = 0x%.*s, baseEnd = 0x%.*s\n",
             (int) (afterStart - line), line,
             (int) (afterEnd - endText), endText);

        *moduleBase = (uint64_t) start;
        *moduleSize = (size_t) (end - start);
        break;
    }

    fclose(fp_dex);
}

然后我们跑起来

11-14 10:45:25.774 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: Call JNI_OnLoad()
11-14 10:45:25.784 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d60d000-8d665000 r--p 00000000 fe:20 7115       /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-1.apk@classes.dex
11-14 10:45:26.294 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d60d000, baseEnd = 0x8d665000
11-14 10:45:26.294 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d60d000, moduleSize is : 0x58000

定位到ODex数据之后,接着定位其中的Dex文件映射到内存中的位置;在此之前,要先通过Magic Number判断数据是否合法

if (!checkLegalODexFile(moduleBase))
{
    LOGE("Check ODexFile failed\n");
    return;
}

LOGE("Check ODexFile successed\n");
void* optBase = (void*) moduleBase;
DexHeader* pHeader = (DexHeader*) ((u4)optBase + sizeof(DexOptHeader));

gDexFile.pOptHeader = (DexOptHeader*) optBase;
gDexFile.pHeader    = (DexHeader*) pHeader;
gDexFile.baseAddr   = (u1*) ((u4) optBase + sizeof(DexOptHeader));
gDexFile.pStringIds = (DexStringId*) ((u4) gDexFile.baseAddr + pHeader->stringIdsOff);
gDexFile.pTypeIds   = (DexTypeId*) ((u4) gDexFile.baseAddr + pHeader->typeIdsOff);
gDexFile.pFieldIds  = (DexFieldId*) ((u4) gDexFile.baseAddr + pHeader->fieldIdsOff);
gDexFile.pMethodIds = (DexMethodId*) ((u4) gDexFile.baseAddr + pHeader->methodIdsOff);
gDexFile.pProtoIds  = (DexProtoId*) ((u4) gDexFile.baseAddr + pHeader->protoIdsOff);
gDexFile.pClassDefs = (DexClassDef*) ((u4) gDexFile.baseAddr + pHeader->classDefsOff);

char sanitized[sizeof(pHeader->magic) * 2 + 1];
asciify(sanitized, pHeader->magic, sizeof(pHeader->magic));
LOGE("pHeader->magic = %s\n", sanitized);
LOGE("pHeader->stringIdsOff = 0x%x\n", pHeader->stringIdsOff);
LOGE("pHeader->stringIdsSize = 0x%x\n", pHeader->stringIdsSize);

判断是否是合法的ODex文件

static bool checkLegalODexFile(uint64_t moduleBase)
{
    void* optBase = (void*) moduleBase;
    DexOptHeader* dexOptHeader = (DexOptHeader*) (optBase);
    DexHeader* dexHeader = (DexHeader*) ((u4)optBase + sizeof(DexOptHeader));
    if (!strcmp((char*) dexOptHeader->magic, "dey\n036\0")
        && !strcmp((char*) dexHeader->magic, "dex\n035\0"))
    {
        return true;
    }
    return false;
}

这里我们简单的输出几个值进行对比

11-14 14:26:22.224 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d60d000-8d665000 r--p 00000000 fe:20 7113       /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-14 14:26:22.224 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d60d000, baseEnd = 0x8d665000
11-14 14:26:22.234 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d60d000, moduleSize is : 0x58000
11-14 14:26:22.234 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->magic = dex\n035\0
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->stringIdsOff = 0x70
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->stringIdsSize = 0x721

关于输出Magic Number,可以参考DexDump.cpp的源码

/*
 * Renders len raw bytes from data as a printable, NUL-terminated string.
 *
 *   0x00            -> the two characters '\' '0'
 *   '\n'            -> the two characters '\' 'n'
 *   other < 0x20    -> '.'
 *   >= 0x80         -> '?'
 *   everything else -> copied unchanged
 *
 * out must have room for up to 2 * len + 1 characters.
 */
static void asciify(char* out, const unsigned char* data, size_t len)
{
    for (size_t pos = 0; pos < len; pos++) {
        const unsigned char byte = data[pos];

        if (byte == '\0') {
            *out++ = '\\';
            *out++ = '0';
        } else if (byte == '\n') {
            *out++ = '\\';
            *out++ = 'n';
        } else if (byte < 0x20) {
            /* remaining control characters */
            *out++ = '.';
        } else if (byte >= 0x80) {
            /* outside 7-bit ASCII */
            *out++ = '?';
        } else {
            *out++ = (char) byte;
        }
    }
    *out = '\0';
}

简单输出字段数据进行对比

IMAGE

如果对的上那说明已经成功定位Dex文件映射到内存中的地址了

然后我们可以根据拿到的地址进行类,方法,指令,以及其余数据的索引操作

首先我们来写一个方法,比如很简单的a + b

/**
 * Demo activity: reads two integers from the text fields and shows their
 * sum and difference in a Toast when the button is clicked.
 */
public class MainActivity extends Activity {
    // The two operand inputs and the button that triggers the calculation.
    EditText editText1, editText2;
    Button button;

    // Used to load the 'native-lib' library on application startup.
    static {
        System.loadLibrary("native-lib");
    }

    /**
     * Shows "add = a + b" and "sub = a - b" in a long Toast.
     *
     * NOTE(review): the native side of this demo overwrites the first
     * instructions of this method at run time (it swaps the add-int and
     * sub-int opcodes), so the order of the two statements below should be
     * kept as is — verify against the patch offsets before editing.
     */
    public void getSum(int a, int b) {
        int add = a + b;
        int sub = a - b;
        Toast.makeText(MainActivity.this, "add = " + add + "\nsub = " + sub, Toast.LENGTH_LONG).show();
    }

    /**
     * Inflates the layout, looks up the three widgets and wires the button
     * to call getSum with the parsed contents of the two text fields.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        editText1 = (EditText) findViewById(R.id.editText1);
        editText2 = (EditText) findViewById(R.id.editText2);
        button = (Button) findViewById(R.id.button);
        button.setOnClickListener(new View.OnClickListener() {
            @Override
            public void onClick(View view) {
                // Integer.valueOf throws NumberFormatException for empty or
                // non-numeric text; no validation is done here.
                getSum(Integer.valueOf(editText1.getText().toString()), Integer.valueOf(editText2.getText().toString()));
            }
        });
    }
}

简单先测试

IMAGE

然后我们使用IDA观察对应的指令,稍微整理了下

CODE:0001C1D0 0090 0605  add-int v0, a, b
CODE:0001C1D4 0191 0605  sub-int v1, a, b

同时在运行时读取指令,这个代码比较复杂,有一些函数直接从DexDump.cpp里面拷贝。首先是获取指定类的结构数据,其中用到的三个方法dexGetClassDef、dexGetClassData、dexReadAndVerifyClassData都是Dalvik虚拟机里面自带的,所以直接调用并遍历所有类即可,最后返回指定类的数据

/*
 * Finds the class whose type descriptor equals pClassName (for example
 * "Lcom/example/Foo;") and returns its decoded class data.
 *
 * pDexFile    DEX file whose class definitions are scanned in index order.
 * pClassName  descriptor to match exactly.
 *
 * Returns the result of dexReadAndVerifyClassData for the first matching
 * class, or NULL when no class matches or decoding fails. In libdex that
 * result is heap-allocated, so the caller owns it and releases it with
 * free().
 *
 * The descriptor is compared before the class data is decoded. Decoding
 * every class up front allocated one structure per class and leaked all of
 * the non-matching ones.
 */
static DexClassData* getClassData(DexFile* pDexFile, char* pClassName)
{
    const size_t classDefCount = pDexFile->pHeader->classDefsSize;
    for (size_t classDefIndex = 0; classDefIndex < classDefCount; classDefIndex++)
    {
        const DexClassDef* pClassDef = dexGetClassDef(pDexFile, classDefIndex);
        const char* descriptor = getTypeIdString(pDexFile, pClassDef->classIdx);
        if (descriptor == NULL || strcmp(descriptor, pClassName) != 0)
        {
            continue;
        }

        LOGE("Found Class %s\n", pClassName);

        // Decode only the class that was asked for.
        const u1* pEncodedData = dexGetClassData(pDexFile, pClassDef);
        const DexClassData* pClassData = dexReadAndVerifyClassData(&pEncodedData, NULL);
        return (DexClassData*) pClassData;
    }
    return NULL;
}

方法数据的获取,依旧是靠遍历

/*
 * Returns the code item of the first method in pClassData whose name equals
 * pMethodName, or NULL when no method has that name.
 *
 * Direct methods are searched before virtual methods. Only the method name
 * is compared, so the first of several overloads is returned.
 */
static const DexCode* getCode(const DexFile* pDexFile, const DexClassData* pClassData, const char* pMethodName)
{
    const size_t directCount = pClassData->header.directMethodsSize;
    const size_t virtualCount = pClassData->header.virtualMethodsSize;

    // Pass 1: direct methods.
    for (size_t n = 0; n < directCount; n++)
    {
        int methodIdx = pClassData->directMethods[n].methodIdx;
        const DexCode* candidate = dexGetCode(pDexFile, &pClassData->directMethods[n]);
        if (!strcmp(getString(pDexFile, pDexFile->pMethodIds[methodIdx].nameIdx), pMethodName))
        {
            return candidate;
        }
    }

    // Pass 2: virtual methods.
    for (size_t n = 0; n < virtualCount; n++)
    {
        int methodIdx = pClassData->virtualMethods[n].methodIdx;
        const DexCode* candidate = dexGetCode(pDexFile, &pClassData->virtualMethods[n]);
        if (!strcmp(getString(pDexFile, pDexFile->pMethodIds[methodIdx].nameIdx), pMethodName))
        {
            return candidate;
        }
    }

    return NULL;
}

调用的时候,简单做一下判断即可

/*
 * Looks up the code item of method pMethodName in class pClassName and
 * dumps it with dumpDexCode when found.
 *
 * pClassName   class descriptor, e.g. "Lcom/example/Foo;".
 * pMethodName  plain method name (no signature).
 *
 * Returns the code item, or NULL when either the class or the method is
 * not found.
 */
static const DexCode* getClassMethodCode(const DexFile* pDexFile, const char* pClassName, const char* pMethodName)
{
    LOGE("Finding %s -> %s\n", pClassName, pMethodName);
    const DexClassData* pClassData = getClassData(pDexFile, pClassName);
    if (pClassData == NULL)
    {
        return NULL;
    }

    const DexCode* pCode = getCode(pDexFile, pClassData, pMethodName);

    // The decoded class data is only needed for the lookup; release it so
    // each call does not leak one structure.
    // NOTE(review): relies on libdex behavior — dexReadAndVerifyClassData
    // returns malloc'ed memory and dexGetCode returns a pointer into the
    // mapped DEX rather than into this structure. Confirm for the copies
    // used in this project.
    free((void*) pClassData);

    if (pCode == NULL)
    {
        return NULL;
    }

    dumpDexCode(pCode);

    return pCode;
}

跑起来,可以看到成功获取到指定方法的指令

11-17 10:45:41.784 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d5b4000-8d60d000 r--p 00000000 fe:20 7113       /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-17 10:45:41.784 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d5b4000, baseEnd = 0x8d60d000
11-17 10:45:41.794 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d5b4000, moduleSize is : 0x59000
11-17 10:45:41.804 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-17 10:45:41.804 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Finding Lcom/wnagzihxa1n/demo/MainActivity; -> getSum
11-17 10:45:41.814 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Found Class Lcom/wnagzihxa1n/demo/MainActivity;
11-17 10:45:41.814 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n:       registers     : 7
11-17 10:45:41.824 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n:       ins           : 3
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n:       outs          : 3
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n:       insns size    : 44 16-bit code units
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0090 0605 0191 0605 0222 006c 1070 0307 
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c 
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8 
11-17 10:45:41.844 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8 
11-17 10:45:41.854 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c 
11-17 10:45:41.854 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e

那么定位到指令,我们就可以通过修改内存可读写的属性进行指令的修改,这里有两个点:第一个是00和01这个表示寄存器编号,第二个是90和91表示opcode,分别表示add-int和sub-int,我们把加和减对调

if (pCode == NULL)
{
    LOGE("Get Method getSum ins failed\n");
    return;
}

if (mprotect((void*) moduleBase, moduleSize, PROT_READ | PROT_WRITE | PROT_EXEC) == 0)
{
    pCode->insns[0] = 0x0091;
    pCode->insns[2] = 0x0190;
    dumpDexCode(pCode);
    mprotect((void*) moduleBase, moduleSize, PROT_READ | PROT_EXEC);
}

然后修改结果如下

11-19 15:40:54.804 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d593000-8d5ec000 r--p 00000000 fe:20 7113       /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-19 15:40:54.804 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d593000, baseEnd = 0x8d5ec000
11-19 15:40:54.834 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d593000, moduleSize is : 0x59000
11-19 15:40:54.834 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-19 15:40:54.904 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Finding Lcom/wnagzihxa1n/demo/MainActivity; -> getSum
11-19 15:40:55.034 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Found Class Lcom/wnagzihxa1n/demo/MainActivity;
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       registers     : 7
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       ins           : 3
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       outs          : 3
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       insns size    : 44 16-bit code units
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0090 0605 0191 0605 0222 006c 1070 0307 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       registers     : 7
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       ins           : 3
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       outs          : 3
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n:       insns size    : 44 16-bit code units
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0091 0605 0190 0605 0222 006c 1070 0307 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c 
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e

直观的效果

IMAGE

这个点其实可以做很多的文章,比如最简单的就是反静态分析,比如跑起来之后再修改某些指令,或者替换某些指令,再比如可以把所有方法指令全都抽空,然后动态运行的时候全部填充回去,再进一步,可以运行到这个方法的时候把指令填充回去

References

  • Dalvik字节码自篡改原理及实现:https://bbs.pediy.com/thread-211331.htm
  • Android安全分析挑战:运行时篡改Dalvik字节码:https://bbs.pediy.com/thread-170381.htm
  • 运行时自篡改dalvik字节码delta.apk原理解析(逆向):https://bbs.pediy.com/thread-176732.htm
  • apk自我保护的一种实现方式——运行时自篡改dalvik指令:http://blog.csdn.net/freshui/article/details/13620647