一种比较常规的反静态分析的方法,大概可以分为修改数据和修改指令两部分
第一步就是定位Dex在内存中的位置,所以我们来实现定位到关键数据的过程
最近Android Studio升级到了3.0,对NDK开发的支持做得还不错,新建工程时直接在入口处勾选C++支持即可配置好NDK开发环境
我们先实现一个JNI_OnLoad,这样篡改Dex数据或指令的函数会在so被加载时自动执行,而不需要在Java层主动去调用
/**
 * Entry point invoked when this native library is loaded.
 *
 * Requests a JNI 1.6 environment from the VM and, if that succeeds, runs
 * myModifyDexInst() so the dex tampering happens without any call from the
 * Java layer.
 *
 * @param vm       the Java VM handle passed in by the loader
 * @param reserved unused
 * @return JNI_VERSION_1_6 on success, -1 when no JNI 1.6 environment is available
 */
extern "C"
jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved)
{
    LOGE("Call JNI_OnLoad()");

    JNIEnv *jniEnv = NULL;
    const jint envStatus = vm->GetEnv((void **) &jniEnv, JNI_VERSION_1_6);
    if (envStatus == JNI_OK) {
        myModifyDexInst();
        return JNI_VERSION_1_6;
    }

    // Without a usable environment, report failure to the loader.
    return -1;
}
先定位Dex映射到内存中的位置。至于为什么要用循环去尝试路径里的-1和-2两个后缀:apk安装路径的后缀并不固定(下文的日志里就先后出现了demo-1和demo-2),自己踩到坑的时候就明白为什么了
/**
 * Locates the optimized dex of com.wnagzihxa1n.demo in this process and logs
 * where it is mapped.
 *
 * The install suffix of the cached dex path is not fixed, so both "-1" and
 * "-2" are probed through getModuleInfo() until one of them is found mapped.
 * Logs an error and returns if neither candidate is mapped.
 */
static void myModifyDexInst()
{
    uint64_t moduleBase = 0;
    size_t moduleSize = 0;
    char filename[1024];

    for (int i = 1; i < 3; i++)
    {
        // Bounded formatting: never write past the end of filename.
        snprintf(filename, sizeof(filename),
                 "/data/dalvik-cache/data@app@com.wnagzihxa1n.demo-%d.apk@classes.dex", i);
        getModuleInfo(filename, &moduleBase, &moduleSize);
        if (moduleBase != 0)
        {
            break;
        }
    }

    if (moduleBase == 0)
    {
        // filename holds the last candidate that was tried.
        LOGE("Can't locate the module %s\n", filename);
        return;
    }

    // Print the full-width values so nothing is truncated on 64-bit targets.
    LOGE("moduleBase is : 0x%llx, moduleSize is : 0x%zx\n",
         (unsigned long long) moduleBase, moduleSize);
}
通过读取/proc/self/maps(也就是当前进程的/proc/pid/maps)文件来查找Dex的映射,取出映射区段的起止地址并计算整个长度
/**
 * Finds the first mapping in /proc/self/maps whose line contains moduleName
 * and reports its start address and length.
 *
 * @param moduleName substring to look for in each maps line (e.g. a file path)
 * @param moduleBase receives the start address of the matching mapping
 * @param moduleSize receives end address minus start address of that mapping
 *
 * The outputs are written only when a matching, well-formed line is found;
 * callers should pre-initialize them (e.g. to 0) to detect "not found".
 */
static void getModuleInfo(const char* moduleName, uint64_t* moduleBase, size_t* moduleSize)
{
    if (moduleName == NULL || moduleBase == NULL || moduleSize == NULL)
    {
        return;
    }

    FILE* fp_maps = fopen("/proc/self/maps", "r");
    if (fp_maps == NULL)
    {
        // Nothing to close: calling fclose(NULL) is undefined behavior.
        return;
    }

    char line[1024];
    while (fgets(line, sizeof(line), fp_maps))
    {
        if (strstr(line, moduleName) == NULL)
        {
            continue;
        }
        LOGE("Current line : %s\n", line);

        // Each line starts with "<start>-<end> " in hex; parse in place
        // instead of using strtok, which mutates the line and keeps static state.
        char* cursor = NULL;
        const unsigned long long start = strtoull(line, &cursor, 16);
        if (cursor == line || *cursor != '-')
        {
            continue;   // malformed address range, keep scanning
        }
        const char* endText = cursor + 1;
        const unsigned long long end = strtoull(endText, &cursor, 16);
        if (cursor == endText || end < start)
        {
            continue;
        }

        LOGE("baseStart = 0x%llx, baseEnd = 0x%llx\n", start, end);
        *moduleBase = (uint64_t) start;
        *moduleSize = (size_t) (end - start);
        break;
    }

    fclose(fp_maps);
}
然后我们跑起来
11-14 10:45:25.774 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: Call JNI_OnLoad()
11-14 10:45:25.784 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d60d000-8d665000 r--p 00000000 fe:20 7115 /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-1.apk@classes.dex
11-14 10:45:26.294 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d60d000, baseEnd = 0x8d665000
11-14 10:45:26.294 5791-5791/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d60d000, moduleSize is : 0x58000
在定位到ODex数据之后,开始定位Dex文件映射到内存中的位置;在此之前要先通过Magic Number判断这段数据是否合法
if (!checkLegalODexFile(moduleBase))
{
LOGE("Check ODexFile failed\n");
return;
}
LOGE("Check ODexFile successed\n");
void* optBase = (void*) moduleBase;
DexHeader* pHeader = (DexHeader*) ((u4)optBase + sizeof(DexOptHeader));
gDexFile.pOptHeader = (DexOptHeader*) optBase;
gDexFile.pHeader = (DexHeader*) pHeader;
gDexFile.baseAddr = (u1*) ((u4) optBase + sizeof(DexOptHeader));
gDexFile.pStringIds = (DexStringId*) ((u4) gDexFile.baseAddr + pHeader->stringIdsOff);
gDexFile.pTypeIds = (DexTypeId*) ((u4) gDexFile.baseAddr + pHeader->typeIdsOff);
gDexFile.pFieldIds = (DexFieldId*) ((u4) gDexFile.baseAddr + pHeader->fieldIdsOff);
gDexFile.pMethodIds = (DexMethodId*) ((u4) gDexFile.baseAddr + pHeader->methodIdsOff);
gDexFile.pProtoIds = (DexProtoId*) ((u4) gDexFile.baseAddr + pHeader->protoIdsOff);
gDexFile.pClassDefs = (DexClassDef*) ((u4) gDexFile.baseAddr + pHeader->classDefsOff);
char sanitized[sizeof(pHeader->magic) * 2 + 1];
asciify(sanitized, pHeader->magic, sizeof(pHeader->magic));
LOGE("pHeader->magic = %s\n", sanitized);
LOGE("pHeader->stringIdsOff = 0x%x\n", pHeader->stringIdsOff);
LOGE("pHeader->stringIdsSize = 0x%x\n", pHeader->stringIdsSize);
判断是否是合法的ODex文件
static bool checkLegalODexFile(uint64_t moduleBase)
{
void* optBase = (void*) moduleBase;
DexOptHeader* dexOptHeader = (DexOptHeader*) (optBase);
DexHeader* dexHeader = (DexHeader*) ((u4)optBase + sizeof(DexOptHeader));
if (!strcmp((char*) dexOptHeader->magic, "dey\n036\0")
&& !strcmp((char*) dexHeader->magic, "dex\n035\0"))
{
return true;
}
return false;
}
这里我们简单的输出几个值进行对比
11-14 14:26:22.224 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d60d000-8d665000 r--p 00000000 fe:20 7113 /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-14 14:26:22.224 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d60d000, baseEnd = 0x8d665000
11-14 14:26:22.234 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d60d000, moduleSize is : 0x58000
11-14 14:26:22.234 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->magic = dex\n035\0
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->stringIdsOff = 0x70
11-14 14:26:22.244 9943-9943/com.wnagzihxa1n.demo E/wnagzihxa1n: pHeader->stringIdsSize = 0x721
关于输出Magic Number,可以参考DexDump.cpp的源码
/**
 * Writes a printable, NUL-terminated rendering of a byte buffer into out.
 *
 * NUL becomes the two characters "\0", newline becomes "\n", any other
 * byte below 0x20 becomes '.', bytes of 0x80 and above become '?', and
 * everything else is copied unchanged.
 *
 * @param out  destination; must hold at least len * 2 + 1 characters
 * @param data bytes to render
 * @param len  number of bytes to read from data
 */
static void asciify(char* out, const unsigned char* data, size_t len)
{
    char* dst = out;

    for (size_t pos = 0; pos < len; ++pos) {
        const unsigned char byte = data[pos];

        if (byte == '\0') {
            *dst++ = '\\';
            *dst++ = '0';
        } else if (byte == '\n') {
            *dst++ = '\\';
            *dst++ = 'n';
        } else if (byte < 0x20) {
            /* remaining control characters */
            *dst++ = '.';
        } else if (byte >= 0x80) {
            /* outside 7-bit ASCII */
            *dst++ = '?';
        } else {
            *dst++ = byte;
        }
    }

    *dst = '\0';
}
简单输出字段数据进行对比
如果对的上那说明已经成功定位Dex文件映射到内存中的地址了
然后我们可以根据拿到的地址进行类,方法,指令,以及其余数据的索引操作
首先我们来写一个方法,比如很简单的a + b
/**
 * Demo activity: reads two integers from text fields and, when the button is
 * clicked, shows their sum and difference in a Toast via {@link #getSum}.
 * Loading this class also loads the "native-lib" native library.
 */
public class MainActivity extends Activity {
// Input fields for the two operands, and the button that triggers getSum().
EditText editText1, editText2;
Button button;
// Used to load the 'native-lib' library on application startup.
static {
System.loadLibrary("native-lib");
}
/**
 * Shows a long Toast containing a + b and a - b.
 *
 * The accompanying native code patches the add-int / sub-int instructions
 * at the start of this method at runtime, so the order of the two
 * statements below matters to that demo.
 *
 * @param a first operand
 * @param b second operand
 */
public void getSum(int a, int b) {
int add = a + b;
int sub = a - b;
Toast.makeText(MainActivity.this, "add = " + add + "\nsub = " + sub, Toast.LENGTH_LONG).show();
}
/**
 * Sets the layout, looks up the two text fields and the button, and
 * registers the click handler that feeds the field contents to getSum().
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
editText1 = (EditText) findViewById(R.id.editText1);
editText2 = (EditText) findViewById(R.id.editText2);
button = (Button) findViewById(R.id.button);
button.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View view) {
// NOTE(review): Integer.valueOf is given the raw field text with no validation;
// empty or non-numeric input presumably throws NumberFormatException — confirm.
getSum(Integer.valueOf(editText1.getText().toString()), Integer.valueOf(editText2.getText().toString()));
}
});
}
}
简单先测试
然后我们使用IDA观察对应的指令,稍微整理了下
CODE:0001C1D0 0090 0605 add-int v0, a, b
CODE:0001C1D4 0191 0605 sub-int v1, a, b
同时在运行时读取指令,这部分代码比较复杂,有一些函数是直接从DexDump.cpp里面拷贝的。首先是获取指定类的结构数据,其中用到的三个方法dexGetClassDef、dexGetClassData和dexReadAndVerifyClassData都是Dalvik虚拟机里面自带的,所以直接调用它们遍历所有类,最后返回指定类的数据即可
/**
 * Looks up a class by descriptor and returns its decoded class data.
 *
 * Walks every class definition in the dex, compares its type descriptor
 * (e.g. "Lcom/example/Foo;") with pClassName, and decodes the class data of
 * the first match only.
 *
 * @param pDexFile   parsed dex file to search
 * @param pClassName class descriptor to look for; taken as const so string
 *                   literals and const callers can pass it directly
 * @return decoded class data of the matching class, or NULL if no class
 *         matches or an argument is NULL
 *
 * NOTE(review): in AOSP libdex, dexReadAndVerifyClassData() returns a
 * heap-allocated structure that the caller is expected to free() — confirm
 * against the libdex copy in use. Decoding only the matching class avoids
 * leaking one such allocation per non-matching class.
 * NOTE(review): pDexFile stays non-const because getTypeIdString()'s
 * signature is not visible here; const callers may need it widened.
 */
static DexClassData* getClassData(DexFile* pDexFile, const char* pClassName)
{
    if (pDexFile == NULL || pClassName == NULL)
    {
        return NULL;
    }

    const size_t classDefCount = pDexFile->pHeader->classDefsSize;
    for (size_t classDefIndex = 0; classDefIndex < classDefCount; classDefIndex++)
    {
        const DexClassDef* pClassDef = dexGetClassDef(pDexFile, classDefIndex);

        // Compare names first so that only the wanted class gets decoded.
        const char* descriptor = getTypeIdString(pDexFile, pClassDef->classIdx);
        if (descriptor == NULL || strcmp(descriptor, pClassName) != 0)
        {
            continue;
        }

        const u1* pEncodedData = dexGetClassData(pDexFile, pClassDef);
        DexClassData* pClassData = (DexClassData*) dexReadAndVerifyClassData(&pEncodedData, NULL);
        LOGE("Found Class %s\n", pClassName);
        return pClassData;
    }
    return NULL;
}
方法数据的获取,依旧是靠遍历
/**
 * Finds a method by name in a decoded class and returns its code item.
 *
 * Direct methods are searched first, then virtual methods; the first method
 * whose name equals pMethodName wins.
 *
 * @param pDexFile    dex file the class belongs to (used for name lookup)
 * @param pClassData  decoded class data to search
 * @param pMethodName simple method name to match
 * @return whatever dexGetCode() yields for the first matching method, or
 *         NULL when no method has that name
 */
static const DexCode* getCode(const DexFile* pDexFile, const DexClassData* pClassData, const char* pMethodName)
{
    // Same scan for both method tables, direct methods first.
    const size_t methodCounts[2] = {
        pClassData->header.directMethodsSize,
        pClassData->header.virtualMethodsSize
    };
    const DexMethod* const methodTables[2] = {
        pClassData->directMethods,
        pClassData->virtualMethods
    };

    for (int table = 0; table < 2; ++table)
    {
        const DexMethod* pMethod = methodTables[table];
        for (size_t left = methodCounts[table]; left > 0; --left, ++pMethod)
        {
            int idx = pMethod->methodIdx;
            const DexCode* pCode = dexGetCode(pDexFile, pMethod);
            const char* name = getString(pDexFile, pDexFile->pMethodIds[idx].nameIdx);
            if (strcmp(name, pMethodName) == 0)
            {
                return pCode;
            }
        }
    }
    return NULL;
}
调用的时候,简单做一下判断即可
/**
 * Resolves "class -> method" to the method's code item and dumps it.
 *
 * @param pDexFile    dex file to search
 * @param pClassName  class descriptor passed to getClassData()
 * @param pMethodName method name passed to getCode()
 * @return the code item found by getCode(), or NULL if either the class or
 *         the method cannot be found; the code is dumped via dumpDexCode()
 *         only when it was found
 */
static const DexCode* getClassMethodCode(const DexFile* pDexFile, const char* pClassName, const char* pMethodName)
{
    LOGE("Finding %s -> %s\n", pClassName, pMethodName);

    const DexClassData* classData = getClassData(pDexFile, pClassName);
    const DexCode* methodCode = NULL;
    if (classData != NULL)
    {
        methodCode = getCode(pDexFile, classData, pMethodName);
    }

    if (methodCode != NULL)
    {
        dumpDexCode(methodCode);
    }
    return methodCode;
}
跑起来,可以看到成功获取到指定方法的指令
11-17 10:45:41.784 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d5b4000-8d60d000 r--p 00000000 fe:20 7113 /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-17 10:45:41.784 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d5b4000, baseEnd = 0x8d60d000
11-17 10:45:41.794 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d5b4000, moduleSize is : 0x59000
11-17 10:45:41.804 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-17 10:45:41.804 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Finding Lcom/wnagzihxa1n/demo/MainActivity; -> getSum
11-17 10:45:41.814 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: Found Class Lcom/wnagzihxa1n/demo/MainActivity;
11-17 10:45:41.814 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: registers : 7
11-17 10:45:41.824 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: ins : 3
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: outs : 3
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: insns size : 44 16-bit code units
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0090 0605 0191 0605 0222 006c 1070 0307
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c
11-17 10:45:41.834 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8
11-17 10:45:41.844 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8
11-17 10:45:41.854 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c
11-17 10:45:41.854 2522-2522/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e
那么定位到指令,我们就可以通过修改内存可读写的属性进行指令的修改,这里有两个点:第一个是00和01这个表示寄存器编号,第二个是90和91表示opcode,分别表示add-int和sub-int,我们把加和减对调
if (pCode == NULL)
{
LOGE("Get Method getSum ins failed\n");
return;
}
if (mprotect((void*) moduleBase, moduleSize, PROT_READ | PROT_WRITE | PROT_EXEC) == 0)
{
pCode->insns[0] = 0x0091;
pCode->insns[2] = 0x0190;
dumpDexCode(pCode);
mprotect((void*) moduleBase, moduleSize, PROT_READ | PROT_EXEC);
}
然后修改结果如下
11-19 15:40:54.804 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Current line : 8d593000-8d5ec000 r--p 00000000 fe:20 7113 /data/dalvik-cache/data@app@com.wnagzihxa1n.demo-2.apk@classes.dex
11-19 15:40:54.804 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: baseStart = 0x8d593000, baseEnd = 0x8d5ec000
11-19 15:40:54.834 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: moduleBase is : 0x8d593000, moduleSize is : 0x59000
11-19 15:40:54.834 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Check ODexFile successed
11-19 15:40:54.904 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Finding Lcom/wnagzihxa1n/demo/MainActivity; -> getSum
11-19 15:40:55.034 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: Found Class Lcom/wnagzihxa1n/demo/MainActivity;
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: registers : 7
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: ins : 3
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: outs : 3
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: insns size : 44 16-bit code units
11-19 15:40:55.074 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0090 0605 0191 0605 0222 006c 1070 0307
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: registers : 7
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: ins : 3
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: outs : 3
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: insns size : 44 16-bit code units
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0091 0605 0190 0605 0222 006c 1070 0307
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0002 031b 0277 0000 20f8 003b 0032 020c
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 20f8 0036 0002 020c 031b 0002 0000 20f8
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 003b 0032 020c 20f8 0036 0012 020c 10f8
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 0007 0002 020c 1312 3071 02d8 0324 020c
11-19 15:40:55.084 7193-7193/com.wnagzihxa1n.demo E/wnagzihxa1n: 10f8 0019 0002 000e
直观的效果
这个点其实可以做很多的文章,比如最简单的就是反静态分析,比如跑起来之后再修改某些指令,或者替换某些指令,再比如可以把所有方法指令全都抽空,然后动态运行的时候全部填充回去,再进一步,可以运行到这个方法的时候把指令填充回去
References
- Dalvik字节码自篡改原理及实现:https://bbs.pediy.com/thread-211331.htm
- Android安全分析挑战:运行时篡改Dalvik字节码:https://bbs.pediy.com/thread-170381.htm
- 运行时自篡改dalvik字节码delta.apk原理解析(逆向):https://bbs.pediy.com/thread-176732.htm
- apk自我保护的一种实现方式——运行时自篡改dalvik指令:http://blog.csdn.net/freshui/article/details/13620647