TMSDK1.0 native crash 分析


问题堆栈

*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
Build fingerprint: 'Huawei/MT7-CL00/hwmt7:5.1.1/HuaweiMT7-CL00/C92B314:user/release-keys'
Revision: '0'
ABI: 'arm'
pid: 20696, tid: 23196, name: AntiVirus_Scan  >>> com.huawei.systemmanager <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xdeadcab1
    r0 9cc87d71  r1 9cc87d71  r2 00000000  r3 deadcab1
    r4 0282bb88  r5 9cb66000  r6 0d4ef2dc  r7 0d4ef288
    r8 b8212c60  r9 b8b0cb50  sl 138739a0  fp ffffffff
    ip 9ccbbf40  sp 0d4ef248  lr 9cc87d89  pc 9cc87d76  cpsr 60000030
    d0  0000000000000000  d1  0000000000000000
    d2  0000200300492d7e  d3  0057e11100000419
    d4  0000141a00002004  d5  0000200500582fc8
    d6  005979d200000095  d7  000019ee00002000
    d8  0000000000000000  d9  0000000000004000
    d10 0000000000000000  d11 0000000000000000
    d12 0000000000000000  d13 0000000000000000
    d14 0000000000000000  d15 0000000000000000
    d16 0000000000000000  d17 0000000000000fff
    d18 0000000000000000  d19 0000000000000000
    d20 70f2e43070f2e430  d21 70f2e43070f2e430
    d22 70f2e43070f2e430  d23 70f2e43070f2e430
    d24 70f2e43070f2e430  d25 70f2e43070f2e430
    d26 70f2e43070f2e430  d27 70f2e43070f2e430
    d28 70f2e43070f2e430  d29 70f2e43070f2e430
    d30 70f2e43070f2e430  d31 70f2e43070f2e430
    scr 60000013

backtrace:
    #00 pc 0001fd76  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #01 pc 0001fd87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #02 pc 0001fde3  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #03 pc 0001f503  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #04 pc 0001ee87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ndk/sources/cxx-stl/gabi++/src/cxxabi.cc:271 call_terminate(&header->unwindHeader);
    #05 pc 0001ef3f  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ndk/sources/cxx-stl/gabi++/src/cxxabi.cc:335 throwException(header);
    #06 pc 0001fab7  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so (operator new(unsigned int)+50)ndk/sources/cxx-stl/gabi++/src/new.cc:105 throw std::bad_alloc();
    #07 pc 0000651f  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #08 pc 0000662f  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #09 pc 0000daf7  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #10 pc 0000dc75  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #11 pc 0000dce9  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #12 pc 0001cd87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ams/AMS_BlueShark/src/ApkInfo.cpp:278 (discriminator 1)vector<string>& constantList constantList.resize(constantSize);
    #13 pc 00019285  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ams/AMS_BlueShark/src/MalwareScanner.cpp:488
    #14 pc 00019515  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ams/AMS_BlueShark/src/MalwareScanner.cpp:904
    #15 pc 0000bd87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so (Java_com_tencent_tmsecure_module_qscanner_AmScanner_scanApkBytes+366)AmsDemo/AmsDemo_BlueShark/jni/com_tencent_tmsecure_module_qscanner_AmScann.cpp127int ret = scanner->ScanApk(apkKey, result);
    #16 pc 000707bf  /data/dalvik-cache/arm/system@framework@com.huawei.systemmanager.separated.jar@classes.dex

另一问题堆栈

*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
Build fingerprint: 'Huawei/MT7-CL00/hwmt7:5.1.1/HuaweiMT7-CL00/C92B312:user/release-keys'
Revision: '0'
ABI: 'arm'
pid: 25450, tid: 18688, name: AntiVirusScanTa  >>> com.huawei.systemmanager <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xdeadcab1
    r0 9d481d71  r1 9d481d71  r2 00000000  r3 deadcab1
    r4 9b28cfb0  r5 9cc1f000  r6 9d4ae467  r7 051fe3f8
    r8 b94bd498  r9 b958f0e8  sl 337488e0  fp ffffffff
    ip 9d4b5f40  sp 051fe3a0  lr 9d481d89  pc 9d481d76  cpsr 60000030
    d0  0000000000000000  d1  0000000000000000
    d2  646e612e6d6f6374  d3  6175682e64696f79
    d4  20646574656c6564  d5  20444e412030203d
    d6  696e6167724f7369  d7  524f20313d72657a
    d8  42c8000041c80000  d9  0000000000004000
    d10 0000000000000000  d11 0000000000000000
    d12 0000000000000000  d13 0000000000000000
    d14 0000000000000000  d15 0000000000000000
    d16 0000000000000000  d17 0000000000000fff
    d18 0000000000000000  d19 0000000000000000
    d20 6f4293e86f4293e8  d21 6f4293e86f4293e8
    d22 6f4293e86f4293e8  d23 6f4293e86f4293e8
    d24 6f4293e86f4293e8  d25 6f4293e86f4293e8
    d26 6f4293e86f4293e8  d27 6f4293e86f4293e8
    d28 6f4293e86f4293e8  d29 6f4293e86f4293e8
    d30 6f4293e86f4293e8  d31 6f4293e86f4293e8
    scr 20000013

backtrace:
    #00 pc 0001fd76  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #01 pc 0001fd87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #02 pc 0001fde3  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #03 pc 0001f503  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #04 pc 0001ee87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #05 pc 0001ef3f  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so
    #06 pc 0001fab7  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so (operator new(unsigned int)+50)
    #07 pc 0001faf7  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so (operator new[](unsigned int)+2)
    #08 pc 0001c0c1  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ams/AMS_BlueShark/src/ApkInfo.cpp:389
    #09 pc 00019591  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so ams/AMS_BlueShark/src/MalwareScanner.cpp:925
    #10 pc 0000bd87  /system/app/HwSystemManager/lib/arm/libams-1.1.4-mfr.so (Java_com_tencent_tmsecure_module_qscanner_AmScanner_scanApkBytes+366)
    #11 pc 000707bf  /data/dalvik-cache/arm/system@framework@com.huawei.systemmanager.separated.jar@classes.dex

分析

两个问题堆栈的后半部分是相同的,可能是同一问题导致的。通过addr2line分析可知,这部分相同的堆栈位于STL库函数中,因为该模块是静态包含了C++的模板库。跟踪堆栈可以发现,从应用自己的代码进入STL库代码的位置,两个堆栈并不相同,一个是

AmsError ApkInfo::ParseClassList(vector<string>& classList) {
    ASSERT_NOT_NULL_RETURN(mDexFile, _AmsErrGeneral);

    int classCount = mDexFile->GetHeader()->classDefsSize;
    classList.resize(classCount);
    for (int i = 0; i < classCount; i++) {

第五行的 vector 的 resize 函数;另一个是 `u1* buff = new u1[uncompLen+1];`,其中 u1 是 unsigned char 类型。

虽然两个地方调用的函数不同,但最终都调用到了C++运行时库的operator new。按照初步的理解,new失败应该返回NULL,而不是出现tombstone。

进一步根据堆栈分析new的源码

_GABIXX_WEAK
void* operator new(std::size_t size) throw(std::bad_alloc) {
  void* space;
  do {
    space = malloc(size);
    if (space) {
      return space;
    }
    new_handler handler = std::get_new_handler();
    if (handler == NULL) {
      throw std::bad_alloc();
    }
    handler();
  } while (space == 0);
  __builtin_unreachable();
}

可见,函数在申请内存失败后,会判断是否设置了new_handler:如果设置了,则调用该handler后重新尝试分配;否则抛出std::bad_alloc异常。由此可知,出现该tombstone的原因是new失败时抛出的std::bad_alloc异常没有被捕获处理。

进一步验证这个想法,自己写一个程序测试。程序如下

#include <stdio.h>
#include <new>
int main(void){
    printf("hello native world\n");
    throw std::bad_alloc();
    return 0;
}

出错之后的堆栈如下

Build fingerprint: 'google/shamu/shamu:5.1/LMY47I/1767468:user/release-keys'
Revision: '33696'
ABI: 'arm'
pid: 18951, tid: 18951, name: tim  >>> tim <<<
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xdeadcab1
    r0 b6fdf7f1  r1 b6fdf7f1  r2 00000000  r3 deadcab1
    r4 b6438058  r5 b6f0e000  r6 00000001  r7 bea0ba60
    r8 00000000  r9 00000000  sl 00000000  fp bea0ba9c
    ip b6fe4fb8  sp bea0ba28  lr b6fdf809  pc b6fdf7f6  cpsr 60070030
    d0  0000000000000000  d1  0000000000000000
    d2  0000000000000000  d3  0000000000000000
    d4  3032373920303237  d5  3834343639343220
    d6  3438363239323820  d7  3820353732393220
    d8  0000000000000000  d9  0000000000000000
    d10 0000000000000000  d11 0000000000000000
    d12 0000000000000000  d13 0000000000000000
    d14 0000000000000000  d15 0000000000000000
    d16 0000000000000000  d17 0000000000000fff
    d18 0000000000000000  d19 0000000000000000
    d20 0000000000000000  d21 0000000000000000
    d22 0000000000000000  d23 0000000000000000
    d24 0000000000000000  d25 0000000000000000
    d26 0000000000000000  d27 0000000000000000
    d28 0000000000000000  d29 0000000000000000
    d30 0000000000000000  d31 0000000000000000
    scr 00000000

backtrace:
    #00 pc 000017f6  /system/bin/tim (__gabixx::__default_terminate()+5)
    #01 pc 00001807  /system/bin/tim (__gabixx::__terminate(void (*)())+6)
    #02 pc 0000183b  /system/bin/tim (std::terminate()+6)
    #03 pc 00001073  /system/bin/tim (__cxxabiv1::call_terminate(_Unwind_Control_Block*)+6)
    #04 pc 00000d73  /system/bin/tim ((anonymous namespace)::throwException(__cxxabiv1::__cxa_exception*)+38) ndk/sources/cxx-stl/gabi++/src/cxxabi.cc:271
    #05 pc 00000e2f  /system/bin/tim (__cxa_throw+22)
    #06 pc 00000b87  /system/bin/tim (main+38)
    #07 pc 00012df5  /system/lib/libc.so (__libc_init+44)
    #08 pc 00000b1c  /system/bin/tim (_start+88)

可见该堆栈顶部的 #00~#05 帧(从 __default_terminate 到 __cxa_throw)与上面两个问题堆栈的 #00~#05 帧是一致的,fault addr 也同样是 0xdeadcab1,验证了上面两个问题都是new失败抛出异常之后没有处理异常导致的。

修改方法

捕捉new操作的异常并处理异常

#include <stdio.h>
#include <new>
int main(void){
    printf("hello native world\n");
    try{
        throw std::bad_alloc();
    }catch(std::bad_alloc& ba){
        printf("std::bad_alloc\n");
    }
    printf("bye world\n");
    return 0;
}

运行结果

root@shamu:/ # tim
tim
hello native world
std::bad_alloc
bye world
root@shamu:/ #

可见成功的捕捉到了异常。

遗留问题

代码中每一处涉及到使用new的地方都会有问题,包括系统库调用的函数,比如上面的vector。一一排查难度较大。


Copyright © FengGuangtu 2017