OLLVM学习

OLLVM学习

LLVM

LLVM简介

  LLVM(Low Level Virtual Machine)是由Chris Lattner发起、后由苹果公司大力推动的开源编译器框架, 包含Clang在内的一系列编译相关工具, 于2000年左右开始开发, LLVM/Clang从XCode8起作为XCode默认编译器, LLVM作为以下语言的开发工具链: C, C++, Objective-C, Swift, Ruby, Python, Haskell, Rust, D, PHP, Pure, Lua, Julia. 相比同样庞大但臃肿的GCC, LLVM的模块化设计更利于扩展和维护, 因此LLVM取代GCC是必然趋势. LLVM包含如下组件:

  • Clang, 用于做C/C++/Objective-C的编译前端
  • LLDB, 调试器
  • libc++, 提供c++基础库
  • compiler-rt
  • MLIR
  • OpenMP
  • libclc
  • klee
  • LLD 链接器
  • BOLT

第三方:

  • rustc, 用于rust的编译前端
  • swiftc, 用于swift的编译前端
  • codon, 用于python的编译前端

历史更新功能点:

  • 由Chris Lattner于2000创建
  • LLVM1.0(2003), 首次公开发布
  • LLVM3.0(2012), 引入了新的JIT编译器, 支持C++11, 基于SSA的内存安全转换, 全局ISel重构
  • LLVM3.7(2015), 支持OpenMP3.1, Clang Static Analyzer增强,AArch64支持
  • LLVM5.0(2016), 支持C++14, 引入了新的代码分析和优化技术
  • LLVM9.0(2019), 支持C++17, JIT支持WebAssembly, 优化RISC-V, 优化IR
  • LLVM12.0(2021), 支持C++20, 引入LTO优化, 支持arm64e

XCode与LLVM版本对应:

XCode LLVM
11.x 11
12.x 12
13.x 13
14.x 14
15.x 15

LLVM IR

IR简介

  IR(Intermediate Representation), 是一种LLVM定义的介于源码和汇编的中间语言, 语法类似于汇编. IR主要用于解决跨平台编译的问题, 同时也能解决优化/混淆/扩展问题. IR手册 https://llvm.org/docs/LangRef.html, 以下是IR相关的命令:

  • llc 将bitcode转换为asm/obj
  • lld 将多个bitcode/obj编译为二进制
  • lli bitcode解释器
  • opt 优化bitcode
  • llvm-ar 操作archive
  • llvm-as 将ll转换为bitcode, ll为人类可读字节码格式
  • llvm-cxxfilt c++修饰名转普通
  • llvm-dis bitcode转ll
  • llvm-extract 从bitcode提取函数
  • llvm-link 将多个bitcode合并为一个bitcode
  • clang -emit-llvm -c 源码编译为bitcode
  • clang -emit-llvm -S 源码编译为ll

第三方:

  • swiftc -emit-assembly /tmp/1.swift -o /tmp/1.s Swift源码编译为汇编
  • swiftc -emit-bc /tmp/1.swift -o /tmp/1.bc Swift源码编译为bitcode
  • swiftc -emit-ir /tmp/1.swift -o /tmp/1.ll Swift源码编译为ll
  • cargo rustc -- --emit=asm 或 rustc --emit=asm 1.rs Rust源码编译为汇编
  • cargo rustc -- --emit=llvm-bc 或 rustc --emit=llvm-bc 1.rs Rust源码编译为bitcode
  • cargo rustc -- --emit=llvm-ir 或 rustc --emit=llvm-ir 1.rs Rust源码编译为ll
  • codon build -llvm 1.py Python源码编译为ll

测试用例:

// 1.cpp
#include <stdio.h>
// Minimal test program: prints a fixed greeting and exits with status 0.
// argc/argv are accepted but not used.
int main(int argc, char** argv) {
  printf("Hello World!\n");
  return 0;
}

源码交叉编译为bitcode/ll

# for MacOS x86_64
./clang -isysroot `xcrun --sdk macosx --show-sdk-path` -arch x86_64 -emit-llvm -c /tmp/1.cpp --output=/tmp/1.bc
./clang -isysroot `xcrun --sdk macosx --show-sdk-path` -arch x86_64 -emit-llvm -S /tmp/1.cpp --output=/tmp/1.ll
# 如果要用XCode自带clang需使用xcrun, 以下同, 不建议用XCode clang, 因为不同版本Clang/llc/lld/lli互相不兼容, 且XCode不提供llc/lld/lli
xcrun --sdk macosx clang -arch x86_64 -emit-llvm -c /tmp/1.cpp --output=/tmp/1.bc
; ModuleID = '/tmp/1.cpp'
source_filename = "/tmp/1.cpp"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx11.3.0"

@.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00", align 1

; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable
define noundef i32 @main(i32 noundef %0, ptr noundef %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca ptr, align 8
  store i32 0, ptr %3, align 4
  store i32 %0, ptr %4, align 4
  store ptr %1, ptr %5, align 8
  %6 = call i32 (ptr, ...) @printf(ptr noundef @.str)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

attributes #0 = { mustprogress noinline norecurse optnone ssp uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 3]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 8, !"PIC Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 19.0.0git"}
# for iOS arm64
./clang -isysroot `xcrun --sdk iphoneos --show-sdk-path` -arch arm64 -emit-llvm -c /tmp/1.cpp --output=/tmp/1.bc
./clang -isysroot `xcrun --sdk iphoneos --show-sdk-path` -arch arm64 -emit-llvm -S /tmp/1.cpp --output=/tmp/1.ll
; ModuleID = '/tmp/1.cpp'
source_filename = "/tmp/1.cpp"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-ios14.5.0"

@.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00", align 1

; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable(sync)
define noundef i32 @main(i32 noundef %0, ptr noundef %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca ptr, align 8
  store i32 0, ptr %3, align 4
  store i32 %0, ptr %4, align 4
  store ptr %1, ptr %5, align 8
  %6 = call i32 (ptr, ...) @printf(ptr noundef @.str)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

attributes #0 = { mustprogress noinline norecurse optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+fp-armv8,+neon,+perfmon,+sha2,+v8a,+zcm,+zcz" }
attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+fp-armv8,+neon,+perfmon,+sha2,+v8a,+zcm,+zcz" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 5]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 8, !"PIC Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 1}
!4 = !{i32 7, !"frame-pointer", i32 1}
!5 = !{!"clang version 19.0.0git"}
; ModuleID = '/tmp/1.cpp'
source_filename = "/tmp/1.cpp"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-ios14.5.0"

@.str = private unnamed_addr constant [14 x i8] c"Hello World!\0A\00", align 1

; Function Attrs: mustprogress noinline norecurse optnone ssp uwtable(sync)
define noundef i32 @main(i32 noundef %0, ptr noundef %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  %5 = alloca ptr, align 8
  store i32 0, ptr %3, align 4
  store i32 %0, ptr %4, align 4
  store ptr %1, ptr %5, align 8
  %6 = call i32 (ptr, ...) @printf(ptr noundef @.str)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

attributes #0 = { mustprogress noinline norecurse optnone ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+fp-armv8,+neon,+perfmon,+sha2,+v8a,+zcm,+zcz" }
attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+fp-armv8,+neon,+perfmon,+sha2,+v8a,+zcm,+zcz" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 5]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 8, !"PIC Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 1}
!4 = !{i32 7, !"frame-pointer", i32 1}
!5 = !{!"clang version 19.0.0git"}

bitcode/ll编译为asm/obj

./llc --filetype=asm /tmp/1.ll -o /tmp/1.asm
./llc --filetype=obj /tmp/1.ll -o /tmp/1.obj
./llc --filetype=asm /tmp/1.bc -o /tmp/1.asm
./llc --filetype=obj /tmp/1.bc -o /tmp/1.obj
	.section	__TEXT,__text,regular,pure_instructions
	.build_version ios, 14, 5	sdk_version 14, 5
	.globl	_main                           ; -- Begin function main
	.p2align	2
_main:                                  ; @main
	.cfi_startproc
; %bb.0:
	sub	sp, sp, #32
	stp	x29, x30, [sp, #16]             ; 16-byte Folded Spill
	add	x29, sp, #16
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	stur	wzr, [x29, #-4]
	str	w0, [sp, #8]
	str	x1, [sp]
	adrp	x0, l_.str@PAGE
	add	x0, x0, l_.str@PAGEOFF
	bl	_printf
	mov	w0, #0                          ; =0x0
	ldp	x29, x30, [sp, #16]             ; 16-byte Folded Reload
	add	sp, sp, #32
	ret
	.cfi_endproc
                                        ; -- End function
	.section	__TEXT,__cstring,cstring_literals
l_.str:                                 ; @.str
	.asciz	"Hello World!\n"

.subsections_via_symbols

bitcode编译为可执行程序:

lld是通用程序, 不同平台需要调用不同二进制

  • Unix: ld.lld
  • macOS: ld64.lld
  • Windows: lld-link
  • WebAssembly: wasm-ld
./ld64.lld -arch arm64 -platform_version ios 12.0 14.5 -dylib /tmp/1.bc -o /tmp/1.exe

运行bitcode

./lli /tmp/1.ll
./lli /tmp/1.bc
# 均输出"Hello World!"

IR指令

Instruction
UnaryInstruction        一元指令
  UnaryOperator         一元操作
  CastInst              强制转换
    PossiblyNonNegInst  非负指令
BinaryOperator          二元操作
  PossiblyDisjointInst
CmpInst                 比较操作
CallBase                调用操作
FuncletPadInst    

                    Super
AllocaInst          UnaryInstruction    An instruction to allocate memory on the stack.
LoadInst            UnaryInstruction    An instruction for reading from memory. This uses the SubclassData
                                        field in Value to store whether or not the load is volatile.
StoreInst           Instruction         An instruction for storing to memory.
FenceInst           Instruction         An instruction for ordering other memory operations.
AtomicCmpXchgInst   Instruction         An instruction that atomically checks whether a specified value 
                                        is in a memory location, and, if it is, stores a new value there. 
                                        The value returned by this instruction is a pair containing the 
                                        original value as first element, and an i1 indicating success 
                                        (true) or failure (false) as second element.
AtomicRMWInst       Instruction         An instruction that atomically reads a memory location, combines 
                                        it with another value, and then stores the result back.  Returns 
                                        the old value.
GetElementPtrInst   Instruction         An instruction for type-safe pointer arithmetic to access elements 
                                        of arrays and structs
ICmpInst            CmpInst             This instruction compares its operands according to the predicate 
                                        given to the constructor. It only operates on integers or pointers. 
                                        The operands must be identical types. Represent an integer comparison 
                                        operator.
FCmpInst            CmpInst             This instruction compares its operands according to the predicate 
                                        given to the constructor. It only operates on floating point values 
                                        or packed vectors of floating point values. The operands must be 
                                        identical types. Represents a floating point comparison operator.
CallInst            CallBase            This class represents a function call, abstracting a target machine's 
                                        calling convention. This class uses low bit of the SubClassData
                                        field to indicate whether or not this is a tail call. The rest 
                                        of the bits hold the calling convention of the call.
SelectInst          Instruction         This class represents the LLVM 'select' instruction.
VAArgInst           UnaryInstruction    This class represents the va_arg llvm instruction, which returns 
                                        an argument of the specified type given a va_list and increments 
                                        that list
ExtractElementInst  Instruction         This instruction extracts a single (scalar) element from a VectorType value
InsertElementInst   Instruction         This instruction inserts a single (scalar) element into a VectorType value
ShuffleVectorInst   Instruction         This instruction constructs a fixed permutation of two input vectors. 
                                        For each element of the result vector, the shuffle mask selects an 
                                        element from one of the input vectors to copy to the result. 
                                        Non-negative elements in the mask represent an index into the 
                                        concatenated pair of input vectors. PoisonMaskElem (-1) specifies 
                                        that the result element is poison. For scalable vectors, all the 
                                        elements of the mask must be 0 or -1. This requirement may be 
                                        relaxed in the future.
ExtractValueInst    UnaryInstruction    This instruction extracts a struct member or array element value 
                                        from an aggregate value.
InsertValueInst     Instruction         This instruction inserts a struct field or array element value 
                                        into an aggregate value.
PHINode             Instruction         PHINode - The PHINode class is used to represent the magical mystical 
                                        PHI node, that can not exist in nature, but can be synthesized in a 
                                        computer scientist's overactive imagination.
LandingPadInst      Instruction         The landingpad instruction holds all of the information necessary 
                                        to generate correct exception handling. The landingpad instruction 
                                        cannot be moved from the top of a landing pad block, which itself 
                                        is accessible only from the 'unwind' edge of an invoke. This uses 
                                        the SubclassData field in Value to store whether or not the landingpad 
                                        is a cleanup.
ReturnInst          Instruction         Return a value (possibly void), from a function. Execution does 
                                        not continue in this function any longer.
BranchInst          Instruction         Conditional or Unconditional Branch instruction.
SwitchInst          Instruction         Multiway switch.
IndirectBrInst      Instruction         Indirect Branch Instruction.
InvokeInst          CallBase            Invoke instruction. The SubclassData field is used to hold the 
                                        calling convention of the call.
CallBrInst          CallBase            CallBr instruction, tracking function calls that may not return 
                                        control but instead transfer it to a third location. The SubclassData 
                                        field is used to hold the calling convention of the call.
ResumeInst          Instruction         Resume the propagation of an exception.
CatchSwitchInst     Instruction
CleanupPadInst      FuncletPadInst
CatchPadInst        FuncletPadInst
CatchReturnInst     Instruction
CleanupReturnInst   Instruction
UnreachableInst     Instruction         This function has undefined behavior. In particular, the presence 
                                        of this instruction indicates some higher level knowledge that 
                                        the end of the block cannot be reached.
TruncInst           CastInst            This class represents a truncation of integer types.
ZExtInst            CastInst            This class represents zero extension of integer types.
SExtInst            CastInst            This class represents a sign extension of integer types.
FPTruncInst         CastInst            This class represents a truncation of floating point types.
FPExtInst           CastInst            This class represents an extension of floating point types.
UIToFPInst          CastInst            This class represents a cast from unsigned integer to floating point.
SIToFPInst          CastInst            This class represents a cast from signed integer to floating point.
FPToUIInst          CastInst            This class represents a cast from floating point to unsigned integer.
FPToSIInst          CastInst            This class represents a cast from floating point to signed integer.
IntToPtrInst        CastInst            This class represents a cast from an integer to a pointer.
PtrToIntInst        CastInst            This class represents a cast from a pointer to an integer.
BitCastInst         CastInst            This class represents a no-op cast from one type to another.
AddrSpaceCastInst   CastInst            This class represents a conversion between pointers from one address
                                        space to another.
FreezeInst          UnaryInstruction    This class represents a freeze function that returns random concrete 
                                        value if an operand is either a poison value or an undef value

LLVM Pass

简介

  LLVM Pass是LLVM提供的用于优化/分析/处理IR的组件, 第三方可以自由开发Pass从而干涉编译过程, 实现代码优化/静态分析/代码混淆. 上一节学习了CMake的基本用法, 现在来用CMake实现最简单的LLVM Pass. 笔者的环境仍是MacOS. 需要注意的是LLVM Pass从LLVM版本支持可分为Legacy Pass和New Pass, 前者是历史遗留, 具体兼容性如下表.

LLVM 默认 可选
5-12 LegacyPassManager -fexperimental-new-pass-manager 启用New, 该功能有限
13-14 NewPassManager -flegacy-pass-manager 启用 Legacy
15-? NewPassManager

   Pass从类型可分为FunctionPass, ModulePass, LoopPass, RegionPass, MachineFunctionPass, AnalysePass, CallGraphSCCPass等, FunctionPass用于做函数层面的操作, ModulePass用于做模块层面的操作(模块包括函数, 全局变量等, 所以也可以包含FunctionPass的功能), AnalysePass主要做性能测试/压力测试/调优/分析/日志等.

  • 一个项目由多个Module构成, 一个Module约等于一个存在函数实体的文件, 头文件或者被包含的文件不算
  • 一个Module由多个Function/GlobalVariable/GlobalAlias/GlobalIFunc/NamedMDNode构成, 前两个最常用
  • 一个Function由多个BasicBlock构成, 继承过程: Function - GlobalObject - GlobalValue - Constant - User - Value
  • 一个BasicBlock由多个Instruction构成, 每个BasicBlock都有一个结束指令, 继承过程: BasicBlock - Value
  • 一个Instruction即为一条IR指令, 最终会编译为一条或多条汇编指令, 继承过程: Instruction - User - Value
  • GlobalVariable继承过程: GlobalVariable - GlobalObject - GlobalValue - Constant - User - Value

什么情况下使用LLVM Pass?   如第一篇所述, LLVM衍生出众多编译器前端, 如clang, swiftc, rustc等. 目前第三方代码嵌入LLVM有如下三种方式:

  • 动态Pass方式, 前端运行时动态加载Pass, 开发成本最低
  • 静态Pass方式, 编译时静态链接Pass
  • 修改LLVM源码强行嵌入, 是大部分Ollvm采用的方式; 如果编译器前端不支持Pass则是唯一选择

AppleClang的Pass   AppleClang, 即XCode自带的Clang, 苹果因为安全性考虑阉割掉了LLVM Pass, 因此常规方法并不能加载起来. 当然如果逆向技术过关, 也很容易将Pass改为兼容AppleClang的.

编码

  只需要demo.cpp和CMakeLists.txt两个文件. 因为LLVM的版本较多, 网上开源的LLVM Pass项目只支持部分版本, 笔者根据刚学习的CMake将其改造为兼容LLVM8-18.

项目地址:https://github.com/lich4/llvm-pass-hikari

demo.cpp

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/raw_ostream.h"
#if LLVM_VERSION_MAJOR <= 15
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#endif

#include "llvm/IRReader/IRReader.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

#include <iostream>

#define PASSNAME          "MyPassDemo"

// Version shim: selects which pointer-element-type accessor name getPtElemType
// expands to, based on the LLVM major version.
// NOTE(review): getPtElemType is not referenced in the visible part of this
// file — confirm it is still needed.
#if LLVM_VERSION_MAJOR <= 13
#define getPtElemType getPointerElementType
#else
#define getPtElemType getNonOpaquePointerElementType
#endif

// ---------------- Legacy Pass ---------------- //
class MyPassDemoLegacy : public FunctionPass {
public:
    static char ID;
    MyPassDemoLegacy() : FunctionPass(ID) {}
    virtual bool runOnFunction(Function& F) override {
        errs() << "MyPassDemoLegacy\n";
        return false;
    }
};
// Storage for the pass identifier declared inside MyPassDemoLegacy.
char MyPassDemoLegacy::ID = 0;
#if LLVM_VERSION_MAJOR <= 15
// LLVM <= 15: register a callback at the EP_EarlyAsPossible extension point;
// the callback adds a fresh MyPassDemoLegacy to the pass manager it is given.
static RegisterStandardPasses RegisterMyPass(PassManagerBuilder::EP_EarlyAsPossible, 
    [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
        PM.add(new MyPassDemoLegacy());
    }
);
#else
// Newer LLVM: register the pass under PASSNAME (used for both string arguments).
// NOTE(review): the two `false` flags are presumably CFG-only / is-analysis — confirm.
static RegisterPass<MyPassDemoLegacy> RegisterMyPass(PASSNAME, PASSNAME, false, false);
#endif
// ---------------- Legacy Pass ---------------- //

// ---------------- New Pass ---------------- //
// Version shim: on LLVM <= 13 the unqualified name OptimizationLevel is mapped
// to PassBuilder::OptimizationLevel so the code below can use a single spelling.
#if LLVM_VERSION_MAJOR <= 13
#define OptimizationLevel PassBuilder::OptimizationLevel
#endif

// New pass-manager demo: a module pass that logs its own name to stderr and
// reports every analysis as preserved (the module is not modified).
class MyPassDemo : public PassInfoMixin<MyPassDemo> {
public:
    // Marks the pass as required.
    static bool isRequired() { return true; }

    // Neither the module nor the analysis manager is inspected.
    PreservedAnalyses run(Module &, ModuleAnalysisManager &) {
        errs() << "MyPassDemo\n";
        return PreservedAnalyses::all();
    }
};

// Pass-plugin entry point (C linkage, weak): returns the plugin description
// together with the function that installs MyPassDemo into a PassBuilder.
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() {
    auto installCallbacks = [](PassBuilder &PB) {
        // Add MyPassDemo through the pipeline-start extension point.
        // The callback signature gained an OptimizationLevel argument in LLVM 12.
#if LLVM_VERSION_MAJOR >= 12
        auto atPipelineStart = [](ModulePassManager &MPM, OptimizationLevel) {
            MPM.addPass(MyPassDemo());
        };
#else
        auto atPipelineStart = [](ModulePassManager &MPM) {
            MPM.addPass(MyPassDemo());
        };
#endif
        PB.registerPipelineStartEPCallback(atPipelineStart);

        // Pipeline-text parsing: the element name is ignored, so every name
        // offered to this callback is accepted and adds MyPassDemo.
        auto onPipelineElement = [](StringRef, ModulePassManager &MPM,
                                    ArrayRef<PassBuilder::PipelineElement>) {
            MPM.addPass(MyPassDemo());
            return true;
        };
        PB.registerPipelineParsingCallback(onPipelineElement);
    };

    // Positional order: APIVersion, PluginName, PluginVersion,
    // RegisterPassBuilderCallbacks.
    return {LLVM_PLUGIN_API_VERSION, PASSNAME, "1.0", installCallbacks};
}
// ---------------- New Pass ---------------- //

// Runs automatically when the plugin library is loaded (constructor
// attribute) and writes a one-line marker to stdout.
__attribute__((constructor)) void onInit() {
    fputs("MyPassDemo onInit\n", stdout);
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.6)
project(MyPassDemo)

set(CMAKE_CXX_STANDARD 17)
# Default to a Debug build, but let -DCMAKE_BUILD_TYPE=... on the command line win.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Debug")
endif()

# Locate LLVM via LLVMConfig.cmake; this sets up LLVM_DIR, LLVM_DEFINITIONS, etc.
find_package(LLVM REQUIRED CONFIG)

# Needed for compatibility with LLVM <= 13 so that include(AddLLVM) is found.
list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}")

include(AddLLVM)                              # provides add_llvm_pass_plugin
include(HandleLLVMOptions)

add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBRARY_DIRS})

# Compatibility with LLVM <= 9, which has no add_llvm_pass_plugin: provide a
# minimal stand-in that always builds the plugin as a loadable MODULE.
if(NOT COMMAND add_llvm_pass_plugin)
  message(WARNING "add_llvm_pass_plugin not exist")
  function(add_llvm_pass_plugin name)
    cmake_parse_arguments(ARG "NO_MODULE" "SUBPROJECT" "" ${ARGN})
    add_llvm_library(${name} MODULE ${ARG_UNPARSED_ARGUMENTS})
    # This fallback never links the plugin statically into the tools, so the
    # status line says OFF instead of expanding an unset variable.
    message(STATUS "Registering ${name} as a pass plugin (static build: OFF)")
  endfunction(add_llvm_pass_plugin)
endif()

# The LLVM major version is part of the target name so that plugins built
# against different LLVM versions can coexist in one build directory.
add_llvm_pass_plugin(MyPassDemo${LLVM_VERSION_MAJOR}
    demo.cpp
)

测试

编译:

# Configure and build the plugin against the LLVM build that LLVM_DIR points at
export LLVM_DIR=/path/to/llvm12/build/lib/cmake/llvm
cmake -B build --fresh
cmake --build build
# for LLVM<=12      Legacy Pass (loaded through -Xclang -load)
llvm12/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -Xclang -load -Xclang build/MyPassDemo12.dylib /tmp/1.cpp
# for LLVM<=12      Legacy Pass (equivalent to the command above)
llvm12/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fplugin=build/MyPassDemo12.dylib /tmp/1.cpp
# for LLVM=9/10/11 New Pass (takes effect with -O3)
llvm11/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fexperimental-new-pass-manager -fpass-plugin=build/MyPassDemo11.dylib /tmp/1.cpp -O3
# for LLVM=12      New Pass
llvm12/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fexperimental-new-pass-manager -fpass-plugin=build/MyPassDemo12.dylib /tmp/1.cpp
# for LLVM=13/14    Legacy Pass
llvm13/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -flegacy-pass-manager -fplugin=build/MyPassDemo13.dylib /tmp/1.cpp
# for LLVM=13/14    New Pass
llvm13/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fpass-plugin=build/MyPassDemo13.dylib /tmp/1.cpp
# for LLVM>=15      New Pass
llvm15/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fpass-plugin=build/MyPassDemo15.dylib /tmp/1.cpp

opt支持ll需要llvm>=15

llvm15/build/bin/opt --O3 -S -o /tmp/test_new.ll /tmp/test.ll 
llvm15/build/bin/opt -load-pass-plugin build/MyPassDemo15.dylib -passes all -S -o /tmp/test_new.ll /tmp/test.ll 

经过测试可以发现, Pass的LLVM版本需要和LLVM大版本一致, 否则也会产生错误, 但总体来说Pass解决了LLVM编译代码量过大的问题, 所以仍然值得采用, 测试结果: hello MyPassDemo

Pass加载时机

// Each declaration is preceded by: <legacy extension point>  <new-PM callback list>.
// EP_Peephole                  PeepholeEPCallbacks
void registerPeepholeEPCallback(const std::function<void(FunctionPassManager&, OptimizationLevel)>&);
// EP_LateLoopOptimizations     LateLoopOptimizationsEPCallbacks
void registerLateLoopOptimizationsEPCallback(const std::function<void(LoopPassManager&, OptimizationLevel)>&);
// EP_LoopOptimizerEnd          LoopOptimizerEndEPCallbacks
void registerLoopOptimizerEndEPCallback(const std::function<void(LoopPassManager&, OptimizationLevel)>&);
// EP_ScalarOptimizerLate       ScalarOptimizerLateEPCallbacks
void registerScalarOptimizerLateEPCallback(const std::function<void(FunctionPassManager&, OptimizationLevel)>&);
// EP_CGSCCOptimizerLate        CGSCCOptimizerLateEPCallbacks
void registerCGSCCOptimizerLateEPCallback(const std::function<void(CGSCCPassManager&, OptimizationLevel)>&);
// EP_VectorizerStart           VectorizerStartEPCallbacks
void registerVectorizerStartEPCallback(const std::function<void(FunctionPassManager&, OptimizationLevel)>&);
// EP_EarlyAsPossible           PipelineStartEPCallbacks
void registerPipelineStartEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);
// EP_ModuleOptimizerEarly      PipelineEarlySimplificationEPCallbacks
void registerPipelineEarlySimplificationEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);
// (no legacy extension point listed)
void registerOptimizerEarlyEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);
// EP_OptimizerLast             OptimizerLastEPCallbacks
void registerOptimizerLastEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);
// (no legacy extension point listed)
void registerFullLinkTimeOptimizationEarlyEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);
// (no legacy extension point listed)
void registerFullLinkTimeOptimizationLastEPCallback(const std::function<void(ModulePassManager&, OptimizationLevel)>&);

使用helloworld测试EP顺序:

  • Debug: PipelineStart -> PipelineEarlySimplification -> OptimizerLast
  • Release: PipelineStart -> PipelineEarlySimplification -> Peephole -> Peephole -> Peephole -> ScalarOptimizerLate -> Peephole -> VectorizerStart -> OptimizerLast

OLLVM

现存OLLVM项目汇总

  之前提到LLVM的架构优于Gcc, 因此可以在其上更好地实现编译器层级的代码混淆. 这就是ollvm(obfuscated-llvm). 笔者第一次接触到ollvm是在2017年做漏洞挖掘的时候. 下图为写这篇文章时现存的开源ollvm项目

  • 最早的OLLVM

https://github.com/obfuscator-llvm/obfuscator, 是瑞士西北应用科技大学安全实验室于2010年6月份发起的一个项目, 支持LLVM3.3-4.0. 之后出现的所有ollvm项目均基于该项目开发. 该项目首次提出LLVM方式进行“控制流伪造”(BCF),“控制流平坦化”(FLA),“指令替换”(SUB)的代码混淆方式.

  • Hikari

https://github.com/HikariObfuscator/Hikari, 2018年开始的Ollvm项目, 支持LLVM6.0-8.0, 与obfuscator相比增加了如下功能:

  • OC混淆, 混淆Objective-C类名及selector, 防止你优雅的逆向, 看一眼函数就知道干嘛的了
  • 函数包装, 将一个函数调用变成深层嵌套函数, 功能不强, 可以恶心一些小白
  • 字符串加密, 通过异或的方式加密字符串到数据区, 首次使用时解密
  • 间接分支, 将跳转地址改为跳转寄存器, 大大增强FLA难度
  • 拆分基本块, 功能也很强, 但容易产生崩溃
  • 函数调用混淆, 把函数调用改成dlopen+dlsym的动态调用
  • Hikari-LLVM15

https://github.com/61bcdefg/Hikari-LLVM15, 2022年开始的替代Hikari的项目, Hikari原作者不开发了. 支持LLVM15+, 与Hikari相比增加了如下功能:

  • 支持Swift
  • 反调试
  • 反Hook
  • 常量加密
  • 支持arm64e
  • Pluto

https://github.com/bluesadi/Pluto, 2021年开始的Ollvm项目, 支持LLVM14, 与obfuscator相比增加了如下功能:

  • 增强的FLA
  • 增强BCF为“随机控制流”(RCF)
  • 反Angr, Angr是一种利用符号执行动态分析Ollvm的方式
  • MBA(Mixed Boolean-Arithmetic)混淆, 将一个常量表达式用一系列运算代替, 算伪造指令流, 但比花指令高级多了

  混合布尔算术(Mixed Boolean Arithmetic)是2007年提出的一种混淆算法, 这种算法由算数运算(例如ADD/SUB/MUL)和布尔运算(例如AND/OR/NOT)的混合使用组成

  • goron

https://github.com/amimo/goron, 2019年开始的Ollvm项目, 支持LLVM7/8/9/10, 与obfuscator相比增加了如下功能:

  • 间接调用, 将引用的函数地址变换后存到数据区, 再动态调用
  • 间接全局变量, 将引用的变量地址变换后存到数据区, 再动态调用
  • Arkari

https://github.com/KomiMoe/Arkari, 2022年开始的基于goron的项目, goron原作者不开发了. 支持LLVM14+, 功能和goron一致

  • xVMP

https://github.com/GANGE666/xVMP, 用LLVM实现的VMP, 支持LLVM8, 只支持Debug不支持Release

  • 其他Ollvm

https://github.com/DeNA/DeClang, 用于Swift混淆, 不支持C++
https://github.com/open-obfuscator/o-mvll, 基于LLVMPass, 支持Python, 扩展性较强
https://github.com/25077667/VMPilot, 也是基于LLVM的VMP

  关于基于Ollvm的VMP, iOS上因为有内存保护, 非越狱无法动态更改指令, 因此iOS上的VMP只能沦为IR解释器, 因此其解密难度比Lua/JS等解释器低得多. 所以还不如用跨语言方式去增加整体复杂度.

  就跨平台兼容性和稳定性而言, 原版Ollvm > Hikari > 其他Ollvm, 这里的不稳定指的是编译失败, 或运行时未达到混淆目的, 或运行时因为混淆导致问题; 就功能而言, Hikari > 原版Ollvm, 其他Ollvm各有特色. 可以根据自己实际情况使用

混淆控制方式

简介

  在很多实际项目中, 由于以下原因无法对整个项目完全混淆, 实际操作时, 常常需要根据业务敏感程度使用不同程度的混淆, 比如攻防模块多用一些混淆:

  • 项目较大, 依赖较多, 或使用了很多header-only的库, 混淆了很多不需要混淆的代码, 导致编译出来的二进制过大
  • 项目较大, 依赖较多, 使用了平坦化(或其他方式)混淆了很多不需要混淆的代码, 导致编译极其缓慢, Ollvm比较耗内存
  • 混淆了复杂算法, 导致运行时耗时比正常大很多, 一般使用平坦化后耗时会增加10%以上
  • 混淆过多可能不允许上架AppStore, GooglePlay等

  不同的Ollvm采用的方式大同小异, 无非是以下几种:

  • 对需要混淆的模块单独指定命令行参数, 如-mllvm -fla, 这种方式兼容所有支持llvm命令行参数的编译器前端
  • 使用环境变量指定混淆参数
  • 对需要混淆的函数指定注解, 如__attribute((__annotate__(("fla"))))(新式语法[[clang::annotate("fla")]]), 这种方式仅支持C/C++, Objective-C和其他语言均不支持
  • 对需要混淆的函数指定标记函数, 如下所示, 这种方式支持Objective-C
// Marker function: only declared here. A call to it inside a method body is
// what tags that method for the "fla" obfuscation.
extern void hikari_fla(void);
@implementation foo2:NSObject
+(void)foo{
  // Marker call: requests flattening for this method.
  hikari_fla();
  NSLog(@"FOOOO2");
}
@end
  • 使用配置文件来指定需要混淆的函数和模块, 这种方式兼容所有编译器前端, 用于解决前几种方式搞不定的情况

现存控制方式

  • 命令行参数
// Command-line switch "enable-cffobf": a boolean option that defaults to
// false, is not hidden, and is described as enabling flattening.
static cl::opt<bool> EnableFlattening("enable-cffobf", cl::init(false),
                                      cl::NotHidden,
                                      cl::desc("Enable Flattening."));
  • 函数注解
/// Collects the annotation strings attached to \p f in the module-level
/// `llvm.global.annotations` array.
///
/// Each matching annotation is lower-cased and appended to the result followed
/// by a single space, so callers can search the returned string for a keyword.
/// Returns an empty string when the module has no annotations or none of them
/// refers to \p f.
///
/// Both IR shapes are handled: typed pointers, where the entry holds
/// `bitcast(@f)` and `getelementptr(@.str, 0, 0)`, and opaque pointers, where
/// the entry references `@f` and `@.str` directly.
std::string readAnnotate(Function *f) {
  std::string annotation;

  // Module-level array that holds one struct per annotated global.
  GlobalVariable *glob =
      f->getParent()->getGlobalVariable("llvm.global.annotations");
  if (glob == nullptr || !glob->hasInitializer())
    return annotation;

  auto *entries = dyn_cast<ConstantArray>(glob->getInitializer());
  if (entries == nullptr)
    return annotation;

  for (unsigned i = 0; i < entries->getNumOperands(); ++i) {
    auto *entry = dyn_cast<ConstantStruct>(entries->getOperand(i));
    if (entry == nullptr || entry->getNumOperands() < 2)
      continue;

    // Operand 0 is the annotated value. stripPointerCasts() looks through a
    // bitcast if there is one and is a no-op for a direct reference, so the
    // same comparison works with and without opaque pointers.
    if (entry->getOperand(0)->stripPointerCasts() != f)
      continue;

    // Operand 1 points at the annotation string; it is either the global
    // itself or an all-zero-index getelementptr into it.
    auto *annoteStr =
        dyn_cast<GlobalVariable>(entry->getOperand(1)->stripPointerCasts());
    if (annoteStr == nullptr || !annoteStr->hasInitializer())
      continue;

    auto *data = dyn_cast<ConstantDataSequential>(annoteStr->getInitializer());
    if (data != nullptr && data->isString())
      annotation += data->getAsString().lower() + " ";
  }
  return annotation;
}
  • 标记函数
/// Looks for a call to a marker function whose name contains
/// "hikari_<attribute>" inside \p f. The first such call is erased from the
/// function and true is returned; if there is none, \p f is left untouched
/// and false is returned. Calls without a directly known callee are skipped.
bool readFlag(Function *f, std::string attribute) {
  const std::string marker = "hikari_" + attribute;
  for (Instruction &inst : instructions(f)) {
    auto *call = dyn_cast<CallInst>(&inst);
    if (call == nullptr)
      continue;

    Function *callee = call->getCalledFunction();
    if (callee == nullptr || !callee->getName().contains(marker))
      continue;

    // Returning right after the erase keeps the invalidated iterator unused.
    call->eraseFromParent();
    return true;
  }
  return false;
}

控制编译器优化

  由于现代编译器的卓越能力, 一些混淆手段很可能在Release下被还原导致实际未混淆, 这种情况下Ollvm项目以Debug编译反而能达到效果, 而笔者认为正确的解决方式为, 在静态区构造特殊数据, 用于关联待混淆常量及函数, 并将特殊数据指定为used避免被优化. 以下是Clang支持的针对函数和变量优化的语法, 对函数关闭优化可以同时防止内联, 对变量关闭优化可以防止其被优化成常量.

  • __attribute__((optnone)) 对函数关闭优化 (如果是Gcc可以指定优化等级)
  • #pragma clang optimize off 与 #pragma clang optimize on 对两者之间区间内的函数关闭优化
  • volatile 对变量关闭优化
  • __attribute__((used)) 对全局变量关闭优化

字符串加密模块分析

Hikari源码

// Encrypts the string constants referenced by `Func` and inserts the run-time
// decryption code at the function entry.
//
// Steps, in order:
//   1. Collect the global variables and users reachable from the function's
//      instructions (HandleUser) and de-duplicate the globals.
//   2. Classify every collected global: ObjC constant strings
//      (struct.__NSConstantString_tag), raw data sequences, constant
//      aggregates (delegated to processConstantAggregate) or unhandleable.
//   3. For every raw string with an i8/i16/i32/i64 element type, build a key
//      array, an XOR-encrypted array and a dummy array, and create the
//      "EncryptedString" and "DecryptSpace" globals from them.
//   4. Build the ObjC counterparts through ObjectiveCString.
//   5. Redirect the recorded users to the decrypt-space globals and erase the
//      old globals that no longer have uses.
//   6. Split the entry block and insert "StringDecryptionBB", guarded by an
//      atomic load of the per-function status global.
//
// Besides its parameter, the function reads and/or updates state declared
// outside this block: genedgv, mgv2keys, unencryptedindex, encstatus,
// cryptoutils, ElementEncryptProbTemp, appleptrauth and opaquepointers.
//
// Returns early (without touching the function body) when no key mapping was
// produced, i.e. when GV2Keys is empty.
void HandleFunction(Function *Func)
{
    FixFunctionConstantExpr(Func);
    SmallVector<GlobalVariable *, 32> Globals;
    std::set<User *> Users;
    {
        // VisitedUsers is only needed while walking the instructions.
        std::unordered_set<User *> VisitedUsers;
        for (Instruction &I : instructions(Func))
            HandleUser(&I, Globals, Users, VisitedUsers);
    }
    std::set<GlobalVariable *> rawStrings;
    std::set<GlobalVariable *> objCStrings;
    std::unordered_map<GlobalVariable *,
                       std::pair<Constant *, GlobalVariable *>>
        GV2Keys;
    std::unordered_map<GlobalVariable * /*old*/,
                       std::pair<GlobalVariable * /*encrypted*/,
                                 GlobalVariable * /*decrypt space*/>>
        old2new;

    // Order-preserving de-duplication of Globals: for each element, drop every
    // later duplicate, then trim the tail once.
    auto end = Globals.end();
    for (auto it = Globals.begin(); it != end; ++it)
    {
        end = std::remove(it + 1, end, *it);
    }
    Globals.erase(end, Globals.end());

    Module *M = Func->getParent();

    SmallVector<GlobalVariable *, 32> transedGlobals, unhandleablegvs;

    // Classification work-list. Globals may grow while it is being iterated
    // (a global whose initializer points at another global); in that case the
    // range-for is abandoned via breakThisFor and restarted by the do/while
    // until every global has been visited.
    do
    {
        for (GlobalVariable *GV : Globals)
        {
            if (std::find(transedGlobals.begin(), transedGlobals.end(), GV) ==
                transedGlobals.end())
            {
                bool breakThisFor = false;
                if (handleableGV(GV))
                {
                    if (GlobalVariable *CastedGV = dyn_cast<GlobalVariable>(
                            GV->getInitializer()->stripPointerCasts()))
                    {
                        if (std::find(Globals.begin(), Globals.end(), CastedGV) ==
                            Globals.end())
                        {
                            Globals.emplace_back(CastedGV);
                            ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer());
                            Users.insert(CE ? CE : GV->getInitializer());
                            // Globals was modified: its iterators are no longer
                            // safe to advance.
                            breakThisFor = true;
                        }
                    }
                    if (GV->getInitializer()->getType() ==
                        StructType::getTypeByName(M->getContext(),
                                                  "struct.__NSConstantString_tag"))
                    {
                        // ObjC constant string: operand 2 of the struct is the
                        // underlying raw character data.
                        objCStrings.insert(GV);
                        rawStrings.insert(cast<GlobalVariable>(
                            cast<ConstantStruct>(GV->getInitializer())
                                ->getOperand(2)
                                ->stripPointerCasts()));
                    }
                    else if (isa<ConstantDataSequential>(GV->getInitializer()))
                    {
                        rawStrings.insert(GV);
                    }
                    else if (ConstantAggregate *CA =
                                 dyn_cast<ConstantAggregate>(GV->getInitializer()))
                    {
                        processConstantAggregate(GV, CA, &rawStrings, &unhandleablegvs,
                                                 &Globals, &Users, &breakThisFor);
                    }
                }
                else
                {
                    unhandleablegvs.emplace_back(GV);
                }
                transedGlobals.emplace_back(GV);
                if (breakThisFor)
                    break;
            }
        } // foreach loop
    } while (transedGlobals.size() != Globals.size());
    // Globals that cannot be handled here but were generated by an earlier
    // run (present in genedgv) reuse the keys recorded in mgv2keys.
    for (GlobalVariable *ugv : unhandleablegvs)
        if (std::find(genedgv.begin(), genedgv.end(), ugv) != genedgv.end())
        {
            std::pair<Constant *, GlobalVariable *> mgv2keysval = mgv2keys[ugv];
            if (ugv->getInitializer()->getType() ==
                StructType::getTypeByName(M->getContext(),
                                          "struct.__NSConstantString_tag"))
            {
                GlobalVariable *rawgv =
                    cast<GlobalVariable>(cast<ConstantStruct>(ugv->getInitializer())
                                             ->getOperand(2)
                                             ->stripPointerCasts());
                mgv2keysval = mgv2keys[rawgv];
                if (mgv2keysval.first && mgv2keysval.second)
                {
                    GV2Keys[rawgv] = mgv2keysval;
                }
            }
            else if (mgv2keysval.first && mgv2keysval.second)
            {
                GV2Keys[ugv] = mgv2keysval;
            }
        }
    // Build key / encrypted / dummy arrays for every raw string.
    for (GlobalVariable *GV : rawStrings)
    {
        if (GV->getInitializer()->isZeroValue() ||
            GV->getInitializer()->isNullValue())
            continue;
        ConstantDataSequential *CDS =
            dyn_cast<ConstantDataSequential>(GV->getInitializer());
        // When the initializer is not itself a data sequential it is treated
        // as an aggregate whose operand 0 holds the character data.
        bool rust_string = !CDS;
        if (rust_string)
            CDS = cast<ConstantDataSequential>(
                cast<ConstantAggregate>(GV->getInitializer())->getOperand(0));
        Type *ElementTy = CDS->getElementType();
        if (!ElementTy->isIntegerTy())
        {
            continue;
        }
        IntegerType *intType = cast<IntegerType>(ElementTy);
        Constant *KeyConst, *EncryptedConst, *DummyConst = nullptr;
        unencryptedindex[GV] = {};
        // The four branches below are identical except for the element width.
        // Per element: with the probability controlled by
        // ElementEncryptProbTemp the element is either left in clear (its
        // index is recorded, key 1 is stored as a placeholder and the dummy
        // already holds the plain value) or XOR-encrypted with a random key
        // (the dummy then holds a random value). Note that `encry` only
        // receives the encrypted elements.
        if (intType == Type::getInt8Ty(M->getContext()))
        {
            std::vector<uint8_t> keys, encry, dummy;
            for (unsigned i = 0; i < CDS->getNumElements(); i++)
            {
                if (cryptoutils->get_range(100) >= ElementEncryptProbTemp)
                {
                    unencryptedindex[GV].emplace_back(i);
                    keys.emplace_back(1);
                    dummy.emplace_back(CDS->getElementAsInteger(i));
                    continue;
                }
                const uint8_t K = cryptoutils->get_uint8_t();
                const uint64_t V = CDS->getElementAsInteger(i);
                keys.emplace_back(K);
                encry.emplace_back(K ^ V);
                dummy.emplace_back(cryptoutils->get_uint8_t());
            }
            KeyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint8_t>(keys));
            EncryptedConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint8_t>(encry));
            DummyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint8_t>(dummy));
        }
        else if (intType == Type::getInt16Ty(M->getContext()))
        {
            std::vector<uint16_t> keys, encry, dummy;
            for (unsigned i = 0; i < CDS->getNumElements(); i++)
            {
                if (cryptoutils->get_range(100) >= ElementEncryptProbTemp)
                {
                    unencryptedindex[GV].emplace_back(i);
                    keys.emplace_back(1);
                    dummy.emplace_back(CDS->getElementAsInteger(i));
                    continue;
                }
                const uint16_t K = cryptoutils->get_uint16_t();
                const uint64_t V = CDS->getElementAsInteger(i);
                keys.emplace_back(K);
                encry.emplace_back(K ^ V);
                dummy.emplace_back(cryptoutils->get_uint16_t());
            }
            KeyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint16_t>(keys));
            EncryptedConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint16_t>(encry));
            DummyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint16_t>(dummy));
        }
        else if (intType == Type::getInt32Ty(M->getContext()))
        {
            std::vector<uint32_t> keys, encry, dummy;
            for (unsigned i = 0; i < CDS->getNumElements(); i++)
            {
                if (cryptoutils->get_range(100) >= ElementEncryptProbTemp)
                {
                    unencryptedindex[GV].emplace_back(i);
                    keys.emplace_back(1);
                    dummy.emplace_back(CDS->getElementAsInteger(i));
                    continue;
                }
                const uint32_t K = cryptoutils->get_uint32_t();
                const uint64_t V = CDS->getElementAsInteger(i);
                keys.emplace_back(K);
                encry.emplace_back(K ^ V);
                dummy.emplace_back(cryptoutils->get_uint32_t());
            }
            KeyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint32_t>(keys));
            EncryptedConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint32_t>(encry));
            DummyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint32_t>(dummy));
        }
        else if (intType == Type::getInt64Ty(M->getContext()))
        {
            std::vector<uint64_t> keys, encry, dummy;
            for (unsigned i = 0; i < CDS->getNumElements(); i++)
            {
                if (cryptoutils->get_range(100) >= ElementEncryptProbTemp)
                {
                    unencryptedindex[GV].emplace_back(i);
                    keys.emplace_back(1);
                    dummy.emplace_back(CDS->getElementAsInteger(i));
                    continue;
                }
                const uint64_t K = cryptoutils->get_uint64_t();
                const uint64_t V = CDS->getElementAsInteger(i);
                keys.emplace_back(K);
                encry.emplace_back(K ^ V);
                dummy.emplace_back(cryptoutils->get_uint64_t());
            }
            KeyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint64_t>(keys));
            EncryptedConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint64_t>(encry));
            DummyConst =
                ConstantDataArray::get(M->getContext(), ArrayRef<uint64_t>(dummy));
        }
        else
        {
            llvm_unreachable("Unsupported CDS Type");
        }
        // Prepare new rawGV
        GlobalVariable *EncryptedRawGV = new GlobalVariable(
            *M, EncryptedConst->getType(), false, GV->getLinkage(),
            EncryptedConst, "EncryptedString", nullptr, GV->getThreadLocalMode(),
            GV->getType()->getAddressSpace());
        genedgv.emplace_back(EncryptedRawGV);
        GlobalVariable *DecryptSpaceGV;
        if (rust_string)
        {
            // The aggregate initializer is reused with its operand 0 swapped
            // for the dummy data.
            // NOTE(review): this mutates the existing constant in place;
            // confirm no other global shares this initializer.
            ConstantAggregate *CA = cast<ConstantAggregate>(GV->getInitializer());
            CA->setOperand(0, DummyConst);
            DecryptSpaceGV = new GlobalVariable(
                *M, GV->getValueType(), false, GV->getLinkage(), CA,
                "DecryptSpaceRust", nullptr, GV->getThreadLocalMode(),
                GV->getType()->getAddressSpace());
        }
        else
        {
            DecryptSpaceGV = new GlobalVariable(
                *M, DummyConst->getType(), false, GV->getLinkage(), DummyConst,
                "DecryptSpace", nullptr, GV->getThreadLocalMode(),
                GV->getType()->getAddressSpace());
        }
        genedgv.emplace_back(DecryptSpaceGV);
        old2new[GV] = std::make_pair(EncryptedRawGV, DecryptSpaceGV);
        GV2Keys[DecryptSpaceGV] = std::make_pair(KeyConst, EncryptedRawGV);
        mgv2keys[DecryptSpaceGV] = GV2Keys[DecryptSpaceGV];
        // The decryption block looks the clear-text indices up by key.
        unencryptedindex[KeyConst] = unencryptedindex[GV];
    }
    // Now prepare ObjC new GV
    for (GlobalVariable *GV : objCStrings)
    {
        ConstantStruct *CS = cast<ConstantStruct>(GV->getInitializer());
        GlobalVariable *oldrawString =
            cast<GlobalVariable>(CS->getOperand(2)->stripPointerCasts());
        if (old2new.find(oldrawString) ==
            old2new.end()) // Filter out zero initializers
            continue;
        GlobalVariable *EncryptedOCGV = ObjectiveCString(
            GV, "EncryptedStringObjC", old2new[oldrawString].first, CS);
        genedgv.emplace_back(EncryptedOCGV);
        GlobalVariable *DecryptSpaceOCGV = ObjectiveCString(
            GV, "DecryptSpaceObjC", old2new[oldrawString].second, CS);
        genedgv.emplace_back(DecryptSpaceOCGV);
        old2new[GV] = std::make_pair(EncryptedOCGV, DecryptSpaceOCGV);
    } // End prepare ObjC new GV
    if (GV2Keys.empty())
        return;
    // Replace Uses
    for (User *U : Users)
    {
        for (std::unordered_map<
                 GlobalVariable *,
                 std::pair<GlobalVariable *, GlobalVariable *>>::iterator iter =
                 old2new.begin();
             iter != old2new.end(); ++iter)
        {
            if (isa<Constant>(U) && !isa<GlobalValue>(U))
            {
                // Non-global constants are updated through
                // handleOperandChange instead of replaceUsesOfWith.
                Constant *C = cast<Constant>(U);
                for (Value *Op : C->operands())
                    if (Op == iter->first)
                    {
                        C->handleOperandChange(iter->first, iter->second.second);
                        break;
                    }
            }
            else
                U->replaceUsesOfWith(iter->first, iter->second.second);
            iter->first->removeDeadConstantUsers();
        }
    } // End Replace Uses
    // CleanUp Old ObjC GVs
    for (GlobalVariable *GV : objCStrings)
    {
        GlobalVariable *PtrauthGV = nullptr;
        if (appleptrauth)
        {
            Constant *C = dyn_cast_or_null<Constant>(
                opaquepointers
                    ? GV->getInitializer()
                    : cast<ConstantExpr>(GV->getInitializer()->getOperand(0)));
            if (C)
            {
                PtrauthGV = dyn_cast<GlobalVariable>(C->getOperand(0));
                // dyn_cast yields nullptr when operand 0 is not a global
                // variable, so it must be checked before it is dereferenced.
                if (PtrauthGV && PtrauthGV->getSection() == "llvm.ptrauth")
                {
                    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(
                            PtrauthGV->getInitializer()->getOperand(2)))
                    {
                        if (GlobalVariable *GV2 =
                                dyn_cast<GlobalVariable>(CE->getOperand(0)))
                        {
                            if (GV->getNumUses() <= 1 &&
                                GV2->getGlobalIdentifier() == GV->getGlobalIdentifier())
                                PtrauthGV->getInitializer()->setOperand(
                                    2, ConstantExpr::getPtrToInt(
                                           M->getGlobalVariable(
                                               "__CFConstantStringClassReference"),
                                           Type::getInt64Ty(M->getContext())));
                        }
                    }
                    else if (GlobalVariable *GV2 = dyn_cast<GlobalVariable>(
                                 PtrauthGV->getInitializer()->getOperand(2)))
                        if (GV->getNumUses() <= 1 &&
                            GV2->getGlobalIdentifier() == GV->getGlobalIdentifier())
                            PtrauthGV->getInitializer()->setOperand(
                                2, ConstantExpr::getPtrToInt(
                                       M->getGlobalVariable(
                                           "__CFConstantStringClassReference"),
                                       Type::getInt64Ty(M->getContext())));
                }
            }
        }
        GV->removeDeadConstantUsers();
        if (GV->getNumUses() == 0)
        {
            GV->dropAllReferences();
            // Remove the entry first so the raw-GV clean-up loop below does
            // not visit the erased global.
            old2new.erase(GV);
            GV->eraseFromParent();
        }
        if (PtrauthGV)
        {
            PtrauthGV->removeDeadConstantUsers();
            if (PtrauthGV->getNumUses() == 0)
            {
                PtrauthGV->dropAllReferences();
                PtrauthGV->eraseFromParent();
            }
        }
    }
    // CleanUp Old Raw GVs
    for (std::unordered_map<
             GlobalVariable *,
             std::pair<GlobalVariable *, GlobalVariable *>>::iterator iter =
             old2new.begin();
         iter != old2new.end(); ++iter)
    {
        GlobalVariable *toDelete = iter->first;
        toDelete->removeDeadConstantUsers();
        if (toDelete->getNumUses() == 0)
        {
            toDelete->dropAllReferences();
            toDelete->eraseFromParent();
        }
    }
    GlobalVariable *StatusGV = encstatus[Func];
    /*
       - Split Original EntryPoint BB into A and C.
       - Create new BB as Decryption BB between A and C. Adjust the terminators
         into: A (Alloca a new array containing all)
               |
               B(If not decrypted)
               |
               C
     */
    BasicBlock *A = &(Func->getEntryBlock());
    BasicBlock *C = A->splitBasicBlock(A->getFirstNonPHIOrDbgOrLifetime());
    C->setName("PrecedingBlock");
    BasicBlock *B =
        BasicBlock::Create(Func->getContext(), "StringDecryptionBB", Func, C);
    // Change A's terminator to jump to B
    // We'll add new terminator to jump C later
    BranchInst *newBr = BranchInst::Create(B);
    ReplaceInstWithInst(A->getTerminator(), newBr);
    // Insert DecryptionCode
    HandleDecryptionBlock(B, C, GV2Keys);
    IRBuilder<> IRB(A->getFirstNonPHIOrDbgOrLifetime());
    // Add atomic load checking status in A
    LoadInst *LI = IRB.CreateLoad(StatusGV->getValueType(), StatusGV,
                                  "LoadEncryptionStatus");
    LI->setAtomic(
        AtomicOrdering::Acquire); // Pairs with the release store at the start of C
    LI->setAlignment(Align(4));
    // Status == 0 means "not decrypted yet": go through B, otherwise jump
    // straight to C.
    Value *condition = IRB.CreateICmpEQ(
        LI, ConstantInt::get(Type::getInt32Ty(Func->getContext()), 0));
    A->getTerminator()->eraseFromParent();
    BranchInst::Create(B, C, condition, A);
    // Add StoreInst atomically in C start
    // No matter control flow is coming from A or B, the GVs must be decrypted
    StoreInst *SI =
        new StoreInst(ConstantInt::get(Type::getInt32Ty(Func->getContext()), 1),
                      StatusGV, C->getFirstNonPHIOrDbgOrLifetime());
    SI->setAlignment(Align(4));
    SI->setAtomic(AtomicOrdering::Release); // Pairs with the acquire load in A
} // End of HandleFunction

// Fills basic block B with straight-line decryption code and terminates it
// with an unconditional branch to C.
//
// GV2Keys maps each decrypt-space global to the pair
// (key constant, encrypted global). For every entry, each encrypted element is
// loaded from the encrypted global, XORed with its key element and stored into
// the decrypt-space global. Elements whose index is listed in
// unencryptedindex[key] are skipped: they have no slot in the encrypted
// array, which is why the encrypted array is indexed by a separate running
// counter.
void HandleDecryptionBlock(
    BasicBlock *B, BasicBlock *C,
    std::unordered_map<GlobalVariable *,
                       std::pair<Constant *, GlobalVariable *>> &GV2Keys)
{
    IRBuilder<> Builder(B);
    LLVMContext &Ctx = B->getContext();
    Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    for (auto &Entry : GV2Keys)
    {
        GlobalVariable *PlainGV = Entry.first;
        // An initializer that is not a data sequential is an aggregate whose
        // operand 0 carries the character data.
        const bool IsAggregate =
            !isa<ConstantDataSequential>(PlainGV->getInitializer());
        ConstantAggregate *Aggregate = nullptr;
        if (IsAggregate)
            Aggregate = cast<ConstantAggregate>(PlainGV->getInitializer());
        Constant *KeyConst = Entry.second.first;
        ConstantDataArray *KeyArray = cast<ConstantDataArray>(KeyConst);
        GlobalVariable *CipherGV = Entry.second.second;
        // Keep the encrypted data alive through optimization.
        appendToCompilerUsed(*CipherGV->getParent(), {CipherGV});
        // Element-by-element XOR: keeps the IR verifier satisfied and avoids
        // emitting the key as a separate global.
        uint64_t CipherIdx = 0;
        for (uint64_t Idx = 0; Idx < KeyArray->getType()->getNumElements(); ++Idx)
        {
            auto &PlainIndices = unencryptedindex[KeyConst];
            if (!PlainIndices.empty() &&
                std::find(PlainIndices.begin(), PlainIndices.end(), Idx) !=
                    PlainIndices.end())
                continue;
            Type *Int64Ty = Type::getInt64Ty(Ctx);
            Value *CipherOff = ConstantInt::get(Int64Ty, CipherIdx);
            Value *PlainOff = ConstantInt::get(Int64Ty, Idx);
            Value *CipherPtr = Builder.CreateGEP(CipherGV->getValueType(),
                                                 CipherGV, {Zero32, CipherOff});
            Value *PlainPtr;
            if (IsAggregate)
            {
                // First address operand 0 of the aggregate, then the element
                // inside it.
                Value *FieldPtr = Builder.CreateGEP(
                    Aggregate->getType(), PlainGV,
                    {Zero32, ConstantInt::getNullValue(Int64Ty)});
                PlainPtr = Builder.CreateGEP(Aggregate->getOperand(0)->getType(),
                                             FieldPtr, {Zero32, PlainOff});
            }
            else
            {
                PlainPtr = Builder.CreateGEP(PlainGV->getValueType(), PlainGV,
                                             {Zero32, PlainOff});
            }
            LoadInst *Cipher = Builder.CreateLoad(KeyArray->getElementType(),
                                                  CipherPtr, "EncryptedChar");
            Value *Plain =
                Builder.CreateXor(Cipher, KeyArray->getElementAsConstant(Idx));
            Builder.CreateStore(Plain, PlainPtr);
            ++CipherIdx;
        }
    }
    Builder.CreateBr(C);
}

Hikari测试

#include <stdio.h>
// Test input for the string-encryption pass: the program's only string
// literal, "hello", is passed to puts.
int main(int argc, char** argv) {
	puts("hello");
	return 0;
}

未混淆IR

@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1

define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0))
  ret i32 0
}
declare i32 @puts(i8*) #1

已混淆IR

@0 = private global i32 0
@EncryptedString = private global [6 x i8] c"*d\D2\15-A"
@DecryptSpace = private global [6 x i8] c"\A7\F1\D9*\82\C8"
@llvm.compiler.used = appending global [1 x i8*] [i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 0)], section "llvm.metadata"

define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %LoadEncryptionStatus = load atomic i32, i32* @0 acquire, align 4
  %0 = icmp eq i32 %LoadEncryptionStatus, 0
  br i1 %0, label %StringDecryptionBB, label %PrecedingBlock

StringDecryptionBB:                               ; preds = %entry
  %EncryptedChar = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 0)
  %1 = xor i8 %EncryptedChar, 66
  store i8 %1, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 0)
  %EncryptedChar1 = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 1)
  %2 = xor i8 %EncryptedChar1, 1
  store i8 %2, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 1)
  %EncryptedChar2 = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 2)
  %3 = xor i8 %EncryptedChar2, -66
  store i8 %3, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 2)
  %EncryptedChar3 = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 3)
  %4 = xor i8 %EncryptedChar3, 121
  store i8 %4, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 3)
  %EncryptedChar4 = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 4)
  %5 = xor i8 %EncryptedChar4, 66
  store i8 %5, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 4)
  %EncryptedChar5 = load i8, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @EncryptedString, i32 0, i32 5)
  %6 = xor i8 %EncryptedChar5, 65
  store i8 %6, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 5)
  br label %PrecedingBlock

PrecedingBlock:                                   ; preds = %entry, %StringDecryptionBB
  store atomic i32 1, i32* @0 release, align 4
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %7 = getelementptr inbounds [6 x i8], [6 x i8]* @DecryptSpace, i32 0, i32 0
  %call = call i32 @puts(i8* %7)
  ret i32 0
}

declare i32 @puts(i8*) #1
flowchart LR
A([entry]) --> B([StringDecryptionBB])
A([entry]) --> C([PrecedingBlock])
B([StringDecryptionBB]) --> C([PrecedingBlock])

Hikari分析

  Hikari是用异或方式将静态区字符串在编译期加密,并在函数入口处动态解密到预分配的静态区,支持C/OC字符串

  • 因为字符串属于模块范围可操作的元素而非函数, 因此需要注册为ModulePass, 入口点为runOnModule
  • 入口点使用toObfuscate判断是否需要字符串加密, HandleFunction为加密处理函数
  • HandleFunction区分出哪些全局数据是字符串, 以及哪些字符串需要混淆
  • 需要处理编译期优化, 防止静态区数据丢失, 或者混淆逻辑被还原, 混淆和优化其实是2个方向相反的过程

加解密过程:

  • 编译期Hikari将字符串异或加密并存储为可执行模块的静态数据, 预分配解密后的存储区, 对同一字符串的多个引用, Hikari会创建多份加密副本防止冲突
  • 编译期Hikari将异或解密逻辑StringDecryptionBB插入到函数入口点
  • 运行时可执行模块解密静态数据到预分配存储区, 使用一个全局状态变量(上述IR中的@0, 其加载结果命名为LoadEncryptionStatus)记录是否已解密: 如果未解密则先执行StringDecryptionBB再进入PrecedingBlock, 否则直接进入PrecedingBlock

优缺点:

  • 异或加密简单可靠,兼容性较强
  • 异或加密算法较简单, 容易被还原; 函数执行一次后即可在静态内存区获取解密后的字符串

实验:将静态字符串转换为栈字符串

  以下代码在LLVM8-18下测试, 仅使用NewPass. 注意本节只是为了验证静态转栈的可行性, 不推荐实际使用.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/raw_ostream.h"
#if LLVM_VERSION_MAJOR <= 15
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#endif

using namespace llvm;

#define PASSNAME          "MyPassDemo"

static void doModule(Module& M);

// ---------------- New Pass ---------------- //
#if LLVM_VERSION_MAJOR <= 13
#define OptimizationLevel PassBuilder::OptimizationLevel
#endif

// New-pass-manager module pass that forwards to doModule().
class MyPassDemo : public PassInfoMixin<MyPassDemo> {
public:
    // Runs the transformation on M.
    // doModule() rewrites instruction operands and inserts new instructions,
    // so the pass must not claim that analyses are preserved;
    // PreservedAnalyses::none() is the conservative, always-correct answer.
    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
        doModule(M);
        return PreservedAnalyses::none();
    }
    // Keeps the pass from being skipped (e.g. for optnone functions / -O0).
    static bool isRequired() { return true; }
};

// Plugin entry point looked up by the pass-plugin loader
// (clang -fpass-plugin=..., opt -load-pass-plugin=...).
//
// Registers MyPassDemo in two ways:
//   * at the start of the default pipeline, so it runs automatically;
//   * as a named pass, so that -passes=MyPassDemo selects it explicitly.
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() {
    return {
        .APIVersion = LLVM_PLUGIN_API_VERSION,
        .PluginName = PASSNAME,
        .PluginVersion = "1.0",
        .RegisterPassBuilderCallbacks = [](PassBuilder &PB) {
            PB.registerPipelineStartEPCallback(
                [](ModulePassManager &MPM
#if LLVM_VERSION_MAJOR >= 12
                , OptimizationLevel Level
#endif
                ) {
                    MPM.addPass(MyPassDemo());
            });
            PB.registerPipelineParsingCallback(
                [](StringRef Name, ModulePassManager& MPM, ArrayRef<PassBuilder::PipelineElement>) {
                    // Only claim our own name; returning true for every
                    // element would hijack other pass names in -passes=...
                    if (Name != PASSNAME)
                        return false;
                    MPM.addPass(MyPassDemo());
                    return true;
            });
        }
    };
}
// ---------------- New Pass ---------------- //
#include <vector>
// One pending rewrite collected by doModule(): operand `idx` of `inst`
// references a constant string whose bytes are `data`.
struct TodoItem {
    Instruction* inst;  // instruction that uses the string
    unsigned     idx;   // index of the operand to replace
    StringRef    data;  // string bytes taken from the global's initializer
};

// Replaces references to constant C strings with stack copies.
//
// Pass 1 scans every instruction operand of M for either
//   * a direct reference to a global variable (opaque pointers, LLVM >= 15), or
//   * a constant `getelementptr @gv, 0, 0` expression (typed pointers, LLVM <= 14)
// whose global is a constant i8 array of at least 2 bytes, and records
// (instruction, operand index, bytes) in a to-do list.
//
// Pass 2 materialises each recorded string as an alloca filled byte by byte at
// the top of the using instruction's basic block and rewires the operand to it.
// The rewrite is deferred so that instruction lists are not modified while they
// are being iterated.
void doModule(Module& M) {
    std::vector<TodoItem> todo_list;
    auto handle_gv = [&todo_list](Instruction* I, unsigned i, GlobalVariable* GV) {
        if (!GV->isConstant() || !GV->hasInitializer())
            return;
        ConstantDataArray* CDA = dyn_cast<ConstantDataArray>(GV->getInitializer());
        // getAsString() is only valid for i8 arrays; other constant data
        // arrays (int/float tables) must be left alone.
        if (CDA == nullptr || !CDA->isString())
            return;
        StringRef data = CDA->getAsString(); // includes the trailing '\0' for C strings
        if (data.size() >= 2) {
            errs() << "Add todo_list: " << data << "\n";
            todo_list.push_back({I, i, data});
        }
    };
    for (Function& F : M) {
        for (BasicBlock& bb : F) {
            for (Instruction& I : bb) {
                // The stack copy is created inside I's own block, which cannot
                // feed a PHI node or an EH pad of that same block.
                if (isa<PHINode>(I) || I.isEHPad())
                    continue;
                for (unsigned i = 0; i < I.getNumOperands(); i++) {
                    Value* v = I.getOperand(i);
                    if (GlobalVariable* GV = dyn_cast<GlobalVariable>(v)) { // LLVM>=15
                        handle_gv(&I, i, GV);
                        // @printf(ptr noundef @.str) -> @printf(ptr noundef %str)
                    } else if (ConstantExpr* CE = dyn_cast<ConstantExpr>(v)) { // LLVM<=14
                        // Only `getelementptr @gv, 0, 0` is recognised; check the
                        // operand count before reading operands 1 and 2.
                        if (CE->getOpcode() != Instruction::GetElementPtr || CE->getNumOperands() != 3)
                            continue;
                        GlobalVariable* GV = dyn_cast<GlobalVariable>(CE->getOperand(0));
                        ConstantInt* idx0 = dyn_cast<ConstantInt>(CE->getOperand(1));
                        ConstantInt* idx1 = dyn_cast<ConstantInt>(CE->getOperand(2));
                        if (GV && idx0 && idx1 && idx0->isZero() && idx1->isZero()) {
                            // Record the operand that actually holds the
                            // expression, not operand 0.
                            handle_gv(&I, i, GV);
                            // @printf(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) -> @printf(i8* noundef %str)
                        }
                    }
                }
            }
        }
    }
    for (TodoItem& item : todo_list) {
        Instruction* inst = item.inst;
        StringRef data = item.data;
        BasicBlock* bb = inst->getParent();
        // getFirstInsertionPt() skips PHI nodes and EH pads, where new
        // instructions may not be placed.
        IRBuilder<> IRB(&*bb->getFirstInsertionPt());
        AllocaInst* stackBuf = IRB.CreateAlloca(IRB.getInt8Ty(), IRB.getInt32(data.size()));
        for (unsigned i = 0; i < data.size(); i++) {
            Value* gep = IRB.CreateConstGEP1_64(IRB.getInt8Ty(), stackBuf, i);
            // Go through unsigned char so bytes >= 0x80 are not sign-extended.
            Constant* n = ConstantInt::get(IRB.getInt8Ty(), static_cast<unsigned char>(data[i]));
            IRB.CreateStore(n, gep);
        }
        inst->setOperand(item.idx, stackBuf);
    }
}


测试

// /tmp/1.cpp
#include <stdio.h>
// Test input for the static-to-stack string experiment: a single string
// literal is passed to printf.
int main(int argc, char** argv) {
	printf("helloworld");
	return 0;
}

编译为Debug

llvm15/build/bin/clang -isysroot `xcrun --sdk iphoneos --show-sdk-path` -arch arm64 -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp 
__text:0000000100007E8C                 SUB             X0, X29, #-var_13 ; char *
__text:0000000100007E90                 MOV             W9, #0x68
__text:0000000100007E94                 STURB           W9, [X29,#var_13]
__text:0000000100007E98                 MOV             W9, #0x65
__text:0000000100007E9C                 STURB           W9, [X29,#var_12]
__text:0000000100007EA0                 MOV             W9, #0x6C
__text:0000000100007EA4                 STURB           W9, [X29,#var_11]
__text:0000000100007EA8                 STURB           W9, [X29,#var_10]
__text:0000000100007EAC                 MOV             W10, #0x6F
__text:0000000100007EB0                 STURB           W10, [X29,#var_F]
__text:0000000100007EB4                 MOV             W11, #0x77
__text:0000000100007EB8                 STURB           W11, [X29,#var_E]
__text:0000000100007EBC                 STURB           W10, [X29,#var_D]
__text:0000000100007EC0                 MOV             W10, #0x72
__text:0000000100007EC4                 STURB           W10, [X29,#var_C]
__text:0000000100007EC8                 STURB           W9, [X29,#var_B]
__text:0000000100007ECC                 MOV             W9, #0x64
__text:0000000100007ED0                 STURB           W9, [X29,#var_A]
__text:0000000100007ED4                 STURB           WZR, [X29,#var_9]
__text:0000000100007ED8                 STR             WZR, [SP,#0x30+var_18]
__text:0000000100007EDC                 STR             W8, [SP,#0x30+var_1C]
__text:0000000100007EE0                 STR             X1, [SP,#0x30+var_28]
__text:0000000100007EE4                 BL              _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  v4 = 104;
  v5 = 101;
  v6 = 108;
  v7 = 108;
  v8 = 111;
  v9 = 119;
  v10 = 111;
  v11 = 114;
  v12 = 108;
  v13 = 100;
  printf(&v4);
  return 0;
}

注意:此结果为IDA7.0版生成,如果使用IDA7.7+则识别为strcpy

llvm15/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp 
__text:0000000100003F06                 mov     [rbp+var_13], 68h
__text:0000000100003F0A                 mov     [rbp+var_12], 65h
__text:0000000100003F0E                 mov     [rbp+var_11], 6Ch
__text:0000000100003F12                 mov     [rbp+var_10], 6Ch
__text:0000000100003F16                 mov     [rbp+var_F], 6Fh
__text:0000000100003F1A                 mov     [rbp+var_E], 77h
__text:0000000100003F1E                 mov     [rbp+var_D], 6Fh
__text:0000000100003F22                 mov     [rbp+var_C], 72h
__text:0000000100003F26                 mov     [rbp+var_B], 6Ch
__text:0000000100003F2A                 mov     [rbp+var_A], 64h
__text:0000000100003F2E                 mov     [rbp+var_9], 0
__text:0000000100003F32                 mov     [rbp+var_18], 0
__text:0000000100003F39                 mov     [rbp+var_1C], edi
__text:0000000100003F3C                 mov     [rbp+var_28], rsi
__text:0000000100003F40                 lea     rdi, [rbp+var_13] ; char *
__text:0000000100003F44                 mov     al, 0
__text:0000000100003F46                 call    _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  v4 = 104;
  v5 = 101;
  v6 = 108;
  v7 = 108;
  v8 = 111;
  v9 = 119;
  v10 = 111;
  v11 = 114;
  v12 = 108;
  v13 = 100;
  v14 = 0;
  printf(&v4, argv, envp);
  result = __stack_chk_guard;
  if ( __stack_chk_guard == v15 )
    result = 0;
  return result;
}

编译为Release

llvm15/build/bin/clang -isysroot `xcrun --sdk iphoneos --show-sdk-path` -arch arm64 -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp -O3
__text:0000000100007ED8                 LDR             D0, =0x726F776F6C6C6568
__text:0000000100007EDC                 STR             D0, [SP,#0x20+var_18]
__text:0000000100007EE0                 MOV             W8, #0x646C
__text:0000000100007EE4                 STRH            W8, [SP,#0x20+var_10]
__text:0000000100007EE8                 STRB            WZR, [SP,#0x20+var_E]
__text:0000000100007EEC                 ADD             X0, SP, #0x20+var_18 ; char *
__text:0000000100007EF0                 BL              _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  strcpy(v4, "helloworld");
  result = printf(v4, argv, envp);
  if ( __stack_chk_guard == v5 )
    result = 0;
  return result;
}

注意: 此时IDA已经把上述指令序列识别为内联形式的strcpy, 这里的strcpy并不是动态库中导出的那个函数

llvm15/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp -O3
__text:0000000100003F46                 mov     rax, 726F776F6C6C6568h
__text:0000000100003F50                 mov     qword ptr [rbp+var_18], rax
__text:0000000100003F54                 mov     [rbp+var_10], 646Ch
__text:0000000100003F5A                 mov     [rbp+var_E], 0
__text:0000000100003F5E                 lea     rdi, [rbp+var_18] ; char *
__text:0000000100003F62                 xor     eax, eax
__text:0000000100003F64                 call    _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  strcpy(v4, "helloworld");
  printf(v4, argv, envp);
  if ( __stack_chk_guard != v5 )
    __stack_chk_fail();
  return 0;
}

介入时机指定为EP_OptimizerLast,编译为Release

llvm15/build/bin/clang -isysroot `xcrun --sdk iphoneos --show-sdk-path` -arch arm64 -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp -O3
__text:0000000100007ED4                 MOV             X8, #0x6568
__text:0000000100007ED8                 MOVK            X8, #0x6C6C,LSL#16
__text:0000000100007EDC                 MOVK            X8, #0x776F,LSL#32
__text:0000000100007EE0                 MOVK            X8, #0x726F,LSL#48
__text:0000000100007EE4                 STUR            X8, [SP,#0x20+var_13]
__text:0000000100007EE8                 MOV             W8, #0x646C
__text:0000000100007EEC                 STURH           W8, [SP,#0x20+var_B]
__text:0000000100007EF0                 STRB            WZR, [SP,#0x20+var_9]
__text:0000000100007EF4                 ADD             X0, SP, #0x20+var_13 ; char *
__text:0000000100007EF8                 BL              _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  strcpy(v4, "helloworld");
  result = printf(v4, argv, envp);
  if ( __stack_chk_guard == v5 )
    result = 0;
  return result;
}
llvm15/build/bin/clang -isysroot `xcrun --sdk macosx --show-sdk-path` -fpass-plugin=build/MyPassDemo15.dylib -o /tmp/1.bin /tmp/1.cpp -O3
__text:0000000100003F46                 mov     rax, 726F776F6C6C6568h
__text:0000000100003F50                 mov     qword ptr [rbp+var_13], rax
__text:0000000100003F54                 mov     [rbp+var_B], 646Ch
__text:0000000100003F5A                 mov     [rbp+var_9], 0
__text:0000000100003F5E                 lea     rdi, [rbp+var_13] ; char *
__text:0000000100003F62                 xor     eax, eax
__text:0000000100003F64                 call    _printf
// IDA伪代码
int __cdecl main(int argc, const char **argv, const char **envp)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  strcpy(v4, "helloworld");
  printf(v4, argv, envp);
  if ( __stack_chk_guard != v5 )
    __stack_chk_fail();
  return 0;
}

平坦化模块分析

Hikari源码

// Control-flow flattening of a single function.
//
// Every basic block except the entry block is turned into a case of one big
// switch that sits inside an endless loop:
//
//   entry -> loopEntry: switch(load switchVar) -> <original block> -> loopEnd
//               ^                                                        |
//               +--------------------------------------------------------+
//
// Each original block, instead of branching to its successor, stores the case
// value of that successor into the switch variable and jumps to loopEnd.
//
// The function returns without flattening when it contains exception-handling
// pads, when any block ends in something other than a branch or a return, or
// when there is only one basic block.
// NOTE: these early returns happen after LowerSwitchPass has already been run
// on the function, so the function may have been modified even if it is not
// flattened.
void Flattening::flatten(Function *f)
{
    SmallVector<BasicBlock *, 8> origBB;
    BasicBlock *loopEntry, *loopEnd;
    LoadInst *load;
    SwitchInst *switchI;
    AllocaInst *switchVar, *switchVarAddr;
    const DataLayout &DL = f->getParent()->getDataLayout();

    // SCRAMBLER
    // Passed to every cryptoutils->scramble32() call below.
    // Presumably lets scramble32 map sequential case indices to stable
    // pseudo-random values -- TODO confirm against cryptoutils.
    std::unordered_map<uint32_t, uint32_t> scrambling_key;
    // END OF SCRAMBLER

    // Run LowerSwitch on this function first so that the terminator check
    // below only has to deal with branch and return instructions.
    PassBuilder PB;
    FunctionAnalysisManager FAM;
    FunctionPassManager FPM;
    PB.registerFunctionAnalyses(FAM);
    FPM.addPass(LowerSwitchPass());
    FPM.run(*f, FAM);

    // Collect all basic blocks; bail out on anything that cannot be handled.
    for (BasicBlock &BB : *f)
    {
        if (BB.isEHPad() || BB.isLandingPad())
        {
            errs() << f->getName()
                   << " Contains Exception Handing Instructions and is unsupported "
                      "for flattening in the open-source version of Hikari.\n";
            return;
        }
        // Only blocks terminated by a branch or a return are supported
        // (silent bail-out, no diagnostic is printed here).
        if (!isa<BranchInst>(BB.getTerminator()) &&
            !isa<ReturnInst>(BB.getTerminator()))
            return;
        origBB.emplace_back(&BB);
    }

    // Nothing to flatten
    if (origBB.size() <= 1)
        return;

    // The entry block stays outside the switch, so drop it from the list
    origBB.erase(origBB.begin());

    // Get a pointer on the first BB
    Function::iterator tmp = f->begin();
    BasicBlock *insert = &*tmp;

    // Detect an entry block that ends with a conditional branch
    BranchInst *br = nullptr;
    if (isa<BranchInst>(insert->getTerminator()))
        br = cast<BranchInst>(insert->getTerminator());

    if ((br && br->isConditional()) ||
        insert->getTerminator()->getNumSuccessors() > 1)
    {
        // Point at the terminator (last instruction of the entry block)...
        BasicBlock::iterator i = insert->end();
        --i;

        // ...and, if the block has more than one instruction, at the
        // instruction right before it, so that it moves together with the
        // branch (presumably the compare feeding the branch).
        if (insert->size() > 1)
        {
            --i;
        }

        // Split the tail off into a new block "first", which becomes the
        // first block handled by the switch. The entry block is left with an
        // unconditional branch to "first".
        BasicBlock *tmpBB = insert->splitBasicBlock(i, "first");
        origBB.insert(origBB.begin(), tmpBB);
    }

    // Remember the entry block's terminator; it is the insertion point for
    // the allocas below and is erased afterwards.
    Instruction *oldTerm = insert->getTerminator();

    // Create the i32 switch variable and a second slot (type i32*) that will
    // hold the address of the switch variable.
    switchVar = new AllocaInst(Type::getInt32Ty(f->getContext()),
                               DL.getAllocaAddrSpace(), "switchVar", oldTerm);
    switchVarAddr =
        new AllocaInst(Type::getInt32Ty(f->getContext())->getPointerTo(),
                       DL.getAllocaAddrSpace(), "", oldTerm);

    // Remove jump
    oldTerm->eraseFromParent();

    // Initial state: scramble32(0, ...). The first case added to the switch
    // below is created with the same arguments (getNumCases() == 0 at that
    // point), so the first dispatch goes to origBB[0].
    new StoreInst(ConstantInt::get(Type::getInt32Ty(f->getContext()),
                                   cryptoutils->scramble32(0, scrambling_key)),
                  switchVar, insert);
    // Keep the address of switchVar in switchVarAddr; the per-block updates
    // further down write through a pointer loaded from this slot.
    new StoreInst(switchVar, switchVarAddr, insert);

    // Create main loop
    loopEntry = BasicBlock::Create(f->getContext(), "loopEntry", f, insert);
    loopEnd = BasicBlock::Create(f->getContext(), "loopEnd", f, insert);

    // The dispatcher reads the current state at the top of every iteration
    load = new LoadInst(switchVar->getAllocatedType(), switchVar, "switchVar",
                        loopEntry);

    // Move first BB on top
    insert->moveBefore(loopEntry);
    BranchInst::Create(loopEntry, insert);

    // loopEnd jump to loopEntry
    BranchInst::Create(loopEntry, loopEnd);

    // Default case: do nothing and go round the loop again
    BasicBlock *swDefault =
        BasicBlock::Create(f->getContext(), "switchDefault", f, loopEnd);
    BranchInst::Create(loopEnd, swDefault);

    // Create switch instruction itself and set condition.
    // The first argument is only a placeholder value; the real condition
    // (the load of switchVar) is set on the next line.
    switchI = SwitchInst::Create(&*f->begin(), swDefault, 0, loopEntry);
    switchI->setCondition(load);

    // Remove branch jump from 1st BB and make a jump to the while.
    // At this point the entry block is the first block of the function again,
    // so this replaces the branch created above with an identical one.
    f->begin()->getTerminator()->eraseFromParent();

    BranchInst::Create(loopEntry, &*f->begin());

    // Put BB in the switch
    for (BasicBlock *i : origBB)
    {
        ConstantInt *numCase = nullptr;

        // Move the BB inside the switch (only visual, no code logic)
        i->moveBefore(loopEnd);

        // Add case to switch; the case value is the scrambled running index
        numCase = cast<ConstantInt>(ConstantInt::get(
            switchI->getCondition()->getType(),
            cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
        switchI->addCase(numCase, i);
    }

    // Recalculate switchVar: replace every original terminator with a store
    // of the next state followed by a jump to loopEnd. Blocks without
    // successors (returns) match neither branch below and are left untouched.
    for (BasicBlock *i : origBB)
    {
        ConstantInt *numCase = nullptr;

        // If it's a non-conditional jump
        if (i->getTerminator()->getNumSuccessors() == 1)
        {
            // Get successor and delete terminator
            BasicBlock *succ = i->getTerminator()->getSuccessor(0);
            i->getTerminator()->eraseFromParent();

            // Get next case
            numCase = switchI->findCaseDest(succ);

            // If next case == default case (switchDefault):
            // fall back to the value of the most recently added case
            if (!numCase)
            {
                numCase = cast<ConstantInt>(
                    ConstantInt::get(switchI->getCondition()->getType(),
                                     cryptoutils->scramble32(switchI->getNumCases() - 1,
                                                             scrambling_key)));
            }

            // Update switchVar (through the pointer stored in switchVarAddr)
            // and jump to the end of loop
            new StoreInst(
                numCase,
                new LoadInst(switchVarAddr->getAllocatedType(), switchVarAddr, "", i),
                i);
            BranchInst::Create(loopEnd, i);
            continue;
        }

        // If it's a conditional jump
        if (i->getTerminator()->getNumSuccessors() == 2)
        {
            // Get next cases
            ConstantInt *numCaseTrue =
                switchI->findCaseDest(i->getTerminator()->getSuccessor(0));
            ConstantInt *numCaseFalse =
                switchI->findCaseDest(i->getTerminator()->getSuccessor(1));

            // Check if next case == default case (switchDefault)
            if (!numCaseTrue)
            {
                numCaseTrue = cast<ConstantInt>(
                    ConstantInt::get(switchI->getCondition()->getType(),
                                     cryptoutils->scramble32(switchI->getNumCases() - 1,
                                                             scrambling_key)));
            }

            if (!numCaseFalse)
            {
                numCaseFalse = cast<ConstantInt>(
                    ConstantInt::get(switchI->getCondition()->getType(),
                                     cryptoutils->scramble32(switchI->getNumCases() - 1,
                                                             scrambling_key)));
            }

            // Create a SelectInst that picks the next state from the original
            // branch condition (this local `br` shadows the outer `br`)
            BranchInst *br = cast<BranchInst>(i->getTerminator());
            SelectInst *sel =
                SelectInst::Create(br->getCondition(), numCaseTrue, numCaseFalse, "",
                                   i->getTerminator());

            // Erase terminator
            i->getTerminator()->eraseFromParent();
            // Update switchVar (through the pointer stored in switchVarAddr)
            // and jump to the end of loop
            new StoreInst(
                sel,
                new LoadInst(switchVarAddr->getAllocatedType(), switchVarAddr, "", i),
                i);
            BranchInst::Create(loopEnd, i);
            continue;
        }
    }
    // fixStack is defined elsewhere. Presumably it demotes values that are
    // used across the moved blocks to stack slots so the IR stays valid
    // -- TODO confirm against its definition.
    errs() << "Fixing Stack\n";
    fixStack(f);
    errs() << "Fixed Stack\n";
}

Hikari测试

#include <stdio.h>
// Minimal input for the flattening test: main contains a single `if`, so the
// function has one conditional branch in its entry code.
int main(int argc, char** argv) {
    // Print "arg1" only when argc equals 1.
    if (argc == 1) {
        puts("arg1");
    }
	return 0;
}

未混淆IR

; Unobfuscated IR of the test program: three basic blocks
; (entry, if.then, if.end) linked by ordinary branches.
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  ; Spill the return value slot and both arguments to the stack.
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  ; if (argc == 1)
  %0 = load i32, i32* %argc.addr, align 4
  %cmp = icmp eq i32 %0, 1
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; puts(@.str), then fall through to the common exit block
  %call = call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0))
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret i32 0
}
flowchart LR
A([entry]) --> B([if.then])
A([entry]) --> C([if.end])
B([if.then]) --> C([if.end])
C([if.end]) --> D([ret])

已混淆IR

; Flattened IR of the test program. Control flow is driven by %switchVar:
; loopEntry dispatches on its value, each case block stores the value of the
; next block to run and jumps to loopEnd, which loops back to loopEntry.
; NOTE(review): this listing stores directly into %switchVar and contains no
; second alloca holding the address of the switch variable; it was presumably
; produced by a different version of the pass -- confirm.
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  ; Stack slot that carries %0 (argc) from entry into the %first block.
  %.reg2mem = alloca i32
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i32, i32* %argc.addr, align 4
  store i32 %0, i32* %.reg2mem
  ; Initial state 175153184 is the case value of %first.
  %switchVar = alloca i32
  store i32 175153184, i32* %switchVar
  br label %loopEntry

; Dispatcher: jump to the block whose case value equals the current state.
loopEntry:                                        ; preds = %entry, %loopEnd
  %switchVar1 = load i32, i32* %switchVar
  switch i32 %switchVar1, label %switchDefault [
    i32 175153184, label %first
    i32 -210782820, label %if.then
    i32 165052502, label %if.end
  ]

; Unknown state: do nothing and go round the loop again.
switchDefault:                                    ; preds = %loopEntry
  br label %loopEnd

; Tail of the original entry block (the compare). The conditional branch has
; been replaced by a select of the next state:
; -210782820 (%if.then) when argc == 1, otherwise 165052502 (%if.end).
first:                                            ; preds = %loopEntry
  %.reload = load volatile i32, i32* %.reg2mem
  %cmp = icmp eq i32 %.reload, 1
  %1 = select i1 %cmp, i32 -210782820, i32 165052502
  store i32 %1, i32* %switchVar
  br label %loopEnd

; Original if.then body; the next state is %if.end (165052502).
if.then:                                          ; preds = %loopEntry
  %call = call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0))
  store i32 165052502, i32* %switchVar
  br label %loopEnd

; Return block: the only way out of the dispatch loop.
if.end:                                           ; preds = %loopEntry
  ret i32 0

; Back edge of the dispatch loop.
loopEnd:                                          ; preds = %if.then, %first, %switchDefault
  br label %loopEntry
}
flowchart LR
A([entry]) --> B([loopEntry])
B([loopEntry]) --> C([first])
B([loopEntry]) --> D([if.then])
B([loopEntry]) --> G([if.end])
B([loopEntry]) --> E([switchDefault])
C([first]) --> F([loopEnd])
D([if.then]) --> F([loopEnd])
E([switchDefault]) --> F([loopEnd])
F([loopEnd]) --> B([loopEntry])
G([if.end]) --> H([ret])

Hikari分析

……未完待续