LLVM的编程技巧

先获取模块中的每个函数,再获取每个函数中的每个BasicBlock,再获取每个BasicBlock中的每条instruction(最常用)

1
2
3
4
5
6
7
8
9
10
11
12
// Walk the module top-down: every Function, every BasicBlock inside it, and
// every Instruction inside each block. `mod` is the Module* provided by the
// surrounding code.
for (Module::iterator FunIt = mod->begin(), FunEnd = mod->end(); FunIt != FunEnd; ++FunIt)
{
    for (Function::iterator b = FunIt->begin(), be = FunIt->end(); b != be; ++b) // each basic block of the function
    {
        // Print through outs() instead of std::cout: both lines then share one
        // buffered stream, so the block name cannot be reordered relative to
        // the instruction dump that follows it.
        outs() << "Basic block name=" << b->getName() << "\n";
        for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) // each instruction of the block
        {
            outs() << *i << "\n";
            Instruction* inst = &(*i); // dereference the iterator to get the Instruction pointer
            (void)inst;                // placeholder: work with inst here
        }
    }
}

获取Module中的每一个Function

1
2
// Visit every Function contained in the module `mod`.
// NOTE(review): this snippet stops after the first body line; the loop's
// closing brace is not shown here.
for (Module::iterator FunIt = mod->begin() ; FunIt != mod->end() ; ++FunIt) {
Function* F = &(*FunIt); // pointer to the current function of the module

直接获取Function中的instruction

1
2
3
4
5
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
{
Instruction* inst = &(*I);
outs() << inst->getName() << " " << inst->getOpcode() << "\n";
}

获取Function中的每个BasicBlock,直接打印整个BasicBlock的内容

1
2
3
4
// Print every basic block of the current function: its name first, then the
// block itself.
for (Function::iterator i = FunIt->begin(), e = FunIt->end(); i != e; ++i) { // each basic block of the function
    // Both lines go through outs(); mixing std::cout with outs() uses two
    // independent buffers and can scramble the order of the output.
    outs() << "Basic block name=" << i->getName() << "\n";
    outs() << *i << "\n"; // streaming the BasicBlock prints the block's contents
}

声明一个函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Step 1: build the function type: void (i32, i32).
IntegerType *IntTy32 = IntegerType::get(mod->getContext(), 32);
std::vector<Type*> FuncTy_args;
FuncTy_args.push_back(IntTy32);
FuncTy_args.push_back(IntTy32);
FunctionType* FuncTy = FunctionType::get(/*Result=*/Type::getVoidTy(mod->getContext()),
                                         /*Params=*/FuncTy_args, /*isVarArg=*/false);

// Step 2: reuse the function if the module already has it, otherwise declare
// it with its linkage, name and calling convention.
Function* func = mod->getFunction("PrintWrite");
if (!func)
{
    // Assign to the outer `func`. Declaring a new `Function* func` here would
    // shadow it, leaving the outer pointer null for setAttributes() below.
    func = Function::Create(/*Type=*/FuncTy, /*Linkage=*/GlobalValue::ExternalLinkage,
                            /*Name=*/"PrintWrite", mod);
    func->setCallingConv(CallingConv::C);
}
AttrListPtr func_Add_PAL; // default-constructed attribute list
func->setAttributes(func_Add_PAL);

调用一个函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
// Precondition: `i` is a BasicBlock::iterator positioned on an instruction
// (Instruction* inst = &(*i) yields that instruction).
// Goal: insert a function call right after that instruction.

++i; // advance first: CallInst::Create inserts *before* the given position

std::vector<Value*> para;
Constant *content = CreateWords(mod, "sum outputted by Hujun:\n");
para.push_back(content);
// `print` is the Function* being called. Pass `para`, the argument vector
// built above (the original note referred to an undeclared `para1`).
CallInst *newcall = CallInst::Create(print, para, "", i);
newcall->setCallingConv(CallingConv::C);
newcall->setTailCall(false);

--i; // step back one position so an enclosing loop's ++i continues as before

创建一个指向字符串的指针,若已存在则直接返回指针

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/// Returns a constant pointer to the first character of a global string that
/// holds `str`, creating the global on first use.
///
/// The global is looked up (and created) under the name "%%" + str. When the
/// module already has a value of that name it is reused; otherwise a constant,
/// private-linkage i8 array of str.length()+1 elements is added to the module
/// and initialised with the NUL-terminated text.
///
/// @param mod  Module that holds (or will hold) the global string.
/// @param str  Text to store.
/// @return     Constant getelementptr expression with indices (0, 0) on the global.
Constant *CreateWords(Module *mod, string str)
{
    // Name under which the string is registered in the module's global value table.
    const string mystr = "%%" + str;

    GlobalVariable *global_name = 0;
    if (GlobalValue *my_name = mod->getNamedValue(mystr))
    {
        // Already present: reuse it. cast<> asserts if the named value is not
        // a GlobalVariable.
        global_name = cast<GlobalVariable>(my_name);
    }
    else
    {
        // Not found: create a new constant i8 array global, sized for the
        // text plus its terminating NUL.
        ArrayType *ArrayTy = ArrayType::get(IntegerType::get(mod->getContext(), 8), str.length() + 1);
        global_name = new GlobalVariable(*mod, ArrayTy, true,
                                         GlobalValue::PrivateLinkage, 0, mystr);
        global_name->setAlignment(1);

        // ConstantDataArray::getString builds the initializer from the text;
        // the final `true` appends the NUL terminator.
        Constant *const_array = ConstantDataArray::getString(mod->getContext(), str, true);
        global_name->setInitializer(const_array);
    }

    // Both paths return the same expression: GEP(global, 0, 0), built once here
    // instead of being duplicated in each branch.
    ConstantInt *const_int = ConstantInt::get(mod->getContext(), APInt(32, 0));
    std::vector<Constant*> const_ptr_indices;
    const_ptr_indices.push_back(const_int);
    const_ptr_indices.push_back(const_int);
    return ConstantExpr::getGetElementPtr(global_name, const_ptr_indices);
}

itoa用sprintf替代

1
2
3
4
5
6
int nValue = 80;
char* szBuffer = (char *)malloc(20);//分配动态内存
memset(szBuffer, 0, 20); //内存块初始化
sprintf(szBuffer, "%d", nValue);//整数转化为字符串

free(szBuffer); //释放动态分配的内存

llvm中调用printf

直接创建一个string类型的字串,然后获取它的地址Value*,调用printf的Function*即可。

1
2
3
4
5
6
7
8
9
10
11
12
13
// Look up printf in the module; when it is present, insert a call to it that
// passes a fixed string, placed right after the instruction at iterator `i`.
Function *print = mod->getFunction("printf");
if (print != 0)
{
    // Argument list: a single pointer to the global string.
    Constant *text = CreateWords(mod, "output something\n");
    std::vector<Value*> args;
    args.push_back(text);

    ++i; // CallInst::Create inserts before the given position, so advance first
    CallInst *call = CallInst::Create(print, args, "printRand", i);
    call->setCallingConv(CallingConv::C);
    call->setTailCall(false);
    --i; // step back one position afterwards
}

从Value*得到int类型数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
Value *oneInt = %add = add nsw i32 %call, %total.06
ConstantInt* CI = dyn_cast<ConstantInt>(oneInt)

Value *ten_int = ConstantInt::get(mod->getContext(), APInt(32, 10));
if (ConstantInt* CI = dyn_cast<ConstantInt>(ten_int))
{
outs() << "转换成功\n";
if (CI->getBitWidth() <= 64)
{
int constIntValue = CI->getSExtValue();
outs() << "整型值为:" << constIntValue << "\n"; //这里输出10
}
}
else
{
outs() << "转换失败!\n";
}

从Value得到string字符串

1
2
3
4
5
6
7
8
9
10
11
有getName方法时:
StringRef oriValue = Funtion.getName(); // name of the object written here as `Funtion`; the type is StringRef
string mystr = oriValue.str(); //StringRef::str: Get the contents as an std::string.

无getName方法时(比如要打印整条指令的内容时):
// Render a whole instruction as text, for values where getName() is not what
// is wanted (e.g. printing the full instruction).
Instruction *MyIn = &(*it_BB); // MyIn is one instruction
string mystr("");
raw_string_ostream stream(mystr); // stream that writes into mystr
stream << *MyIn; // operator<< is overloaded for this type
// raw_string_ostream::str flushes the stream contents to the target string
// and returns a reference to that string.
mystr = stream.str();

从Value得到char* 字符串(由string转char*得到)

1
2
3
4
5
6
7
8
9
10
string getString = MyIn->getName().str();
char mystr[100];
strcpy(mystr, getString.c_str()); //mystr中为函数名

/*extern int strcmp(const char *s1,const char *s2);
当s1<s2时,返回为负数;
当s1==s2时,返回值= 0;
当s1>s2时,返回正数。*/

if(strcmp(mystr, "printf") == 0) //判断mystr是否等于"printf"

调用外部的C函数(生成静态库)

LLVM IR和C/C++函数相互调用时的注意事项

1
2
3
4
5
6
7
8
9
生成静态库.a:
gcc -c printInt.c #compile printInt.c into printInt.o
ar -cr libmyprint.a printInt.o #archive printInt.o into the static library libmyprint.a

./RandSum ../../hello.bc #run the instrumentation tool to generate the .bc file
llc update.bc -o update.s #turn the instrumented .bc into a .s file
gcc -c update.s -o update.o #turn the .s into a .o
gcc update.o ../../libmyprint.a -o update #link the .o with the static library .a into an executable
./update #run the executable

获取指令的操作数和返回值

1
2
3
4
5
6
7
// Print how many operands `inst` has, then each operand, then the instruction.
// getNumOperands() returns an unsigned count; keep it (and the index) unsigned
// to avoid a signed/unsigned conversion and comparison.
unsigned operand_num = inst->getNumOperands(); // number of operands, not the operands themselves
outs() << "总共" << operand_num << "个操作数" << "\n";
for (unsigned i = 0; i < operand_num; ++i)
{
    // The original note remarks that for printf the 2nd operand is the string.
    outs() << "第" << i+1 << "个操作数是:" << *inst->getOperand(i) << "\n";
}
outs() << "返回值是:" << *inst << "\n\n"; // the result value is the instruction itself

获取函数的参数的位置和类型

1
2
3
4
5
6
7
8
9
10
if(myfunc->isVarArg())
{
outs() << "可变参数\n"; //如printf(i8*,...),则是可变参数
}

for(Function::arg_iterator start = myfunc->arg_begin(), end = myfunc->arg_end(); start != end; ++start)
{
outs() << "argument:" << *start << "\n"; //获得参数类型和位置
//如printf(i8*,...) 则输出argument:i8* %0
}

replaceAllUsesWith

replaceUsesOfWith