待整理:
官方文檔:
線上文章:
線上課程:
開發注意事項:
printf("%8x %8x %8x %8x", *((unsigned int *)(&var) + 0), *((unsigned int *)(&var) + 1), *((unsigned int *)(&var) + 2), *((unsigned int *)(&var) + 3));
$ clang -O2 -c -mllvm -print-after-all -mllvm -filter-print-funcs=main func.c
$ clang -O0 -emit-llvm -S -o sum.ll sum.c # 和參考後端比較 O2 IR 生成過程中是否有差異。 $ opt -O2 sum.ll -print-after-all
$ clang -O2 -emit-llvm -S -o sum.ll sum.c # 觀察 DAG 變換過程,看是否有優化空間。 $ llc sum.ll -debug
$ brew install cmake ninja $ git clone http://llvm.org/git/llvm.git $ git clone http://llvm.org/git/clang.git llvm/tools/clang $ mkdir build.Ninja; cd build.Ninja $ cmake -G Ninja -DCMAKE_INSTALL_PREFIX=/Users/chenwj/Projects/opt/ -DCMAKE_BUILD_TYPE=Debug ../llvm $ ninja; ninja install
$ mkdir build.Xcode $ cmake -G Xcode -DCMAKE_INSTALL_PREFIX=/Users/chenwj/Projects/opt/ -DCMAKE_BUILD_TYPE=Debug ../llvm $ open LLVM.xcodeproj
$ cd build.Ninja $ cmake -G Ninja -DCMAKE_INSTALL_PREFIX=/Users/chenwj/Projects/opt/ -DCMAKE_BUILD_TYPE=Debug -DLLVM_BUILD_EXAMPLES=1 ../llvm # 編譯 example BrainF $ ninja BrainF
$ clang -emit-llvm -S hello.c -o hello.ll $ clang -emit-llvm -c hello.c -o hello.bc
$ clang -c sum.c -mllvm -debug-pass=Structure
$ llvm-as hello.ll -o hello.bc $ llvm-dis hello.bc -o hello.ll
$ gcc `llvm-config --cxxflags --ldflags` foo.c `llvm-config --libs`
# 顯示隱藏選項。 $ llc -help-hidden # 顯示在編譯過程中傳遞哪些參數給 opt。 $ llc -debug-pass=Arguments sum.ll # 顯示在編譯過程中 PassManager 調度哪些 pass。 $ llc -debug-pass=Structure sum.ll # 只顯示代碼中有 #define DEBUG_TYPE "legalize-types" 的 pass。 $ llc -debug-only=legalize-types sum.ll
# 顯示隱藏選項。 $ opt -help-hidden # 顯示每一個 pass 處理之後的 LLVM IR。可以用來觀察感興趣的 pass 做了哪些變換。 $ opt -S -O2 -print-after-all sum.ll
$ cat sum.c int sum(int a, int b) { return a + b; } $ clang -emit-llvm -S sum.c -o sum.ll
; ModuleID = 'sum.c' source_filename = "sum.c" ; 從底下 target datalayout 和 triple 即可知 LLVM IR 並非平台中立。 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" ; Function Attrs: noinline nounwind ssp uwtable define i32 @sum(i32 %a, i32 %b) #0 { entry: %a.addr = alloca i32, align 4 ; 在 stack 上分配空間給 i32,4 byte 對齊。 %b.addr = alloca i32, align 4 store i32 %a, i32* %a.addr, align 4 ; 將函式參數存到 stack。 store i32 %b, i32* %b.addr, align 4 %0 = load i32, i32* %a.addr, align 4 ; 從 stack 讀出 operand。 %1 = load i32, i32* %b.addr, align 4 %add = add nsw i32 %0, %1 ; 加法運算。 ret i32 %add ; 返回運算結果。 }
跟 LLVM IR 相關的類如下,所在目錄為 include/llvm/IR:
較難理解的部分:
$ cat struct.c struct RT { char A; char C; }; char *func1(struct RT *s) { return &(s->A); } char *func2(struct RT *s) { return &(s->C); } char *func3(struct RT *s) { return &(s[1].C); } $ clang -emit-llvm -O1 -S struct.c -o struct.ll
%struct.RT = type { i8, i8 } ; Function Attrs: norecurse nounwind readnone ssp uwtable define i8* @func1(%struct.RT* readnone %s) local_unnamed_addr #0 { entry: %A = getelementptr inbounds %struct.RT, %struct.RT* %s, i64 0, i32 0 ret i8* %A } ; Function Attrs: norecurse nounwind readnone ssp uwtable define i8* @func2(%struct.RT* readnone %s) local_unnamed_addr #0 { entry: %C = getelementptr inbounds %struct.RT, %struct.RT* %s, i64 0, i32 1 ret i8* %C } ; Function Attrs: norecurse nounwind readnone ssp uwtable define i8* @func3(%struct.RT* readnone %s) local_unnamed_addr #0 { entry: %C = getelementptr inbounds %struct.RT, %struct.RT* %s, i64 1, i32 1 ret i8* %C }
%struct.RT
: 第二個參數所指類型。%struct.RT* %0
: 計算位址的起始位址,其型別一定是指針類型。之後的參數皆是欲對其計算位址的子元素的 index。i32 0
: 類似 %0[0]
。LLVM 某種程度上將第二個參數視為 array。如果實際上不是 array,此值皆為 0。i32 0
: 類似 %0[0].A
。bool InstCombiner::run() { if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); } }
$ cat bitfield.c typedef struct { unsigned int width : 16; unsigned int height : 16; } status; void foo() { status s; s.width = 4; s.height = 5; } $ clang bitfield.c -emit-llvm -S -o bitfield.ll
%struct.status = type { i32 } ; Function Attrs: noinline nounwind ssp uwtable define void @foo() #0 { entry: %s = alloca %struct.status, align 4 %0 = bitcast %struct.status* %s to i32* %bf.load = load i32, i32* %0, align 4 %bf.clear = and i32 %bf.load, -65536 ; and FFFF0000。0000 代表先被 clear,之後要被設置的部分,取 status.width。 %bf.set = or i32 %bf.clear, 4 ; or 0004。把 status.width 設為 4。 store i32 %bf.set, i32* %0, align 4 %1 = bitcast %struct.status* %s to i32* %bf.load1 = load i32, i32* %1, align 4 %bf.clear2 = and i32 %bf.load1, 65535 ; and 0000FFFF。0000 代表先被 clear,之後要被設置的部分,取 status.height。 %bf.set3 = or i32 %bf.clear2, 327680 ; or 50000。把 status.height 設為 5。 store i32 %bf.set3, i32* %1, align 4 ret void }
術語:
llc -print-machineinstrs
列出指令選擇後,各個階段的 MachineInstr。$ llc -print-machineinstrs sum.ll # After Instruction Selection: # 打印出 MachineFunction 的 Property。從 IsSSA 和 TracksLiveness 可以知道我們現在處於暫存器分配前的階段。 # Machine code for function sum: IsSSA, TracksLiveness Frame Objects: fi#0: size=4, align=4, at location [SP+8] fi#1: size=4, align=4, at location [SP+8] Function Live Ins: %EDI in %vreg0, %ESI in %vreg1 # MachineBasicBlock 可以連結到相關的 LLVM IR BasicBlock。 BB#0: derived from LLVM BB %entry Live Ins: %EDI %ESI %vreg1<def> = COPY %ESI; GR32:%vreg1 %vreg0<def> = COPY %EDI; GR32:%vreg0 MOV32mr <fi#0>, 1, %noreg, 0, %noreg, %vreg0; mem:ST4[%a.addr] GR32:%vreg0 MOV32mr <fi#1>, 1, %noreg, 0, %noreg, %vreg1; mem:ST4[%b.addr] GR32:%vreg1 %vreg2<def,tied1> = ADD32rm %vreg1<tied0>, <fi#0>, 1, %noreg, 0, %noreg, %EFLAGS<imp-def,dead>; mem:LD4[%a.addr](dereferenceable) GR32:%vreg2,%vreg1 %EAX<def> = COPY %vreg2; GR32:%vreg2 RET 0, %EAX
%vreg
是虛擬暫存器,%ESI
是物理暫存器。<def>
表示該暫存器於該指令被定義,沒有 <def>
代表該暫存器於該指令被使用。<dead>
代表被定義的暫存器在該指令之後沒有被使用。 <kill>
代表該指令是最後一條使用該暫存器的指令。imp-def
和 imp-use
表示該暫存器是被隱式定義或使用,不出現在匯編指令中,如 %EFLAGS
。tied
表示該運算元和另一個運算元綁定,兩者必須分配到同一個暫存器 (如上例 ADD32rm 的 <def,tied1> 和 <tied0>)。其餘 flag 的意義請見 MachineOperand。$ llc -stop-after=isel sum.ll -o sum.mir $ llc -run-pass=machine-scheduler sum.mir -o sum_scheduled.mir $ llc -start-after=machine-scheduler sum_scheduled.mir -o sum.s
$ llc -verify-machineinstrs sum.ll
從匯編器角度所設計的 MCInst (MachineCode),所帶的資訊比 MachineInstr (MI) 更少,其主要用途是讓匯編器生成目的檔,或是透過反匯編器生成 MCInst。
相關文章:
# 產生匯編的同時,於註解加上對應的 MCInst。 $ llc -asm-show-inst sum.ll -o - movq %rsp, %rbp ## <MCInst #1741 MOV64rr ## <MCOperand Reg:36> ## <MCOperand Reg:44>> # 產生匯編的同時,於註解加上對應的指令編碼。 $ llc -show-mc-encoding sum.ll movq %rsp, %rbp ## encoding: [0x48,0x89,0xe5] # llvm-mc 用於測試 MCInst 的匯編和反匯編。 $ echo "movq %rsp, %rbp" | llvm-mc -show-encoding .section __TEXT,__text,regular,pure_instructions movq %rsp, %rbp ## encoding: [0x48,0x89,0xe5] $ echo "0x48,0x89,0xe5" | llvm-mc -disassemble .section __TEXT,__text,regular,pure_instructions movq %rsp, %rbp $ echo "0x48,0x89,0xe5" | llvm-mc -disassemble -show-inst .section __TEXT,__text,regular,pure_instructions movq %rsp, %rbp ## <MCInst #1741 MOV64rr ## <MCOperand Reg:36> ## <MCOperand Reg:44>>
在 post-register allocation pass 之後,於 AsmPrinter 調用 EmitInstruction,將 MI 轉換成 MCInst,可以輸出匯編或是目的檔。目標平台繼承 AsmPrinter 並覆寫相關函式。
$ lldb llc (lldb) b X86AsmPrinter::runOnMachineFunction # MCAsmStreamer (lldb) r -filetype=asm sum.ll # MCObjectStreamer (lldb) r -filetype=obj sum.ll
$ opt -load=install/lib/libLLVMHello.so -hello < hello.bc > /dev/null
opt
和 libLLVMHello.so
是同一份源碼和設定編譯而成。Error opening 'install/lib/libLLVMHello.so': install/lib/libLLVMHello.so: undefined symbol: _ZN4llvm4Pass26getAdjustedAnalysisPointerEPKv -load request ignored.
getAnalysisUsage(AnalysisUsage &AU)
指定其輸出入的依賴關係。AU.addRequired<PassName>()
指定當前 pass 需要哪些 PassName 先運行; AU.addPreserved<PassName>()
指定當前 pass 運行之後,PassName 的結果不受影響,可以繼續使用。static void AddOptimizationPasses(legacy::PassManagerBase &MPM, legacy::FunctionPassManager &FPM, TargetMachine *TM, unsigned OptLevel, unsigned SizeLevel) { if (!NoVerify || VerifyEach) FPM.add(createVerifierPass()); // Verify that input is correct PassManagerBuilder Builder; Builder.OptLevel = OptLevel; Builder.SizeLevel = SizeLevel; if (DisableInline) { // No inlining pass } else if (OptLevel > 1) { Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false); } else { Builder.Inliner = createAlwaysInlinerLegacyPass(); } Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); }
extern template class PassManager<Module>; typedef PassManager<Module> ModulePassManager; extern template class PassManager<Function>; typedef PassManager<Function> FunctionPassManager;
LLVM 後端基本上有三類 IR (LLVM + ARM = ?),由上至下依序是:
後兩者的名稱容易被混淆:
如同 Design and Implementation of a TriCore Backend for the LLVM Compiler Framework 第 12 頁所展示的,LLVM 後端的流程如下:
LLVM IR -> DAG Lowering -> non-legalized DAGs -> DAG Legalization -> legalized DAGs -> Instruction Selection -> DAGs (native instructions, MI) -> Scheduling -> SSA Form -> SSA-based Opt -> SSA Form -> Register Allocation -> native instructions with phys reg -> Post Allocation -> Prolog/Epilog Code Insertion -> resolved stack reference -> Peephole Opt -> Assembly Printing -> assembly
Tutorial: Building a backend in 24 hours (2012) 第 3 頁展示各個階段分別為何種形式的中介碼。
$ llc -debug-pass=Structure sum.ll Eliminate PHI nodes for register allocation Simple Register Coalescing Greedy Register Allocator Virtual Register Rewriter
PHIElimination::LowerPHINode()
改用 COPY 指令取代 PHI 指令。RegisterCoalescer::joinCopy()
如果 COPY 指令的來源和目的暫存器其 interval 相同,消除該 COPY 指令。VirtRegMap
,紀錄虛擬暫存器和物理暫存器的對應,VirtRegRewriter::runOnMachineFunction()
再根據 VirtRegMap
將 MI 中的虛擬暫存器改寫成物理暫存器。$ cat phi.c void func(bool r, bool y) { bool l = y || r; } $ clang++ -O0 phi.c -mllvm -print-machineinstrs -c # After Instruction Selection: # Machine code for function func: IsSSA, TracksLiveness BB#2: derived from LLVM BB %lor.end Predecessors according to CFG: BB#0 BB#1 %vreg1<def> = PHI %vreg6, <BB#0>, %vreg13, <BB#1>; GR8:%vreg1,%vreg6,%vreg13 %vreg15<def,tied1> = AND8ri %vreg1<tied0>, 1, %EFLAGS<imp-def>; GR8:%vreg15,%vreg1 MOV8mr <fi#2>, 1, %noreg, 0, %noreg, %vreg15<kill>; mem:ST1[%l] GR8:%vreg15 RETQ # After Eliminate PHI nodes for register allocation: # Machine code for function func: NoPHIs, TracksLiveness BB#2: derived from LLVM BB %lor.end Predecessors according to CFG: BB#0 BB#1 %vreg1<def> = COPY %vreg16; GR8:%vreg1,%vreg16 %vreg15<def,tied1> = AND8ri %vreg1<tied0>, 1, %EFLAGS<imp-def>; GR8:%vreg15,%vreg1 MOV8mr <fi#2>, 1, %noreg, 0, %noreg, %vreg15<kill>; mem:ST1[%l] GR8:%vreg15 RETQ # After Fast Register Allocator: # Machine code for function func: NoPHIs, TracksLiveness, NoVRegs BB#2: derived from LLVM BB %lor.end Predecessors according to CFG: BB#0 BB#1 %AL<def> = MOV8rm <fi#3>, 1, %noreg, 0, %noreg; mem:LD1[FixedStack3] %AL<def,tied1> = AND8ri %AL<tied0>, 1, %EFLAGS<imp-def> MOV8mr <fi#2>, 1, %noreg, 0, %noreg, %AL<kill>; mem:ST1[%l] RETQ
SDValue.getNode()
和 SDValue.getResNo()
分別代表該 SDValue 是從哪個 SDNode 生成,以及是該 SDNode 第幾個輸出。SDNode 之間有 data 或 control (chain, 簡寫 ch) dependency。上圖中黑色箭頭代表 data dependency,藍色箭頭代表 control dependency (注意它指向節點中的 ch 欄位),紅色箭頭代表兩個節點之間是 glue 的關係 (和 ch 相比,glue 之間不能插入其它節點)。EntryToken 和 TokenFactor 負責 control dependency。GraphRoot 代表 SelectionDAG 的最底層。每個節點中間一欄代表其 operator (ISD::NodeType),上面一欄代表其輸入,下面一欄代表其輸出。
SelectionDAG 主要有底下幾個流程 (SelectionDAGISel::CodeGenAndEmitDAG()):
# Pop up a window to show dags before the first dag combine pass $ llc -view-dag-combine1-dags sum.ll
# Pop up a window to show dags before legalize types $ llc -view-legalize-types-dags sum.ll
# Pop up a window to show dags before the post legalize types dag combine pass $ llc -view-dag-combine-lt-dags sum.ll
# Pop up a window to show dags before legalize $ llc -view-legalize-dags sum.ll
# Pop up a window to show dags before the second dag combine pass $ llc -view-dag-combine2-dags sum.ll
# Pop up a window to show isel dags as they are selected $ llc -view-isel-dags sum.ll
# Pop up a window to show sched dags as they are processed $ llc -view-sched-dags sum.ll
# Pop up a window to show SUnit dags after they are processed $ llc -view-sunit-dags sum.ll
SelectCode(Node)
透過 *.td 檔所寫的匹配規則選擇指令,或者也可以針對特定 SDNode 生成其它 SDNode 進行置換。函式 SelectCode(Node)
由 tblgen 產生。AddedComplexity
指定指令選擇的偏好。AddedComplexity
可以為正或負數,數值越大,代表我們越傾向使用該指令。LLVM IR -> Global MI (GMI) -> MI
# 顯示 SDNode Sched DAG。 $ llc -view-sunit-dags sum.ll # 顯示 MI Sched DAG, pre-RA 和 post-RA。 $ llc -view-misched-dags sum.ll # pre-RA, SDNode $ llc -pre-RA-sched # pre-RA, MI $ llc -enable-misched # post-RA, MI $ llc -enable-post-misched
class SchedMachineModel { int IssueWidth = -1; int MicroOpBufferSize = -1; int LoopMicroOpBufferSize = -1; int LoadLatency = -1; int HighLatency = -1; int MispredictPenalty = -1; ProcessorItineraries Itineraries = NoItineraries; bit PostRAScheduler = 0; bit CompleteModel = 1; list<Predicate> UnsupportedFeatures = []; bit NoModel = 0; }
def HexagonModelV60 : SchedMachineModel { // Max issue per cycle == bundle width. let IssueWidth = 4; let Itineraries = HexagonItinerariesV60; let LoadLatency = 1; let CompleteModel = 0; }
class SchedReadWrite; class Sched<list<SchedReadWrite> schedrw> { list<SchedReadWrite> SchedRW = schedrw; } class SchedWrite : SchedReadWrite; class SchedRead : SchedReadWrite; class ProcResource<int num> : ProcResourceKind, ProcResourceUnits<EponymousProcResourceKind, num>; class WriteRes<SchedWrite write, list<ProcResourceKind> resources> : ProcWriteResources<resources> { SchedWrite WriteType = write; } class ReadAdvance<SchedRead read, int cycles, list<SchedWrite> writes = []> : ProcReadAdvance<cycles, writes> { SchedRead ReadType = read; }
// Basic ALU operation. def WriteALU : SchedWrite; def ReadALU : SchedRead; def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; } def : ReadAdvance<ReadALU, 1>;
class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp, list<InstrItinData> iid> { list<FuncUnit> FU = fu; list<Bypass> BP = bp; list<InstrItinData> IID = iid; }
lib/CodeGen/SelectionDAG/
底下有幾個實現。ScheduleDAGInstrs 作用於 MI,為 pre-register allocation/post-register MI scheduler 提供接口。lib/Target/
底下各目標平台有各自的實現,均繼承 ScheduleDAGMILive (ScheduleDAGMILive 考慮暫存器壓力)。schedule
,在 MachineScheduler/PostMachineScheduler/PostRASchedulerList 的 runOnMachineFunction
被調用。ScheduleDAGMI::postprocessDAG
在 ScheduleDAGMILive::schedule
被調用。pickNode
和 schedNode
在 ScheduleDAGMILive::schedule
被調用。runOnMachineFunction
中,會調用到 MI scheduler 的 schedule
進行調度。schedule()
實現 (ScheduleDAGMI::schedule() 和 ScheduleDAGMILive::schedule())。buildSchedGraph(AA); findRootsAndBiasEdges(TopRoots, BotRoots); SchedImpl->initialize(this); // SchedImpl 為 MachineSchedStrategy 的實例。 initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (true) { // 如果 SU 為 nullptr,或為非法,跳出循環。 SUnit *SU = SchedImpl->pickNode(IsTopNode); // ScheduleDAGMILive 調用 scheduleMI,其基礎為原來 ScheduleDAGMI 的代碼。 // 此處透過 moveInstruction 調整 SU 對應 MI 在原來 basic block 的位置。 SchedImpl->schedNode(SU, IsTopNode); updateQueues(SU, IsTopNode); }
RPTracker will cover the entire DAG region, TopTracker and BottomTracker will be initialized to the top and bottom of the DAG region without covering any unscheduled instruction.
VLIWResourceModel::isResourceAvailable()
)。schedule()
接口,供上層回調。schedule()
。實際挑選節點,並更新 data dependency graph 各節點 priority 的工作交給 SchedImpl (即 ConvergingVLIWScheduler
)。HexagonPassConfig
覆寫 createMachineScheduler
,回傳 VLIWMachineScheduler。createMachineScheduler
在 MachineScheduler::runOnMachineFunction
被調用。透過這樣的方式,Hexagon 執行自己的 PreRA scheduler。getHazardType
和 EmitInstruction
改變調度結果。ScheduleHazardRecognizer::HazardType HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { if (!Resources->canReserveResources(*MI)) { DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI); HazardType RetVal = Hazard; if (TII->mayBeNewStore(*MI)) { // Make sure the register to be stored is defined by an instruction in the // packet. MachineOperand &MO = MI->getOperand(MI->getNumOperands() - 1); if (!MO.isReg() || RegDefs.count(MO.getReg()) == 0) return Hazard; // The .new store version uses different resources so check if it // causes a hazard. MachineFunction *MF = MI->getParent()->getParent(); MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)), MI->getDebugLoc()); if (Resources->canReserveResources(*NewMI)) RetVal = NoHazard; DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard) << "\n"); MF->DeleteMachineInstr(NewMI); } return RetVal; } }
HexagonPacketizer::runOnMachineFunction
(HexagonVLIWPacketizer.cpp) 被調用。Machine Basic Block 會被切分成數個 Region,再由 PacketizeMIs 將 Region 中的數個 MI 打包成一個 Packet (Bundle)。PacketizeMIs
先對 Region 中的指令做調度,之後根據 DFAPacketizer 偵測 Structural hazards,根據 isLegalToPacketizeTogether 和 isLegalToPruneDependencies 偵測 Data hazards,決定 MI 是否能加入當前的 Packet。finalizeBundle
(MachineInstrBundle.cpp) 透過 MIBundleBuilder 將 CurrentPacketMIs 中的 MI 標記其屬於某一個 Packet。$ lldb llc (lldb) b HexagonPacketizer::runOnMachineFunction (lldb) b VLIWPacketizerList::PacketizeMIs (lldb) r test/CodeGen/Hexagon/packetize_cond_inst.ll
createMachineScheduler
和 createPostMachineScheduler
可以被覆寫。可以返回 ScheduleDAGInstrs 的子類,或是只返回 ScheduleDAGInstrs 的實例,傳入客制的 MachineSchedStrategy。又或是透過 substitutePass
將預設執行的 PostRASchedulerList 替換成 PostMachineScheduler。// 取 TryVal 和 CandVal 中最小的。 static bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; } if (TryVal > CandVal) { // Reason 越小,優先級越高。把 Cand.Reason 替換掉。 if (Cand.Reason > Reason) Cand.Reason = Reason; return true; } return false; }
$ llc -O2 -debug-pass=Structure test/CodeGen/Hexagon/packetize_cond_inst.ll
runOnMachine
開始,直到 SchedulePostRATDList::ListScheduleTopDown
開始實際的調度。void SchedulePostRATDList::ListScheduleTopDown() { while (!AvailableQueue.empty() || !PendingQueue.empty()) { }
$ llc -O2 -debug-only=post-RA-sched test/CodeGen/Hexagon/packetize_cond_inst.ll ********** List Scheduling ********** SU(0): %P0<def> = C4_cmplte %R2, %R1 # preds left : 0 # succs left : 3 # rdefs left : 0 Latency : 1 Depth : 0 Height : 4 Successors: SU(2): Data Latency=1 Reg=%P0 SU(1): Data Latency=1 Reg=%P0 SU(1): Anti Latency=0 SU(1): %R1<def> = A2_paddt %P0, %R2<kill>, %R1<kill> # preds left : 2 # succs left : 2 # rdefs left : 0 Latency : 1 Depth : 1 Height : 3 Predecessors: SU(0): Data Latency=1 Reg=%P0 SU(0): Anti Latency=0 Successors: SU(2): Out Latency=1 SU(2): Data Latency=1 Reg=%R1
Reset hazard recognizer *** Examining Available *** Scheduling [0]: SU(0): %P0<def> = C4_cmplte %R2, %R1 Add instruction %P0<def> = C4_cmplte %R2, %R1 *** Examining Available *** Finished cycle 0 Advance cycle, clear state
***
開頭的訊息,是由 SchedulePostRATDList::ListScheduleTopDown() 生成。其它訊息由 HazardRecognizer 選擇性生成 (如: HexagonHazardRecognizer.cpp)。顯示在第幾個 cycle,選中哪一條指令。class
和 def
都是 record。 class
類似 C++ 裡的 Class,def
類似 C++ 裡的 Object。 set
。list<Register> SubRegs
和 list<SubRegIndex> SubRegIndices
一起搭配。前者定義此 register 包含哪些 sub-register,後者用來定位 (index) 這些 sub-register。以 X86RegisterInfo.td 為例:class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> { let Namespace = "X86"; let HWEncoding = Enc; let SubRegs = subregs; } let Namespace = "X86" in { def sub_8bit : SubRegIndex<8>; def sub_8bit_hi : SubRegIndex<8, 8>; // 此 subreg 大小為 8 bit,在 reg 從 offset 8 bit 開始。 def sub_16bit : SubRegIndex<16>; def sub_32bit : SubRegIndex<32>; def sub_xmm : SubRegIndex<128>; def sub_ymm : SubRegIndex<256>; } let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in { def AX : X86Reg<"ax", 0, [AL,AH]>; // AX 由 AL 和 AH 兩個 subreg 組成,AL 和 AH 如何在 AX 佔位由 SubRegIndices 描述。 def DX : X86Reg<"dx", 2, [DL,DH]>; def CX : X86Reg<"cx", 1, [CL,CH]>; def BX : X86Reg<"bx", 3, [BL,BH]>; }
class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, dag regList, RegAltNameIndex idx = NoRegAltName> def GR32 : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
list<dag> Pattern
欄位即是用來撰寫指令選擇所需要的 pattern match。def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$rd), // 輸出 (ins IntRegs:$rs1, IntRegs:$rs2), // 輸入 "xnor $rs1, $rs2, $rd", // 匯編指令 [(set i32:$rd, (not (xor i32:$rs1, i32:$rs2)))] // DAG 匹配樣式,$rd = not ($rs1 xor $rs2) >;
$rd = not ($rs1 xor $rs2)
之後,會將其替換成 XNORrr
。class Operand<ValueType ty> : DAGOperand { ValueType Type = ty; string PrintMethod = "printOperand"; string EncoderMethod = ""; bit hasCompleteDecoder = 1; string OperandType = "OPERAND_UNKNOWN"; dag MIOperandInfo = (ops); ... }
/// PointerLikeRegClass - Values that are designed to have pointer width are /// derived from this. TableGen treats the register class as having a symbolic /// type that it doesn't know, and resolves the actual regclass to use by using /// the TargetRegisterInfo::getPointerRegClass() hook at codegen time. class PointerLikeRegClass<int Kind> { int RegClassKind = Kind; } /// ptr_rc definition - Mark this operand as being a pointer value whose /// register class is resolved dynamically via a callback to TargetInstrInfo. /// FIXME: We should probably change this to a class which contain a list of /// flags. But currently we have but one flag. def ptr_rc : PointerLikeRegClass<0>; def MEMrr : Operand<iPTR> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops ptr_rc, ptr_rc); let ParserMatchClass = SparcMEMrrAsmOperand; } def MEMri : Operand<iPTR> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops ptr_rc, i32imm); let ParserMatchClass = SparcMEMriAsmOperand; }
class CalleeSavedRegs<dag saves> { dag SaveList = saves; dag OtherPreserved; } def HexagonCSR : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27)>;
class CCIfType<list<ValueType> vts, CCAction A> : CCPredicateAction<A> { list<ValueType> VTs = vts; } class CCAssignToReg<list<Register> regList> : CCAction { list<Register> RegList = regList; } CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
class CallingConv<list<CCAction> actions> { list<CCAction> Actions = actions; bit Custom = 0; } def RetCC_Sparc32 : CallingConv<[ CCIfType<[i32], CCAssignToReg<[I0, I1]>>, CCIfType<[f32], CCAssignToReg<[F0]>>, CCIfType<[f64], CCAssignToReg<[D0]>> ]>;
__builtin_yyy -> @llvm.xxx.yyy -> yyy
tools/clang/include/clang/Basic/
底下。例如: BuiltinsARM.def。會用到 Builtins.def 定義的型別和屬性。BUILTIN(__builtin_atan2 , "ddd" , "Fnc" ) builtin 函式名 返回和參數型別 builtin 屬性
EmitARMBuiltinExpr
。EmitARMBuiltinExpr
。EmitTargetArchBuiltinExpr() 調用定義前述的 Emit 函式。Sema::CheckARMBuiltinFunctionCall
。include/llvm/IR/
底下平台各自定義 LLVM IR 階段會產生的 intrinsic,以 llvm.xxx 開頭,其中 xxx 為目標平台名稱。例如: IntrinsicsHexagon.td。會用到 Intrinsics.td 中定義的型別和屬性。注意!在 Intrinsics.td 最後要引用自己定義的 *.td 檔。def int_xxx_foo : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadArgMem]>; 返回型別 參數型別 屬性
lib/Target/
底下各自的目錄定義如何將 LLVM IR 中的 intrinsic 對應到實際的指令。例如: HexagonIntrinsics.td。let isPseudo = 1 in { def FOO : PseudoI<(outs i32mem:$dst), (ins i32mem:$src1, i32mem:$src2), [(set i32mem:$dst, (int_xxx_foo i32mem:$src1, i32mem:$src2))]>; }
class SDNode<string opcode, SDTypeProfile typeprof, list<SDNodeProperty> props = [], string sdclass = "SDNode"> : SDPatternOperator { string Opcode = opcode; string SDClass = sdclass; let Properties = props; SDTypeProfile TypeProfile = typeprof; } def imm : SDNode<"ISD::Constant" , SDTIntLeaf , [], "ConstantSDNode">;
class PatFrag<dag ops, dag frag, code pred = [{}], SDNodeXForm xform = NOOP_SDNodeXForm> : SDPatternOperator { dag Operands = ops; dag Fragment = frag; code PredicateCode = pred; code ImmediateCode = [{}]; SDNodeXForm OperandTransform = xform; }
class Pattern<dag patternToMatch, list<dag> resultInstrs> { dag PatternToMatch = patternToMatch; list<dag> ResultInstrs = resultInstrs; list<Predicate> Predicates = []; // See class Instruction in Target.td. int AddedComplexity = 0; // See class Instruction in Target.td. } class Pat<dag pattern, dag result> : Pattern<pattern, [result]>; 匹配 輸出
class T_I_pat <InstHexagon MI, Intrinsic IntID> : Pat <(IntID imm:$Is), // 匹配 (MI imm:$Is)>; // 輸出
def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; 匹配 輸出
class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> { // SubRegs - N lists of registers to be zipped up. Super-registers are // synthesized from the first element of each SubRegs list, the second // element and so on. list<dag> SubRegs = Regs; // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list<SubRegIndex> SubRegIndices = Indices; } // VGPR_32 共有 VGPR0 ~ VGPR255 個暫存器。 def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "VGPR%u", 0, 255))> { let AllocationPriority = 1; let Size = 32; } // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (trunc VGPR_32, 253)), // VGPR0 ~ VGPR252 (add (shl VGPR_32, 1)), // VGPR1 ~ VGPR255 (add (shl VGPR_32, 2)), // VGPR2 ~ VGPR255 (add (shl VGPR_32, 3))]>; // VGPR3 ~ VGPR255 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> { let Size = 128; // Requires 4 v_mov_b32 to copy let CopyCost = 4; let AllocationPriority = 4; }
// Calling convention for leaf functions def CC_AMDGPU_Func : CallingConv<[ // 針對要映射到 quad-register 的型別,調用相應的處理函式。 CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>, CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>, ]>;
def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">; let Predicates = [HasV5T] in { def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; } def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>;
不同版本指令之間的映射關係,例如 Hexagon 的 Dot-New 指令 (Hexagon DSP Architecture),可以使用 InstrMapping 表示 (How To Use Instruction Mappings)。例如我們想建立 add/add.t/add.f 和 sub/sub.t/sub.f 的映射表,以 add/sub 為 key,可以取得不同版本的指令,如: add.t/sub.t 和 add.f/sub.f。
no pred | true | false | |
---|---|---|---|
add | add | add.t | add.f |
sub | sub | sub.t | sub.f |
def getPredOpcode : InstrMapping { // 描述此 mapping 的字串。 let FilterClass = "PredRel"; // add/add.t/add.f 和 sub/sub.t/sub.f 分別有相同的 RowFields。 let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isBrTaken", "isNT"]; // add/sub,add.t/sub.t 和 add.f/sub.f 分別有相同的 ColFields。 let ColFields = ["PredSense"]; // 取 ColFields 某值作為 mapping 中的 key。這裡取 add 和 sub 作為 key。 let KeyCol = [""]; // 取 ColFields 某值作為 mapping 中的 value。add 映射到 add.t/add.f,sub 映射到 sub.t/sub.f。 let ValueCols = [["true"], ["false"]]; }
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr, InstrItinClass itin, IType type> : Instruction { let Namespace = "Hexagon"; // Fields used for relation models. string isNT = ""; // set to "true" for non-temporal vector stores. string BaseOpcode = ""; string PredSense = ""; string PNewValue = ""; string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions let PredSense = !if(isPredicated, !if(isPredicatedFalse, "false", "true"), ""); let PNewValue = !if(isPredicatedNew, "new", ""); let NValueST = !if(isNVStore, "true", "false"); let isNT = !if(isNonTemporal, "true", "false"); }
class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> : InstHexagon<outs, ins, asmstr, [], "", itin, type>; def A2_add : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011000; let hasNewValue = 1; let opNewValue = 0; let CextOpcode = "A2_add"; let InputType = "reg"; let BaseOpcode = "A2_add"; let isCommutable = 1; let isPredicable = 1; } def A2_paddf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = add($Rs32,$Rt32)", tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; let opNewValue = 0; let CextOpcode = "A2_add"; let InputType = "reg"; let BaseOpcode = "A2_add"; } def A2_paddt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = add($Rs32,$Rt32)", tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; let CextOpcode = "A2_add"; let InputType = "reg"; let BaseOpcode = "A2_add"; }
int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : Hexagon::PredSense_true; int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); if (CondOpcode >= 0) // Valid Conditional opcode/instruction return CondOpcode; llvm_unreachable("Unexpected predicable instruction"); }
在 Hexagon Dot-New 指令 (Hexagon DSP Architecture) 的應用。原本 VLIW 的一個 bundle (packet) 內的指令不會有依賴關係,為了增加 bundle 內的指令個數,引入 Dot-New 指令,使得 bundle 內的指令可以有依賴關係。
isLegalToPacketizeTogether
檢視 SUI 是否能和 SUJ 做成一個 packet。bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // SUJ 是當前 packet 內已存在的指令,檢查 SUI 是否和 SUJ 有依賴,以及該種依賴是否可以處理, // 使得 SUI 和 SUJ 可以放在同一個 packet。 for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { if (SUJ->Succs[i].getSUnit() != SUI) continue; // SUI 和 SUJ 是何種依賴? SDep::Kind DepType = SUJ->Succs[i].getKind(); // For instructions that can be promoted to dot-new, try to promote. if (DepType == SDep::Data) { if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { if (promoteToDotNew(I, DepType, II, RC)) { PromotedToDotNew = true; if (cannotCoexist(I, J)) FoundSequentialDependence = true; continue; } } if (HII->isNewValueJump(I)) continue; }
promoteToDotNew
透過 InstrMapping 生成的接口,返回當前指令對應的 Dot-New 版本。// Promote an instruction to its .new form. At this time, we have already // made a call to canPromoteToDotNew and made sure that it can *indeed* be // promoted. bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC) { assert (DepType == SDep::Data); int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) NewOpcode = HII->getDotNewPredOp(MI, MBPI); else NewOpcode = HII->getDotNewOp(MI); MI.setDesc(HII->get(NewOpcode)); return true; }
先介紹後端主要目錄結構。後端代碼位於 include/llvm
和 lib
底下幾個目錄:
tblgen
代碼。LLVM 利用 tblgen
讀取目標平台 *.td 檔生成 instruction selector,instruction scheduling,register allocation 和 assembly printing。$ cd lib/Target/X86 $ llvm-tblgen X86.td -print-enums -class=Register -I ../../../include/ $ llvm-tblgen X86.td -print-enums -class=Instruction -I ../../../include/
tblgen
語法近似 C++ template。*.td 檔主體為 record,record 分為兩類: class 和 definition。class 是模板 record,definition 是實例 record。$ cat insns.td // 指令基類 class Insn<bits <4> MajOpc, bit MinOpc> { bits<32> insnEncoding; // 指令編碼,長度 32 位。 let insnEncoding{15-12} = MajOpc; // 第 15 至 12 位為 MajOpc let insnEncoding{11} = MinOpc; // 第 11 位為 MinOpc } multiclass RegAndImmInsn<bits <4> opcode> { def rr : Insn<0x00, 0>; // MajOpc 皆為 0x00, MinOpc 分別為 0 和 1。 def ri : Insn<0x00, 1>; } // SUB 指令 def SUB: Insn<0x00, 0>; // MajOpc = 0x00, MinOpc = 0 // ADD 指令。運算元可以是暫存器 + 暫存器,或是暫存器 + 立即數。 defm ADD : RegAndImmInsn<0x01>; $ llvm-tblgen -print-records insns.td
這裡介紹開發後端會碰到的主要幾個檔案。參考文檔:
以 X86 為例,X86TargetMachine.h 和 X86.td 是兩個最主要的檔案。上層平台無關代碼生成算法透過 X86TargetMachine.h 的接口得到底層平台相關資訊。X86.td 描述 ISA 擴展和處理器家族,並 include 其它 *.td 檔。
X86RegisterInfo.td
生成的 X86GenRegisterInfo.inc
。提供關於暫存器組的各項資訊,諸如: 被調用方保存暫存器,暫存器分配次序等等。include/llvm/Target/Target.td
,從 DAG (MI) 轉成目標匯編。include/llvm/Target/TargetSelectionDAG.td
定義各種 SDNode 型別。X86ISelLowering.h
繼承 TargetLowering 實現回調,並定義平台相關的 SDNode。getExceptionPointerRegister
和 getExceptionSelectorRegister
。參考 X86ISelLowering.cpp。例外處理需要傳遞兩個指針,一個指向例外本身,另一個指向例外的型別。前者由 getExceptionPointerRegister
傳遞,後者由 getExceptionSelectorRegister
傳遞。EH_RETURN
,以 Mips 為例。參考 MipsInstrInfo.td 的註解:// Exception handling related node and instructions. // The conversion sequence is: // ISD::EH_RETURN -> MipsISD::EH_RETURN -> // MIPSeh_return -> (stack change + indirect branch)
MipsTargetLowering::lowerEH_RETURN
(MipsISelLowering.cpp) 將 ISD::EH_RETURN
替換成 MipsISD::EH_RETURN
。ISD::EH_RETURN
的 offset 和 handler 選擇可用的暫存器傳遞,MipsTargetLowering::lowerEH_RETURN
選擇使用暫存器 V0 和 V1 傳遞。MipsDAGToDAGISel::Select
(MipsDAGToDAGISel.cpp) 將 MipsISD::EH_RETURN
匹配成 MIPSeh_return32
/MIPSeh_return64
。MIPSeh_return32
/MIPSeh_return64
是在 MipsInstrInfo.td 定義的偽指令。def SDT_MipsEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; def MIPSehret : SDNode<"MipsISD::EH_RETURN", SDT_MipsEHRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1, isCTI = 1 in { def MIPSeh_return32 : MipsPseudo<(outs), (ins GPR32:$spoff, GPR32:$dst), [(MIPSehret GPR32:$spoff, GPR32:$dst)]>; // lowerEH_RETURN 生成 (MipsISD::EH_RETURN V0, V1) // 和此處需要一致。 def MIPSeh_return64 : MipsPseudo<(outs), (ins GPR64:$spoff, GPR64:$dst), [(MIPSehret GPR64:$spoff, GPR64:$dst)]>; }
MipsSEInstrInfo::expandEhReturn
(MipsSEInstrInfo.cpp) 擴展偽指令 MIPSeh_return32
/MIPSeh_return64
,並生成指令,用 V0 調整棧,並將 V1 作為返回地址。EH_RETURN
,以 XCore 為例。於 XCoreInstrInfo.td 定義 EH_RETURN
偽指令。於 XCoreISelLowering.h 定義 XCoreISD::EH_RETURN
。XCoreTargetLowering::LowerEH_RETURN
(XCoreISelLowering.cpp) 將 ISD::EH_RETURN
轉成 XCoreISD::EH_RETURN
,並將 offset 和 handler 保存至暫存器 R2 和 R3。__builtin_eh_return
在 _Unwind_RaiseException
被調用,其中 caller-saved 暫存器 R0 和 R1 已被 personality function 用於傳參,因此這裡使用暫存器 R2 和 R3 傳遞 offset 和 handler。 XCoreDAGToDAGISel::Select
(XCoreISelDAGToDAG.cpp) 將 XCoreISD::EH_RETURN
替換成 EH_RETURN
偽指令。XCoreFrameLowering::emitEpilogue
(XCoreFrameLowering.cpp) 擴展偽指令 XCore::EH_RETURN
,並生成指令,用 R2 調整棧,並將 R3 作為返回地址。EH_RETURN
,以 Hexagon 為例。HexagonTargetLowering::LowerEH_RETURN
(HexagonISelLowering.cpp) 將 ISD::EH_RETURN
轉成 HexagonISD::EH_RETURN
,並將 offset 存到暫存器 R28,handler 存到暫存器 R30 所指向的內存。HexagonDAGToDAGISel::Select
(HexagonISelDAGToDAG.cpp) 將 HexagonISD::EH_RETURN
替換成 EH_RETURN_JMPR
偽指令。HexagonFrameLowering::insertEpilogueInBlock
(HexagonFrameLowering.cpp) 替換 EH_RETURN_JMPR
偽指令,並生成指令,用 R28 調整棧,並將 R30 所存的值作為返回地址。EH_RETURN
,以 X86 為例。X86TargetLowering::LowerEH_RETURN
(X86ISelLowering.cpp) 將 ISD::EH_RETURN
轉成 X86ISD::EH_RETURN
,並將 handler 存在 frame + offset 偏移處,且將 frame + offset 存在 ECX。X86DAGToDAGISel::Select
將 X86ISD::EH_RETURN
替換成 X86::EH_RETURN
偽指令。emitPrologue
/emitEpilogue
,以 Mips 為例。可以透過 MipsFunctionInfo 定義的 callsEhReturn 或是 MachineFunction 定義的 callsEHReturn
得知函式是否有調用 eh_return。MipsSEFrameLowering::emitPrologue
(MipsSEFrameLowering.cpp)。注意!此處需要為例外處理需要用到的暫存器生成對應的 cfi。if (MipsFI->callsEhReturn()) { // Insert instructions that spill eh data registers. for (int I = 0; I < 4; ++I) { if (!MBB.isLiveIn(ABI.GetEhDataReg(I))) MBB.addLiveIn(ABI.GetEhDataReg(I)); TII.storeRegToStackSlot(MBB, MBBI, ABI.GetEhDataReg(I), false, MipsFI->getEhDataRegFI(I), RC, &RegInfo); } // Emit .cfi_offset directives for eh data registers. for (int I = 0; I < 4; ++I) { int64_t Offset = MFI.getObjectOffset(MipsFI->getEhDataRegFI(I)); unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, Offset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } }
MipsSEFrameLowering::emitEpilogue
(MipsSEFrameLowering.cpp)。if (MipsFI->callsEhReturn()) { const TargetRegisterClass *RC = ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; // Find first instruction that restores a callee-saved register. MachineBasicBlock::iterator I = MBBI; for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i) --I; // Insert instructions that restore eh data registers. for (int J = 0; J < 4; ++J) { TII.loadRegFromStackSlot(MBB, I, ABI.GetEhDataReg(J), MipsFI->getEhDataRegFI(J), RC, &RegInfo); } }
MipsSEFrameLowering::determineCalleeSaves
(MipsSEFrameLowering.cpp) 會調用 MipsFunctionInfo::createEhDataRegsFI
(MipsMachineFunction.cpp) 為例外處理使用到的暫存器,創建 spill 空間。emitPrologue
/emitEpilogue
依據該位置生成 load/store 代碼。emitPrologue
/emitEpilogue
,以 XCore 為例。XCoreFrameLowering::emitPrologue
(XCoreFrameLowering.cpp)。if (XFI->hasEHSpillSlot()) { // The unwinder requires stack slot & CFI offsets for the exception info. // We do not save/spill these registers. const Function *Fn = &MF.getFunction(); const Constant *PersonalityFn = Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; SmallVector<StackSlotInfo, 2> SpillList; GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, MF.getSubtarget().getTargetLowering()); assert(SpillList.size()==2 && "Unexpected SpillList size"); EmitCfiOffset(MBB, MBBI, dl, TII, MRI->getDwarfRegNum(SpillList[0].Reg, true), SpillList[0].Offset); EmitCfiOffset(MBB, MBBI, dl, TII, MRI->getDwarfRegNum(SpillList[1].Reg, true), SpillList[1].Offset); }
XCoreFrameLowering::emitEpilogue
(XCoreFrameLowering.cpp)。if (RetOpcode == XCore::EH_RETURN) { // 'Restore' the exception info the unwinder has placed into the stack // slots. const Function *Fn = &MF.getFunction(); const Constant *PersonalityFn = Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; SmallVector<StackSlotInfo, 2> SpillList; GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, MF.getSubtarget().getTargetLowering()); RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); // Return to the landing pad. unsigned EhStackReg = MBBI->getOperand(0).getReg(); unsigned EhHandlerReg = MBBI->getOperand(1).getReg(); BuildMI(MBB, MBBI, dl, TII.get(XCore::SETSP_1r)).addReg(EhStackReg); BuildMI(MBB, MBBI, dl, TII.get(XCore::BAU_1r)).addReg(EhHandlerReg); MBB.erase(MBBI); // Erase the previous return instruction. return; }
XCoreFrameLowering::determineCalleeSaves
(XCoreFrameLowering.cpp) 會調用 XCoreFunctionInfo::createEHSpillSlot
(XCoreMachineFunctionInfo.cpp) 為例外處理使用到的暫存器,創建 spill 空間。emitPrologue
/emitEpilogue
依據該位置生成 load/store 代碼。// 如果之前的代碼都不能處理,看是否能調用 libcall。 if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); return; } // 如果 LC 對應的函式名為 nullptr,嘗試 ExpandShiftWithUnknownAmountBit 是否能加以處理。 if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) llvm_unreachable("Unsupported shift!");
if (countTrailingOnes(Imm) == countPopulation(Imm))
PPCMIPeephole::simplifyCode()
搜尋特定的 MachineInstr 加以優化。交叉編譯有底下術語:
/lib/n32
和 /lib/n64
分別是支持 MIPS n32 和 n64 ABI 的函式庫。交叉工具鏈,除了編譯器,基本牽涉到底下幾個元件:
--target=<triple>
(Triple.h)-march=<arch>
: 指定指令集架構版本。如: armv4t。此選項同時會設置預設的 CPU。-mcpu=<cpu>
: 指定 CPU。如: cortex-a8。此選項同時會設置預設的 float-abi。-mfloat-abi=<abi>
: 指定軟浮點或是硬浮點,以及 abi。詳細請見 LLVM Testing Infrastructure Guide。
$ ./build.Ninja/bin/llvm-lit llvm/test/CodeGen/* $ ./build.Ninja/bin/llvm-lit llvm/tools/clang/test/CodeGen/*
;CHECK:
匹配指定的字串。多個 ;CHECK:
依序匹配。;CHECK:
和 ;CHECK-NEXT:
搭配可以匹配連續的字串。;CHECK-LABEL:
;CHECK-LABEL:
後的字串,把欲匹配的文本切成數個 block。每個 block 再應用各自的 ;CHECK:
。 發行版測試流程請見 How To Validate a New Release。一般在預定發行日期前一個月,會公告徵求測試者 ([Release-testers] [5.0.0 Release] Schedule and call for testers)。
update_llc_test_checks.py
。臭蟲請發送到這裡 並先閱讀 How To Submit A Bug。目前發送補丁方式請參照 Code Reviews with Phabricator。
git diff -U999999 master > b.patch
[Doc]
。