# git clone git://sourceware.org/git/binutils-gdb.git $ wget http://ftp.gnu.org/gnu/binutils/binutils-2.21.1.tar.bz2; tar xvf binutils-2.21.1.tar.bz2 # 只編譯 gas $ mkdir build; cd build $ ../binutils-2.21.1/configure --enable-as --prefix=$INSTALL $ make; make install
主要目錄:
以 xtensa 為例 (xtensa 需要處理 VLIW。也可以參考 Add support for Andes NDS32):
fragP→tc_frag_data
取得。tinsn_to_insnbuf (&tinsn, insnbuf); // tinsn -> insnbuf // insnbuf -> chars xtensa_insnbuf_to_chars (xtensa_default_isa, insnbuf, (unsigned char *) buf, 0);
開發流程:
$ as t.s -o t.o $ objdump -D t.o
$ as t.s -o t.o # 觀察重定位項是否正確。 $ objdump -r t.o $ objdump -D t.o # 觀察鏈結器是否能正確修正重定位項。 $ ld t.o $ objdump -D a.out
# -d 只反匯編包含指令的部分,-D 對所有的段進行反匯編,-s 把所有段的內容以 16 進制的方式印出。 $ objdump -d hello # 只列出特定 function 的組語。 $ objdump -d hello | grep -A20 main.: # 編譯時加上 -g,顯示源碼。 $ objdump -S hello # -h 顯示段表,-p 顯示 program header $ objdump -h hello # -t 顯示 symbol table,-T 顯示 dynamic symbol table $ objdump -t hello # 顯示 ELF header 詳細訊息 $ readelf -h hello
$ readelf -h hello | less ELF Header: Magic: 7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00 Class: ELF32 Data: 2's complement, little endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: EXEC (Executable file) Machine: ARM Version: 0x1 Entry point address: 0x8309 Start of program headers: 52 (bytes into file) Start of section headers: 4516 (bytes into file) Flags: 0x5000002, has entry point, Version5 EABI Size of this header: 52 (bytes) Size of program headers: 32 (bytes) Number of program headers: 9 Size of section headers: 40 (bytes) Number of section headers: 30 Section header string table index: 27
ELF 格式最主要精神在於用 header 描述 ELF 文件相關資訊,主要有三個 header (ELF文件):
目標文件需要鏈接器做進一步處理,所以一定有 section header table;可執行文件需要加載運行,所以一定有 program header table;而共享庫既要加載運行,又要在加載時做動態鏈接,所以既有 section header table 又有 program header table。
有幾個特殊段:
/* We want .cfi_* pseudo-ops for generating unwind info. */ #define TARGET_USE_CFIPOP 1 #define DWARF2_DEFAULT_RETURN_COLUMN 31 #define DWARF2_CIE_DATA_ALIGNMENT (-4) #define tc_regname_to_dw2regnum nios2_regname_to_dw2regnum extern int nios2_regname_to_dw2regnum (char *regname); #define tc_cfi_frame_initial_instructions nios2_frame_initial_instructions extern void nios2_frame_initial_instructions (void);
匯編器 (Assembler ),讀入匯編代碼,輸出目的檔 (object file) 和清單檔 (listing file)。處理匯編指令、巨集和匯編指示符。
BEQ AB JMP AB AB:
USE
匯編指示符切換不同的 location counter,其實就是將代碼切成多個 section。#
前綴。如果平台不支持常量 operand,改以 literal pool 實現,將常量寫入 literal pool,再從 literal pool 載入常量。# 代碼行號 位址 目的碼 源代碼 line LC Object Source
FOO: .EQU BAR ; forward reference to BAR ; snip BAR: .EQU 0 ; BAR is defined here
鏈結器 (Linker) 負責將輸入目的檔中的各個段 (section) 合併,輸出至最終執行檔。鏈結器在各個輸出段之間,會依照對齊要求,插入填充値。另外,鏈結器在合併各輸入段時,也會依照對齊要求,插入填充値。
Each object file has, among other things, a list of sections. We sometimes refer to a section in an input file as an input section; similarly, a section in the output file is an output section.
A section may be marked as loadable, which means that the contents should be loaded into memory when the output file is run. A section with no contents may be allocatable, which means that an area in memory should be set aside, but nothing in particular should be loaded there (in some cases this memory must be zeroed out). A section which is neither loadable nor allocatable typically contains some sort of debugging information.
Every loadable or allocatable output section has two addresses. The first is the VMA, or virtual memory address. This is the address the section will have when the output file is run. The second is the LMA, or load memory address. This is the address at which the section will be loaded. In most cases the two addresses will be the same. An example of when they might be different is when a data section is loaded into ROM, and then copied into RAM when the program starts up (this technique is often used to initialize global variables in a ROM based system). In this case the ROM address would be the LMA, and the RAM address would be the VMA.
Every object file also has a list of symbols, known as the symbol table. A symbol may be defined or undefined. Each symbol has a name, and each defined symbol has an address, among other information. If you compile a C or C\+\+ program into an object file, you will get a defined symbol for every defined function and global or static variable. Every undefined function or global variable which is referenced in the input file will become an undefined symbol.
You use the `:phdr' output section attribute to place a section in a particular segment.
$ ld -verbose
ENTRY(_start) // 指定入口函式 SECTIONS // 其中的 SECTIONS 命令指示 linker 如何合併目標文件的 section { // 定義在目標文件被引用,但未被所有目標文件定義的符號。 // SEGMENT_START 是內建函式,返回指定 segment 的基底位址。如果命令行參數沒有指定該 segment 的基底位址,返回預設值。 // . 這個特殊符號代表目前位址計數器的值。 PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS; // 將所有輸入目標文件的 .interp 合併成一個 .interp 至輸出目標文件。 .interp : { *(.interp) } // .init 是在進入 main 函式之前必須要執行的初始化,與之對應的是 .fini。 // 如果命令行參數有 --gc-sections,ld 會去掉輸入目標文件中被視為無用的 section。 // KEEP 代表保留該 section。 // [=FILLEXP] 代表該段空隙處以 FILLEXP 填滿。 .init : { KEEP (*(.init)) } =0x90909090 // 在 C++ 中,全局建構子必須在 main 函式之前被呼叫,全局解構子必須在 main 返回後被呼叫。 // 透過 crtbegin.o 和 crtend.o,以及 .ctors 和 .dtors 可達成以上目的。 .ctors : { KEEP (*crtbegin.o(.ctors)) KEEP (*crtbegin?.o(.ctors)) // 在對 .ctors 排序之前,不包含 crtend 的 .ctors。 // crtend 的 .ctors 包含 ctors 結尾的標記,必須擺在最後。 KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors)) KEEP (*(SORT(.ctors.*))) KEEP (*(.ctors)) } // .bss 預留空間給未初始化的全局變量和局部靜態變量。 .bss : { *(.dynbss) *(.bss .bss.* .gnu.linkonce.b.*) *(COMMON) // ALIGN(exp,align) . = ALIGN(. != 0 ? 64 / 8 : 1); } // ALIGN(align) . = ALIGN(64 / 8); _end = .; PROVIDE (end = .); }
In some cases, it is desirable for a linker script to define a symbol only if it is referenced and is not defined by any object included in the link.
對於 section 來說,有虛擬地址 (Virtual Memory Address) 或加載地址 (Load Memory Address) 之分。虛擬地址代表該 section 被執行時所在的位址; 加載地址代表該 section 被加載時所在的位址。一般情況下,虛擬地址與加載地址具有相同的值。但對於嵌入式系統來說,可能會出現加載該 section 至 ROM (flash),等到執行時再把該 section 複製到 RAM (內存) 再開始執行。
嵌入式系統的鏈結腳本範例。
// MEMORY 描述目標平台內存區塊起始位址和長度,也可以指定區塊的存取權限。 // ld 預設可以使用內存任意位址。MEMORY 可以指定 ld 使用那些區塊。 MEMORY { // 定義 rom 和 ram 兩個內存區塊,並指定其起始位址和長度。 rom : ORIGIN = 0x0, LENGTH = 32K ram : ORIGIN = 0x8000, LENGTH = 32K } SECTIONS { .loader : { *(.loader) } > rom // .loader 段放在 rom 區塊。 .text ram_start : { text_start = . ; *(.text) text_end = . ; } AT > rom // 指定该 section 加载地址的范围, }
Any otherwise unspecified regions of memory within the output section (for example, gaps left due to the required alignment of input sections) will be filled with the value, repeated as necessary.
The FILL command is similar to the `=fillexp' output section attribute, but it only affects the part of the section following the FILL command, rather than the entire section. If both are used, the FILL command takes precedence.
// .fini_array 是 m_text 內存區段中最後一個段。 .fini_array : { PROVIDE_HIDDEN (__fini_array_start = .); KEEP (*(SORT(.fini_array.*))) KEEP (*(.fini_array*)) PROVIDE_HIDDEN (__fini_array_end = .); // 在此段中定義符號 ___ROM_AT 供外部使用。因為填充段 .fill 的存在,將此符號定義移到 .fill。 /*___ROM_AT = .; */ } > m_text // .fill 為人工創造的填充段。 .fill : { // 自 FILL 以下填充 0xDEADBEEF。 FILL(0xDEADBEEF); // 設置位址指示符,並填充 .fill 最後一個 byte。此處是為了 .fill 有內容。 . = ORIGIN(m_text) + LENGTH(m_text) - 1; BYTE(0xAA) // 定義原本定義在 .fini_array 段的符號 ___ROM_AT。 ___ROM_AT = .; } > m_text
.fill <start address>: { FILL(<fill pattern>); . += <end address> - <start address> - 1; BYTE(<fill pattern>) } > ram // 自位址 0x3b0 開始,到 0x3ff 為止,填充 0xCC。 .fill 0x3b0: { FILL(0xCC); . += 0x400 - 0x3b0 - 1; BYTE(0xCC) } > ram
位址無關代碼 (Position Independent Code (PIC))。
# 列出 LD_DEBUG 有哪些選項 $ LD_DEBUG=help ./a.out # 顯示 runtime linker 做了哪些事 $ LD_DEBUG=all ./a.out
# 將映像檔移至 4G 虛擬位址以上。-fPIE 是編譯器選項,-pie 是鏈結器選項。 $ gcc -Wl,-Ttext-segment=0x100000000 -fPIE -pie hello.c -o hello $ ./hello $ cat /proc/`pidof hello`/maps
-mcmodel
選項。預設情況下所鏈結的函式庫都是以 -mcmodel=small
編譯,因此必須在虛擬位址 2G 以下的空間。Before PIE mode was added, a program's executable could not be placed at a random address in memory; only PIC dynamic libraries could be relocated to a random address. http://stackoverflow.com/questions/2463150/fpie-position-independent-executable-option-gcc-ld
-M --print-map Print a link map to the standard output. A link map provides information about the link, including the following: · Where object files are mapped into memory. · How common symbols are allocated. · All archive members included in the link, with a mention of the symbol which caused the archive member to be brought in. · The values assigned to symbols. Note - symbols whose values are computed by an expression which involves a reference to a previous value of the same symbol may not have correct result displayed in the link map. This is because the linker discards intermediate results and only retains the final value of an expression. Under such circumstances the linker will display the final value enclosed by square brackets. Thus for example a linker script containing: foo = 1 foo = foo * 4 foo = foo + 8 will produce the following output in the link map if the -M option is used: 0x00000001 foo = 0x1 [0x0000000c] foo = (foo * 0x4) [0x0000000c] foo = (foo + 0x8) See Expressions for more information about expressions in linker scripts.
; section vma size lma .ovly0 0x00100100 0x1f4 load address 0x00108000
# Project.out 為輸入,Project.bin 為輸出。 $ arm-none-eabi-objcopy.exe -O binary Project.out Project.bin
An overlay description provides an easy way to describe sections which are to be loaded as part of a single memory image but are to be run at the same memory address. At run time, some sort of overlay manager will copy the overlaid sections in and out of the runtime memory address as required, perhaps by simply manipulating addressing bits. This approach can be useful, for example, when a certain region of memory is faster than another.
sections which are to be loaded as part of a single memory image
but are to be run at the same memory address.
The sections are all defined with the same starting address. The load addresses of the sections are arranged such that they are consecutive in memory starting at the load address used for the OVERLAY as a whole.
If the NOCROSSREFS keyword is used, and there are any references among the sections, the linker will report an error. Since the sections all run at the same address, it normally does not make sense for one section to refer directly to another.
For each section within the OVERLAY, the linker automatically provides two symbols. The symbol __load_start_secname is defined as the starting load address of the section. The symbol __load_stop_secname is defined as the final load address of the section.
/* Overlay sections: */ /* section vma : lma */ .ovly0 0x1001000 : AT (0x108000) { foo.o(.text) } .ovly1 0x1001000 : AT (0x109000) { bar.o(.text) } .ovly2 0x1002000 : AT (0x10a000) { baz.o(.text) } .ovly3 0x1002000 : AT (0x10b000) { grbx.o(.text) } .data00 0x2001000 : AT (0x10c000) { foo.o(.data) } .data01 0x2001000 : AT (0x10d000) { bar.o(.data) } .data02 0x2002000 : AT (0x10e000) { baz.o(.data) } .data03 0x2002000 : AT (0x10f000) { grbx.o(.data) } .text : { /* snip */ } /* snip */ .data : { _ovly_table = .; LONG(ABSOLUTE(ADDR(.ovly0))); /* VMA */ LONG(SIZEOF(.ovly0)); /* SIZE */ LONG(LOADADDR(.ovly0)); /* LMA */ LONG(0); /* MAPPED */ /* snip */ LONG(ABSOLUTE(ADDR(.data01))); LONG(SIZEOF(.data01)); LONG(LOADADDR(.data01)); LONG(0); /* snip */ _novlys = .; LONG((_novlys - _ovly_table) / 16); /* snip */ }
int main () { int a, b, c, d, e; OverlayLoad (0); // 載入 foo 的 .text 段 OverlayLoad (4); // 載入 foo 的 .data 段 a = foo (1); // a = 'f' + 'o' + 'o' OverlayLoad (1); // 載入 bar 的 .text 段 OverlayLoad (5); // 載入 bar 的 .data 段 b = bar (1); // b = 'b' + 'a' + 'r' OverlayLoad (2); // 載入 baz 的 .text 段 OverlayLoad (6); // 載入 baz 的 .data 段 c = baz (1); // c = 'b' + 'a' + 'z' OverlayLoad (3); // 載入 grbx 的 .text 段 OverlayLoad (7); // 載入 grbx 的 .data 段 d = grbx (1); // d = 'g' + 'r' + 'b' + 'x' e = a + b + c + d; return (e != ('f' + 'o' +'o' + 'b' + 'a' + 'r' + 'b' + 'a' + 'z' + 'g' + 'r' + 'b' + 'x')); }
bool OverlayLoad (unsigned long ovlyno) { unsigned long i; if (ovlyno < 0 || ovlyno >= _novlys) exit (-1); /* fail, bad ovly number */ if (_ovly_table[ovlyno][MAPPED]) return TRUE; /* this overlay already mapped -- nothing to do! */ for (i = 0; i < _novlys; i++) if (i == ovlyno) _ovly_table[i][MAPPED] = 1; /* this one now mapped */ else if (_ovly_table[i][VMA] == _ovly_table[ovlyno][VMA]) _ovly_table[i][MAPPED] = 0; /* this one now un-mapped */ // 對 memcpy 的包裝。在調用 memcpy 之前,可能需要對位址做轉換。 ovly_copy (_ovly_table[ovlyno][VMA], /* dst */ _ovly_table[ovlyno][LMA], /* src */ _ovly_table[ovlyno][SIZE]); FlushCache (); // 目標平台若有快取 (cache),需要將其內容清除。 _ovly_debug_event (); // 提供 GDB 下斷點,在 _ovly_table 內容被更改時,通知 GDB。 return TRUE; }
# 預設即為 Berkeley 格式。System V 格式列出較詳細的資訊,但這裡不需要。 $ size --format=Berkeley ranlib size text data bss dec hex filename 294880 81920 11592 388392 5ed28 ranlib 294880 81920 11888 388688 5ee50 size
如果一個程式庫 A 需要依賴程式庫 B,在連結命令中 A 應該要放在 B 之前。
--start-group archives --end-group
The specified archives are searched repeatedly until no new undefined references are created. Normally, an archive is searched only once in the order that it is specified on the command line. If a symbol in that archive is needed to resolve an undefined symbol referred to by an object in an archive that appears later on the command line, the linker would not be able to resolve that reference. By grouping the archives, they will all be searched repeatedly until all possible references are resolved.
--whole-archive
For each archive mentioned on the command line after the --whole-archive option, include every object file in the archive in the link, rather than searching the archive for the required object files.
$ cat hello.txt hello world! $ cat main.c #include <stdio.h> extern char _binary_hello_txt_start[]; int main() { char *p; p = _binary_hello_txt_start; printf("%s", p); return 0; } $ ld -r -b binary -o hello.o hello.txt $ gcc -o main.exe main.c hello.o $ ./main.exe hello world!
The weak attribute causes the declaration to be emitted as a weak symbol rather than a global. This is primarily useful in defining library functions that can be overridden in user code, though it can also be used with non-function declarations. Weak symbols are supported for ELF targets, and also for a.out targets when using the GNU assembler and linker.
// def.c #include <stdio.h> __attribute((weak)) int f() { printf("I am in def.c\n"); } // main.c #include <stdio.h> // override function f in def.c void f() { printf("I am in main.c\n"); } int main() { if (f) { f(); } else { printf("f() is not found\n"); } return 0; }
The weakref attribute marks a declaration as a weak reference.
#include <stdio.h> // 1. static int x() __attribute__ ((weakref ("y"))); // 2. static int x() __attribute__ ((weak, weakref, alias ("y"))); // 缺少參數的 weakref,必須伴隨對應的 alias,指定目標函式。 static int x() __attribute__ ((weakref)); static int x() __attribute__ ((alias ("y"))); static int y() { printf("I am in y()\n"); } int main() { x(); // I am in y() }
Without arguments, it should be accompanied by an alias attribute naming the target symbol.
static int x() __attribute__ ((weakref)); static int x() __attribute__ ((alias ("y")));
Optionally, the target may be given as an argument to weakref itself.
static int x() __attribute__ ((weakref ("y")));
In either case, weakref implicitly marks the declaration as weak. Without a target, given as an argument to weakref or to alias, weakref is equivalent to weak.
static int x() __attribute__ ((weak, weakref, alias ("y")));
COMMON其實主要的用途是用來讓linker做merge用的。因此uninitialized的global變數會被暫時放在COMMON section,等Linker做完merge之後再看情況搬到正確的section中,也可能繼續留在COMMON section。
ld: section .data_bank1 loaded at [0000000000002000,0000000000003fff] overlaps section .text loaded at [0000000000000630,00000000000020df]
// gcc -g hello.c // addr2line -e a.out 0x400536 #offset in the `main` function #include <stdio.h> int main() { printf("hello\n"); return 0; }