This file has four sections: a header, the program text, a
symbol table, and relocation bits. The last two may be
empty if the program was loaded with the -s option of ld or
if the symbols and relocation have been removed by strip.
The header always contains 6 words:
1 a "br .+14" instruction (205(8))
2 The size of the program text
3 The size of the symbol table
4 The size of the relocation bits area
5 The size of a data area
6 A zero word (unused at present)
The sizes of the program, symbol table, and relocation area
are in bytes but are always even. The branch instruction
serves both to identify the file and to jump to the text
entry point. The program text size includes the 6-word
header.
The data area is used when the file is executed; the exec
system call sets the program break to the sum of the text
size and this data size.
This is about as simple a binary format as possible, while still allowing
multiple binaries to refer to each other's symbols via the symbol table and
the relocation bits. The header and the text are copied into memory verbatim.
The data (initialized global variables) is generated by the loader before
program execution from another source. (You have to appreciate the beauty of
using a branch instruction as the first word since that allows you to load the
entire file into memory and start executing from the very first byte.)
After some years the 'a.out' format was expanded - now to include the data segment - while still remaining straightforward:
/*
 * Header of the extended a.out format.
 * Eight unsigned long fields, laid out in exactly this order.
 */
struct exec {
    unsigned long a_midmag;     /* magic number (version-specific) */
    unsigned long a_text;       /* size of the text segment */
    unsigned long a_data;       /* size of the data segment */
    unsigned long a_bss;        /* size of bss, i.e. uninitialized data */
    unsigned long a_syms;       /* size of the symbol table */
    unsigned long a_entry;      /* address of the entry point */
    unsigned long a_trsize;     /* size of the text relocation table */
    unsigned long a_drsize;     /* size of the data relocation table */
};
A few years ago (circa 2003) you could still use this type of 'a.out' format
on a Linux box. On FreeBSD a.out persisted until 1998 as the default binary
format as it was perfectly sufficient. The simplest program imaginable could
be constructed in assembly purely by hand, including the header:
$ cat >exit99.s
; a.out header
.LONG 0407 ; OMAGIC number
.LONG 12 ; text size
.LONG 0 ; data size
.LONG 0 ; bss (uninitialized data) size
.LONG 0 ; symbol table size
.LONG 0 ; entry point address
.LONG 0 ; text relocation table size
.LONG 0 ; data relocation table size
; the program
main:
mov eax,#1 ; choose system call (exit)
mov ebx,#99 ; parameter (99)
int #0x80 ; do the system call
$ as86 -3 -b exit99 exit99.s
$ chmod +x exit99
$ ./exit99
$ echo $?
99
While Unix binary file formats have evolved - with COFF used in early versions of System V and the now dominant ELF from SVR4 - they still follow the same general idea.
0x00000000 text - loaded by exec(), pointed to by PC
data - loaded by exec()
bss (uninitialized data) - initialized to 0 by exec()
heap - grows "down" via brk()
.
.
.
stack - grows "up" via calls, pointed to by SP & FP
0xFFFFFFFF argv[] & envp [] - set up by exec()
The code and initialized data are copied from the binary file into the memory
by the loader, which is invoked by the exec() system call. For
instance, your code in KOS will rely on a function
load_user_program(), which includes the following code:
/* Excerpt of load_user_program(): copy the program's segments from the
 * open file fd into the simulated machine's main memory. */
/* Zero the first `size` bytes of main memory before anything is loaded. */
bzero(machine->mainMemory, size);
/* Code segment: only loaded when the header records a non-zero size. */
if (noffH.code.size > 0) {
/* Move to the segment's offset in the file (whence 0; presumably
 * SEEK_SET -- confirm). */
lseek(fd, noffH.code.inFileAddr, 0);
/* Read the segment into main memory at its virtual address.
 * NOTE(review): the results of lseek() and read() are not checked here. */
read(fd, &(machine->mainMemory[noffH.code.virtualAddr]), noffH.code.size);
}
/* Initialized-data segment: same seek-then-read pattern as the code segment. */
if (noffH.initData.size > 0) {
lseek(fd, noffH.initData.inFileAddr, 0);
/* NOTE(review): a short or failed read() would go unnoticed here as well. */
read(fd, &(machine->mainMemory[noffH.initData.virtualAddr]), noffH.initData.size);
}
As is often the case, the reality of memory layout in modern-day systems is
more complicated, especially now that operating systems try to randomize
placement of dynamic libraries within process address spaces, but the key
parts are still recognizable. Here's output from a modern Linux box in 2008:
$ cat /proc/self/maps
00110000-00111000 r-xp 00110000 00:00 0 [vdso]
00950000-00970000 r-xp 00000000 08:07 1368185 /lib/ld-2.8.90.so
00971000-00972000 r--p 00020000 08:07 1368185 /lib/ld-2.8.90.so
00972000-00973000 rw-p 00021000 08:07 1368185 /lib/ld-2.8.90.so
00979000-00ae7000 r-xp 00000000 08:07 1368186 /lib/libc-2.8.90.so
00ae7000-00ae9000 r--p 0016e000 08:07 1368186 /lib/libc-2.8.90.so
00ae9000-00aea000 rw-p 00170000 08:07 1368186 /lib/libc-2.8.90.so
00aea000-00aed000 rw-p 00aea000 00:00 0
08048000-08054000 r-xp 00000000 08:07 16424 /bin/cat
08054000-08055000 rw-p 0000c000 08:07 16424 /bin/cat
09f36000-09f57000 rw-p 09f36000 00:00 0 [heap]
b7ebb000-b80bb000 r--p 00000000 08:07 2809859 /usr/lib/locale/locale-archive
b80bb000-b80bd000 rw-p b80bb000 00:00 0
bfbbe000-bfbd3000 rw-p bffeb000 00:00 0 [stack]
(In case you're curious, [vdso] stands for Virtual Dynamic Shared Object, visible as linux-gate.so.1 when you run ldd on a dynamically linked binary.)
% pstree -A
init-+-acpid
     |-crond
     |-klogd
     |-ntpd
     |-90*[ssh-agent]
     |-sshd-+-8*[sshd---sshd---bash]
     |      |-sshd---sshd---csh---bash---pine
     |      |-sshd---sshd---csh---bash
     |      |-sshd---sshd---csh---vim
     |      |-sshd---sshd---bash---screen
     |      |-sshd---sshd---csh---pstree
     |      `-sshd---sshd---tcsh---pico
     |-sshd---csh---sftp-server
     |-syslogd
     `-xfs
There are two implementation questions:
/*
 * Process table entry.
 *
 * Each active process owns exactly one of these. It holds all the
 * data needed about the process while the process may be swapped
 * out; the remaining per-process data (user.h) is swapped together
 * with the process.
 */
struct proc {
    char         p_stat;
    char         p_flag;
    char         p_pri;     /* priority; negative means high */
    char         p_time;    /* resident time, used for scheduling */
    char         p_cpu;     /* cpu usage, used for scheduling */
    char         p_nice;    /* nice value for cpu usage */
    short        p_sig;     /* signals pending to this process */
    short        p_uid;     /* user id; used to direct tty signals */
    short        p_pgrp;    /* name of process group leader */
    short        p_pid;     /* unique process id */
    short        p_ppid;    /* process id of the parent */
    short        p_addr;    /* address of the swappable image */
    short        p_size;    /* size of the swappable image, in clicks */
    caddr_t      p_wchan;   /* event the process is awaiting */
    struct text *p_textp;   /* pointer to the text structure */
    struct proc *p_link;    /* linked list of running processes */
    int          p_clktim;  /* time to alarm clock signal */
};

extern struct proc proc[];  /* the proc table itself */
/*
 * Per-process user structure.
 *
 * One is allocated for every process. It collects all per-process
 * data that does not need to be referenced while the process is
 * swapped. The user block is USIZE*64 bytes long, resides at virtual
 * kernel location 140000, contains the per-user system stack, and is
 * cross-referenced with the proc structure of the same process.
 */
struct user {
    label_t       u_rsav;            /* state saved when exchanging stacks */
    int           u_fper;            /* FP error register */
    int           u_fpsaved;         /* FP regs saved for this proc */
    struct {
        int       u_fpsr;            /* FP status register */
        double    u_fpregs[6];       /* FP registers */
    } u_fps;
    char          u_segflg;          /* IO flag: 0:user D; 1:system; 2:user I */
    char          u_error;           /* error code to return */
    short         u_uid;             /* effective user id */
    short         u_gid;             /* effective group id */
    short         u_ruid;            /* real user id */
    short         u_rgid;            /* real group id */
    struct proc  *u_procp;           /* pointer to the proc structure */
    int          *u_ap;              /* pointer to the arglist */
    union {                          /* syscall return values */
        struct {
            int   r_val1;
            int   r_val2;
        };
        off_t     r_off;
        time_t    r_time;
    } u_r;
    caddr_t       u_base;            /* base address for IO */
    unsigned int  u_count;           /* bytes remaining for IO */
    off_t         u_offset;          /* offset in file for IO */
    struct inode *u_cdir;            /* inode of the current directory */
    struct inode *u_rdir;            /* root directory of the current process */
    char          u_dbuf[DIRSIZ];    /* current pathname component */
    caddr_t       u_dirp;            /* pathname pointer */
    struct direct u_dent;            /* current directory entry */
    struct inode *u_pdir;            /* inode of the parent directory of dirp */
    int           u_uisa[16];        /* prototype of segmentation addresses */
    int           u_uisd[16];        /* prototype of segmentation descriptors */
    struct file  *u_ofile[NOFILE];   /* file structures of the open files */
    char          u_pofile[NOFILE];  /* per-process flags of the open files */
    int           u_arg[5];          /* arguments to the current system call */
    unsigned      u_tsize;           /* text size (clicks) */
    unsigned      u_dsize;           /* data size (clicks) */
    unsigned      u_ssize;           /* stack size (clicks) */
    label_t       u_qsav;            /* label variable for quits and interrupts */
    label_t       u_ssav;            /* label variable for swapping */
    int           u_signal[NSIG];    /* disposition of signals */
    time_t        u_utime;           /* user time of this process */
    time_t        u_stime;           /* system time of this process */
    time_t        u_cutime;          /* sum of the children's utimes */
    time_t        u_cstime;          /* sum of the children's stimes */
    int          *u_ar0;             /* address of the user's saved R0 */
    struct {                         /* profile arguments */
        short    *pr_base;           /* buffer base */
        unsigned  pr_size;           /* buffer size */
        unsigned  pr_off;            /* pc offset */
        unsigned  pr_scale;          /* pc scaling */
    } u_prof;
    char          u_intflg;          /* catch intr from sys */
    char          u_sep;             /* flag for I and D separation */
    struct tty   *u_ttyp;            /* controlling tty pointer */
    dev_t         u_ttyd;            /* controlling tty dev */
    struct {                         /* header of the executable file */
        int       ux_mag;            /* magic number */
        unsigned  ux_tsize;          /* text size */
        unsigned  ux_dsize;          /* data size */
        unsigned  ux_bsize;          /* bss size */
        unsigned  ux_ssize;          /* symbol table size */
        unsigned  ux_entloc;         /* entry location */
        unsigned  ux_unused;
        unsigned  ux_relflg;
    } u_exdata;
    char          u_comm[DIRSIZ];
    time_t        u_start;
    char          u_acflag;
    short         u_fpflag;          /* unused now, will be later */
    short         u_cmask;           /* mask for file creation */
    int           u_stack[1];
                                     /*
                                      * kernel stack per user:
                                      * extends from u + USIZE*64
                                      * backward, not to reach here
                                      */
};
That's quite a mouthful. Don't worry, in kos_start, my PCB was a little bit simpler:
/* Process control block from kos_start: one int slot per entry, NumTotalRegs in all. */
typedef struct {
    int reg[NumTotalRegs];
} PCB;
Waiting processes may reside in one of several queues: