author    Alex Auvolat <alex.auvolat@ens.fr>  2015-02-09 17:39:41 +0100
committer Alex Auvolat <alex.auvolat@ens.fr>  2015-02-09 17:40:03 +0100
commit    f2c51bc81d2aa618b29ddbeaae5ac1c5308821f0 (patch)
tree      fae67a79d5e60128d074550326a05216694a5848 /src/kernel
parent    a5dfdd2b3fa91a2cda4f807c88bd35928e3c7a61 (diff)
download  kogata-f2c51bc81d2aa618b29ddbeaae5ac1c5308821f0.tar.gz
          kogata-f2c51bc81d2aa618b29ddbeaae5ac1c5308821f0.zip
Reorganize all.
Diffstat (limited to 'src/kernel')
-rw-r--r--  src/kernel/Makefile                  14
-rw-r--r--  src/kernel/config.h                  27
-rw-r--r--  src/kernel/core/context_switch.s     58
-rw-r--r--  src/kernel/core/dbglog.c            159
-rw-r--r--  src/kernel/core/frame.c              85
-rw-r--r--  src/kernel/core/gdt.c                90
-rw-r--r--  src/kernel/core/idt.c               253
-rw-r--r--  src/kernel/core/interrupt.s         126
-rw-r--r--  src/kernel/core/kmain.c             215
-rw-r--r--  src/kernel/core/kmalloc.c            52
-rw-r--r--  src/kernel/core/loader.s             86
-rw-r--r--  src/kernel/core/paging.c            298
-rw-r--r--  src/kernel/core/region.c            397
-rw-r--r--  src/kernel/core/sys.c                25
-rw-r--r--  src/kernel/core/thread.c            208
l---------  src/kernel/include/config.h           1
-rw-r--r--  src/kernel/include/dbglog.h           8
-rw-r--r--  src/kernel/include/frame.h           14
-rw-r--r--  src/kernel/include/gdt.h             17
-rw-r--r--  src/kernel/include/idt.h             79
-rw-r--r--  src/kernel/include/kmalloc.h         11
-rw-r--r--  src/kernel/include/multiboot.h       63
-rw-r--r--  src/kernel/include/paging.h          31
-rw-r--r--  src/kernel/include/process.h         43
-rw-r--r--  src/kernel/include/region.h          38
-rw-r--r--  src/kernel/include/sys.h             53
-rw-r--r--  src/kernel/include/thread.h          46
-rw-r--r--  src/kernel/linker.ld                 43
-rw-r--r--  src/kernel/user/process.c            30
29 files changed, 2570 insertions, 0 deletions
diff --git a/src/kernel/Makefile b/src/kernel/Makefile
new file mode 100644
index 0000000..3fc7e9c
--- /dev/null
+++ b/src/kernel/Makefile
@@ -0,0 +1,14 @@
+
+OBJ = core/loader.o core/kmain.o core/dbglog.o core/sys.o \
+ core/gdt.o core/idt.o core/interrupt.o core/context_switch.o core/thread.o \
+ core/frame.o core/paging.o core/region.o core/kmalloc.o
+
+LIB = ../common/common.lib
+
+CFLAGS = -I ./include -I ../common/include
+
+LDFLAGS = -T linker.ld -Xlinker -Map=kernel.map
+
+OUT = kernel.bin
+
+include ../rules.make
diff --git a/src/kernel/config.h b/src/kernel/config.h
new file mode 100644
index 0000000..a0e59d3
--- /dev/null
+++ b/src/kernel/config.h
@@ -0,0 +1,27 @@
+#if !defined(__cplusplus)
+#include <stdbool.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__linux__)
+#error "This kernel needs to be compiled with a cross-compiler."
+#endif
+
+#if !defined(__i386__)
+#error "This kernel needs to be compiled with a ix86-elf compiler"
+#endif
+
+#define IN_KERNEL
+
+#define K_HIGHHALF_ADDR ((size_t)0xC0000000)
+
+#define OS_NAME "macrO.Scope"
+#define OS_VERSION "0.0.1"
+
+// Comment to disable either form of debug log output
+#define DBGLOG_TO_SERIAL
+#define DBGLOG_TO_SCREEN
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/context_switch.s b/src/kernel/core/context_switch.s
new file mode 100644
index 0000000..6738a03
--- /dev/null
+++ b/src/kernel/core/context_switch.s
@@ -0,0 +1,58 @@
+[EXTERN kernel_stack_top]
+[EXTERN run_scheduler]
+
+[GLOBAL save_context_and_enter_scheduler]
+; void save_context_and_enter_scheduler(struct saved_context *ctx);
+save_context_and_enter_scheduler:
+ pushf
+ cli
+ pusha ; Pushes edi,esi,ebp,esp,ebx,edx,ecx,eax
+
+ mov eax, cr3
+ push eax
+
+ mov eax, [esp+44] ; get address of saved_context structure
+ mov [eax], esp ; save esp
+ mov dword [eax+4], resume_saved_context ; save eip
+
+ mov esp, kernel_stack_top
+ jmp run_scheduler
+
+resume_saved_context:
+ pop eax
+ mov cr3, eax
+
+ popa
+ popf
+ ret
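+
+; Note: the [eax] / [eax+4] accesses above assume the following layout for
+; struct saved_context (a sketch; the authoritative definition lives in
+; include/thread.h):
+;   offset 0: uint32_t *esp    ; saved stack pointer
+;   offset 4: void (*eip)()    ; address jumped to by resume_context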
+
+[GLOBAL irq0_save_context_and_enter_scheduler]
+; meant to be called on IRQ0
+; general registers already saved by IRQ handler stub
+; flags already saved by the interrupt itself, and interrupts are disabled
+; only saves CR3
+irq0_save_context_and_enter_scheduler:
+ mov eax, cr3
+ push eax
+
+ mov eax, [esp+8] ; get address of saved_context structure
+ mov [eax], esp ; save esp
+ mov dword [eax+4], resume_saved_irq0_context ; save eip
+
+ mov esp, kernel_stack_top
+ jmp run_scheduler
+
+resume_saved_irq0_context:
+ pop eax
+ mov cr3, eax
+ ret
+
+
+[GLOBAL resume_context]
+resume_context:
+ mov eax, [esp+4] ; get address of saved context
+ mov esp, [eax] ; resume esp
+ mov ecx, [eax+4] ; jump to specified eip
+ jmp ecx
+
+; vim: set ts=4 sw=4 tw=0 noet :
diff --git a/src/kernel/core/dbglog.c b/src/kernel/core/dbglog.c
new file mode 100644
index 0000000..e042625
--- /dev/null
+++ b/src/kernel/core/dbglog.c
@@ -0,0 +1,159 @@
+#include <stdarg.h>
+#include <string.h>
+#include <printf.h>
+#include <mutex.h>
+
+#include <dbglog.h>
+#include <config.h>
+#include <sys.h>
+
+// ==================================================================
+
+#ifdef DBGLOG_TO_SCREEN
+
+static const size_t VGA_WIDTH = 80;
+static const size_t VGA_HEIGHT = 25;
+
+static uint8_t vga_color = 7;
+static uint16_t* vga_buffer = 0;
+static uint16_t vga_row = 0, vga_column = 0;
+
+static uint16_t make_vgaentry(char c, uint8_t color) {
+ uint16_t c16 = c;
+ uint16_t color16 = color;
+ return c16 | color16 << 8;
+}
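+// For instance, make_vgaentry('A', 7) yields 0x0741: character 0x41 in the
+// low byte, light-grey-on-black attribute 0x07 in the high byte.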
+
+static void vga_putentryat(char c, uint8_t color, size_t x, size_t y) {
+ const size_t index = y * VGA_WIDTH + x;
+ vga_buffer[index] = make_vgaentry(c, color);
+}
+
+static void vga_update_cursor() {
+ uint16_t cursor_location = vga_row * 80 + vga_column;
+ outb(0x3D4, 14); // select high cursor byte
+ outb(0x3D5, cursor_location >> 8);
+ outb(0x3D4, 15); // select low cursor byte
+ outb(0x3D5, cursor_location);
+}
+
+static void vga_init() {
+ vga_row = 0;
+ vga_column = 0;
+ vga_buffer = (uint16_t*) (K_HIGHHALF_ADDR + 0xB8000);
+
+ for (size_t y = 0; y < VGA_HEIGHT; y++) {
+ for (size_t x = 0; x < VGA_WIDTH; x++) {
+ vga_putentryat(' ', vga_color, x, y);
+ }
+ }
+
+ vga_update_cursor();
+}
+
+static void vga_scroll() {
+ for (size_t i = 0; i < VGA_WIDTH * (VGA_HEIGHT - 1); i++) {
+ vga_buffer[i] = vga_buffer[i + VGA_WIDTH];
+ }
+ for (size_t x = 0; x < VGA_WIDTH; x++) {
+ vga_putentryat(' ', vga_color, x, VGA_HEIGHT - 1);
+ }
+ vga_row--;
+}
+
+static void vga_newline() {
+ vga_column = 0;
+ if (++vga_row == VGA_HEIGHT) {
+ vga_scroll();
+ }
+}
+
+static void vga_putc(char c) {
+ if (c == '\n') {
+ vga_newline();
+ } else if (c == '\t') {
+ vga_putc(' ');
+ while (vga_column % 4 != 0) vga_putc(' ');
+ } else {
+ vga_putentryat(c, vga_color, vga_column, vga_row);
+ if (++vga_column == VGA_WIDTH) {
+ vga_newline();
+ }
+ }
+}
+
+static void vga_puts(const char* data) {
+ size_t datalen = strlen(data);
+ for (size_t i = 0; i < datalen; i++)
+ vga_putc(data[i]);
+
+ vga_update_cursor();
+}
+
+#endif // DBGLOG_TO_SCREEN
+
+// ==================================================================
+
+#ifdef DBGLOG_TO_SERIAL
+
+#define SER_PORT 0x3F8 // COM1
+
+static void serial_init() {
+ outb(SER_PORT + 1, 0x00); // disable interrupts
+ outb(SER_PORT + 3, 0x80); // enable DLAB (gives access to the baud rate divisor)
+ outb(SER_PORT + 0, 0x03); // set divisor to 3, low byte (38400 baud)
+ outb(SER_PORT + 1, 0x00); // divisor, high byte
+ outb(SER_PORT + 3, 0x03); // 8 bits, no parity, one stop bit (and clear DLAB)
+ outb(SER_PORT + 2, 0xC7); // enable FIFO, clear it, 14-byte threshold
+}
+
+static void serial_putc(const char c) {
+ while (!(inb(SER_PORT + 5) & 0x20));
+ outb(SER_PORT, c);
+}
+
+static void serial_puts(const char *c) {
+ while (*c) {
+ serial_putc(*c);
+ c++;
+ }
+}
+
+#endif // DBGLOG_TO_SERIAL
+
+// ==================================================================
+
+STATIC_MUTEX(dbglog_mutex);
+
+void dbglog_setup() {
+ mutex_lock(&dbglog_mutex);
+#ifdef DBGLOG_TO_SCREEN
+ vga_init();
+#endif
+#ifdef DBGLOG_TO_SERIAL
+ serial_init();
+#endif
+ mutex_unlock(&dbglog_mutex);
+}
+
+void dbg_print(const char* str) {
+#ifdef DBGLOG_TO_SCREEN
+ vga_puts(str);
+#endif
+#ifdef DBGLOG_TO_SERIAL
+ serial_puts(str);
+#endif
+}
+
+void dbg_printf(const char* fmt, ...) {
+ va_list ap;
+ char buffer[256];
+
+ va_start(ap, fmt);
+ vsnprintf(buffer, 256, fmt, ap);
+ va_end(ap);
+
+ dbg_print(buffer);
+}
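+
+// Usage sketch (illustrative): dbg_printf follows the usual printf
+// conventions, with output truncated to the 256-byte buffer above, e.g.
+//   dbg_printf("mapped frame %d at 0x%p\n", frame, vaddr);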
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/frame.c b/src/kernel/core/frame.c
new file mode 100644
index 0000000..489d010
--- /dev/null
+++ b/src/kernel/core/frame.c
@@ -0,0 +1,85 @@
+#include <frame.h>
+#include <dbglog.h>
+
+#include <mutex.h>
+
+// TODO: buddy allocator
+// this is a simple bitmap allocator
+
+#define INDEX_FROM_BIT(a) ((a)/(8*4))
+#define OFFSET_FROM_BIT(a) ((a)%(8*4))
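+// Worked example: frame 70 is tracked by bit OFFSET_FROM_BIT(70) == 6 of
+// word INDEX_FROM_BIT(70) == 2, i.e. its state is (frame_bitset[2] >> 6) & 1.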
+
+static uint32_t *frame_bitset;
+static uint32_t nframes, nused_frames;
+static uint32_t begin_search_at;
+
+void frame_init_allocator(size_t total_ram, void** kernel_data_end) {
+ nframes = PAGE_ID(total_ram);
+
+ frame_bitset = (uint32_t*)ALIGN4_UP((size_t)*kernel_data_end);
+ *kernel_data_end = (void*)frame_bitset + ALIGN4_UP(nframes / 8);
+
+ for (size_t i = 0; i < ALIGN4_UP(nframes / 8)/4; i++)
+ frame_bitset[i] = 0;
+
+ nused_frames = 0;
+
+ size_t kernel_pages = PAGE_ALIGN_UP((size_t)*kernel_data_end - K_HIGHHALF_ADDR)/PAGE_SIZE;
+ for (size_t i = 0; i < kernel_pages; i++) {
+ size_t idx = INDEX_FROM_BIT(i);
+ size_t ofs = OFFSET_FROM_BIT(i);
+ frame_bitset[idx] |= (0x1 << ofs);
+ nused_frames++;
+ }
+ begin_search_at = INDEX_FROM_BIT(kernel_pages);
+}
+
+STATIC_MUTEX(frame_allocator_mutex);
+
+uint32_t frame_alloc(size_t n) {
+ if (n > 32) return 0;
+
+ mutex_lock(&frame_allocator_mutex);
+ for (uint32_t i = begin_search_at; i < INDEX_FROM_BIT(nframes); i++) {
+ if (frame_bitset[i] == 0xFFFFFFFF) {
+ if (i == begin_search_at) begin_search_at++;
+ continue;
+ }
+
+ for (uint32_t j = 0; j < 32 - n + 1; j++) {
+ uint32_t to_test = (0xFFFFFFFF >> (32 - n)) << j;
+ if (!(frame_bitset[i]&to_test)) {
+ frame_bitset[i] |= to_test;
+ nused_frames += n;
+
+ mutex_unlock(&frame_allocator_mutex);
+ return i * 32 + j;
+ }
+ }
+ }
+ mutex_unlock(&frame_allocator_mutex);
+ return 0;
+}
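+
+// Note on the mask above: (0xFFFFFFFF >> (32 - n)) << j is n consecutive
+// 1-bits at offset j, e.g. n = 4, j = 3 gives 0x78 (binary 1111000), so a
+// single AND tests n adjacent frames at once.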
+
+void frame_free(uint32_t base, size_t n) {
+ mutex_lock(&frame_allocator_mutex);
+
+ for (size_t i = 0; i < n; i++) {
+ uint32_t idx = INDEX_FROM_BIT(base + i);
+ uint32_t ofs = OFFSET_FROM_BIT(base + i);
+ if (frame_bitset[idx] & (0x1 << ofs)) {
+ frame_bitset[idx] &= ~(0x1 << ofs);
+ nused_frames--;
+ }
+ }
+ if (INDEX_FROM_BIT(base) < begin_search_at)
+ begin_search_at = INDEX_FROM_BIT(base);
+
+ mutex_unlock(&frame_allocator_mutex);
+}
+
+void dbg_print_frame_stats() {
+ dbg_printf("Used frames: %d/%d\n", nused_frames, nframes);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/gdt.c b/src/kernel/core/gdt.c
new file mode 100644
index 0000000..eadde5f
--- /dev/null
+++ b/src/kernel/core/gdt.c
@@ -0,0 +1,90 @@
+#include <gdt.h>
+#include <string.h>
+
+#define GDT_ENTRIES 6 // The contents of each entry are defined in gdt_init.
+
+/* One entry of the table */
+struct gdt_entry {
+ uint16_t limit_low;
+ uint16_t base_low;
+ uint8_t base_middle;
+ uint8_t access;
+ uint8_t granularity;
+ uint8_t base_high;
+} __attribute__((packed));
+typedef struct gdt_entry gdt_entry_t;
+
+/* Structure defining the whole table : address and size (in bytes). */
+struct gdt_ptr {
+ uint16_t limit;
+ uint32_t base;
+} __attribute__((packed));
+typedef struct gdt_ptr gdt_ptr_t;
+
+/* The TSS is used for hardware multitasking.
+ We don't use that, but we still need a TSS so that user mode process exceptions
+ can be handled correctly by the kernel. */
+struct tss_entry {
+ uint32_t prev_tss; // The previous TSS - if we used hardware task switching this would form a linked list.
+ uint32_t esp0; // The stack pointer to load when we change to kernel mode.
+ uint32_t ss0; // The stack segment to load when we change to kernel mode.
+ uint32_t esp1; // Unused...
+ uint32_t ss1;
+ uint32_t esp2;
+ uint32_t ss2;
+ uint32_t cr3;
+ uint32_t eip;
+ uint32_t eflags;
+ uint32_t eax;
+ uint32_t ecx;
+ uint32_t edx;
+ uint32_t ebx;
+ uint32_t esp;
+ uint32_t ebp;
+ uint32_t esi;
+ uint32_t edi;
+ uint32_t es; // The value to load into ES when we change to kernel mode.
+ uint32_t cs; // The value to load into CS when we change to kernel mode.
+ uint32_t ss; // The value to load into SS when we change to kernel mode.
+ uint32_t ds; // The value to load into DS when we change to kernel mode.
+ uint32_t fs; // The value to load into FS when we change to kernel mode.
+ uint32_t gs; // The value to load into GS when we change to kernel mode.
+ uint32_t ldt; // Unused...
+ uint16_t trap;
+ uint16_t iomap_base;
+} __attribute__((packed));
+typedef struct tss_entry tss_entry_t;
+
+// ========================= //
+// Actual definitions
+
+static gdt_entry_t gdt_entries[GDT_ENTRIES];
+static gdt_ptr_t gdt_ptr;
+
+/* For internal use only. Writes one entry of the GDT with given parameters. */
+static void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) {
+ gdt_entries[num].base_low = (base & 0xFFFF);
+ gdt_entries[num].base_middle = (base >> 16) & 0xFF;
+ gdt_entries[num].base_high = (base >> 24) & 0xFF;
+
+ gdt_entries[num].limit_low = (limit & 0xFFFF);
+ gdt_entries[num].granularity = (limit >> 16) & 0x0F;
+ gdt_entries[num].granularity |= gran & 0xF0;
+ gdt_entries[num].access = access;
+}
+
+/* Write data to the GDT and enable it. */
+void gdt_init() {
+ gdt_ptr.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1;
+ gdt_ptr.base = (uint32_t)&gdt_entries;
+
+ gdt_set_gate(0, 0, 0, 0, 0); //Null segment
+ gdt_set_gate(1, 0, 0xFFFFFFFF, 0x9A, 0xCF); //Kernel code segment 0x08
+ gdt_set_gate(2, 0, 0xFFFFFFFF, 0x92, 0xCF); //Kernel data segment 0x10
+ gdt_set_gate(3, 0, 0xFFFFFFFF, 0xFA, 0xCF); //User code segment 0x18
+ gdt_set_gate(4, 0, 0xFFFFFFFF, 0xF2, 0xCF); //User data segment 0x20
+
+ asm volatile ("lgdt %0"::"m"(gdt_ptr):"memory");
+}
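+
+/* Reading the access bytes above: 0x9A = present (0x80) | DPL 0 | code
+   segment, executable and readable; 0xFA is the same descriptor with DPL 3
+   (user mode), and 0x92/0xF2 are the matching writable data segments.
+   The sixth GDT slot is left empty for now, presumably reserved for the
+   TSS described above. */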
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/idt.c b/src/kernel/core/idt.c
new file mode 100644
index 0000000..2f244e3
--- /dev/null
+++ b/src/kernel/core/idt.c
@@ -0,0 +1,253 @@
+#include <idt.h>
+#include <gdt.h>
+#include <sys.h>
+#include <string.h>
+#include <dbglog.h>
+
+struct idt_entry {
+ uint16_t base_lo; //Low part of address to jump to
+ uint16_t sel; //Kernel segment selector
+ uint8_t always0;
+ uint8_t type_attr; //Type
+ uint16_t base_hi; //High part of address to jump to
+} __attribute__((packed));
+typedef struct idt_entry idt_entry_t;
+
+struct idt_ptr {
+ uint16_t limit;
+ uint32_t base;
+} __attribute__((packed));
+typedef struct idt_ptr idt_ptr_t;
+
+#define GATE_TYPE_INTERRUPT 14 // IF is cleared on interrupt
+#define GATE_TYPE_TRAP 15 // IF stays as is
+
+#define GATE_PRESENT (1<<7)
+#define GATE_DPL_SHIFT 5
+
+
+void isr0();
+void isr1();
+void isr2();
+void isr3();
+void isr4();
+void isr5();
+void isr6();
+void isr7();
+void isr8();
+void isr9();
+void isr10();
+void isr11();
+void isr12();
+void isr13();
+void isr14();
+void isr15();
+void isr16();
+void isr17();
+void isr18();
+void isr19();
+void isr20();
+void isr21();
+void isr22();
+void isr23();
+void isr24();
+void isr25();
+void isr26();
+void isr27();
+void isr28();
+void isr29();
+void isr30();
+void isr31();
+
+void irq0();
+void irq1();
+void irq2();
+void irq3();
+void irq4();
+void irq5();
+void irq6();
+void irq7();
+void irq8();
+void irq9();
+void irq10();
+void irq11();
+void irq12();
+void irq13();
+void irq14();
+void irq15();
+
+void syscall64();
+
+// ************************************************************
+// Handler code
+
+static idt_entry_t idt_entries[256];
+static idt_ptr_t idt_ptr;
+
+static isr_handler_t irq_handlers[16] = {0};
+static isr_handler_t ex_handlers[32] = {0};
+
+/* Called in interrupt.s when an exception fires (interrupt 0 to 31) */
+void idt_exHandler(registers_t *regs) {
+ if (ex_handlers[regs->int_no] != 0) {
+ ex_handlers[regs->int_no](regs);
+ } else {
+ //TODO: make sure all exceptions happening in userspace do not cause a kernel panic...
+ dbg_printf("Unhandled exception: %i\n", regs->int_no);
+ dbg_dump_registers(regs);
+ PANIC("Unhandled exception");
+ }
+}
+
+/* Called in interrupt.s when an IRQ fires (interrupt 32 to 47) */
+void idt_irqHandler(registers_t *regs) {
+ if (regs->err_code > 7) {
+ outb(0xA0, 0x20);
+ }
+ outb(0x20, 0x20);
+
+ dbg_printf("IRQ %i\n", regs->err_code);
+ if (irq_handlers[regs->err_code] != 0) {
+ irq_handlers[regs->err_code](regs);
+ }
+}
+
+/* Called in interrupt.s when a syscall is called */
+void idt_syscallHandler(registers_t *regs) {
+ dbg_printf("Syscall %i\n", regs->int_no);
+ // do nothing, yet.
+}
+
+/* For internal use only. Sets up an entry of the IDT with given parameters. */
+static void idt_set_gate(uint8_t num, void (*fun)(), uint8_t type) {
+ uint32_t base = (uint32_t)fun;
+
+ idt_entries[num].base_lo = base & 0xFFFF;
+ idt_entries[num].base_hi = (base >> 16) & 0xFFFF;
+
+ idt_entries[num].sel = K_CODE_SEGMENT;
+ idt_entries[num].always0 = 0;
+ idt_entries[num].type_attr = GATE_PRESENT
+ | (3 << GATE_DPL_SHIFT) // accessible from user mode
+ | type;
+}
+
+static const struct {
+ uint8_t num;
+ void (*fun)();
+ uint8_t type;
+} gates[] = {
+ // Most processor exceptions are traps and handling them
+ // should be preemptible
+ { 0, isr0, GATE_TYPE_TRAP },
+ { 1, isr1, GATE_TYPE_TRAP },
+ { 2, isr2, GATE_TYPE_TRAP },
+ { 3, isr3, GATE_TYPE_TRAP },
+ { 4, isr4, GATE_TYPE_TRAP },
+ { 5, isr5, GATE_TYPE_TRAP },
+ { 6, isr6, GATE_TYPE_TRAP },
+ { 7, isr7, GATE_TYPE_TRAP },
+ { 8, isr8, GATE_TYPE_TRAP },
+ { 9, isr9, GATE_TYPE_TRAP },
+ { 10, isr10, GATE_TYPE_TRAP },
+ { 11, isr11, GATE_TYPE_TRAP },
+ { 12, isr12, GATE_TYPE_TRAP },
+ { 13, isr13, GATE_TYPE_TRAP },
+ { 14, isr14, GATE_TYPE_INTERRUPT }, // reenables interrupts later on
+ { 15, isr15, GATE_TYPE_TRAP },
+ { 16, isr16, GATE_TYPE_TRAP },
+ { 17, isr17, GATE_TYPE_TRAP },
+ { 18, isr18, GATE_TYPE_TRAP },
+ { 19, isr19, GATE_TYPE_TRAP },
+ { 20, isr20, GATE_TYPE_TRAP },
+ { 21, isr21, GATE_TYPE_TRAP },
+ { 22, isr22, GATE_TYPE_TRAP },
+ { 23, isr23, GATE_TYPE_TRAP },
+ { 24, isr24, GATE_TYPE_TRAP },
+ { 25, isr25, GATE_TYPE_TRAP },
+ { 26, isr26, GATE_TYPE_TRAP },
+ { 27, isr27, GATE_TYPE_TRAP },
+ { 28, isr28, GATE_TYPE_TRAP },
+ { 29, isr29, GATE_TYPE_TRAP },
+ { 30, isr30, GATE_TYPE_TRAP },
+ { 31, isr31, GATE_TYPE_TRAP },
+
+ // IRQs are not preemptible; an IRQ handler should do the bare minimum
+ // (communication with the hardware), and then pass a message to a worker
+ // process in order to do further processing
+ { 32, irq0, GATE_TYPE_INTERRUPT },
+ { 33, irq1, GATE_TYPE_INTERRUPT },
+ { 34, irq2, GATE_TYPE_INTERRUPT },
+ { 35, irq3, GATE_TYPE_INTERRUPT },
+ { 36, irq4, GATE_TYPE_INTERRUPT },
+ { 37, irq5, GATE_TYPE_INTERRUPT },
+ { 38, irq6, GATE_TYPE_INTERRUPT },
+ { 39, irq7, GATE_TYPE_INTERRUPT },
+ { 40, irq8, GATE_TYPE_INTERRUPT },
+ { 41, irq9, GATE_TYPE_INTERRUPT },
+ { 42, irq10, GATE_TYPE_INTERRUPT },
+ { 43, irq11, GATE_TYPE_INTERRUPT },
+ { 44, irq12, GATE_TYPE_INTERRUPT },
+ { 45, irq13, GATE_TYPE_INTERRUPT },
+ { 46, irq14, GATE_TYPE_INTERRUPT },
+ { 47, irq15, GATE_TYPE_INTERRUPT },
+
+ // Of course, syscalls are preemptible
+ { 64, syscall64, GATE_TYPE_TRAP },
+
+ { 0, 0, 0 }
+};
+
+/* Remaps the IRQs. Sets up the IDT. */
+void idt_init() {
+ memset((uint8_t*)&idt_entries, 0, sizeof(idt_entry_t) * 256);
+
+ //Remap the IRQ table
+ outb(0x20, 0x11); // ICW1: initialize master PIC, expect ICW4
+ outb(0xA0, 0x11); // ICW1: initialize slave PIC, expect ICW4
+ outb(0x21, 0x20); // ICW2: master PIC vector offset = 0x20 (32)
+ outb(0xA1, 0x28); // ICW2: slave PIC vector offset = 0x28 (40)
+ outb(0x21, 0x04); // ICW3: slave PIC is wired to master IRQ line 2
+ outb(0xA1, 0x02); // ICW3: slave PIC cascade identity = 2
+ outb(0x21, 0x01); // ICW4: 8086/88 mode (master)
+ outb(0xA1, 0x01); // ICW4: 8086/88 mode (slave)
+ outb(0x21, 0x0); // unmask all IRQs on master
+ outb(0xA1, 0x0); // unmask all IRQs on slave
+
+ for (int i = 0; gates[i].type != 0; i++) {
+ idt_set_gate(gates[i].num, gates[i].fun, gates[i].type);
+ }
+
+ idt_ptr.limit = (sizeof(idt_entry_t) * 256) - 1;
+ idt_ptr.base = (uint32_t)&idt_entries;
+
+ asm volatile ("lidt %0"::"m"(idt_ptr):"memory");
+
+ // Some setup calls that come later on are not preemptible,
+ // so we wait until then to enable interrupts.
+}
+
+/* Sets up an IRQ handler for given IRQ. */
+void idt_set_irq_handler(int number, isr_handler_t func) {
+ if (number < 16 && number >= 0) {
+ irq_handlers[number] = func;
+ }
+}
+
+/* Sets up a handler for a processor exception */
+void idt_set_ex_handler(int number, isr_handler_t func) {
+ if (number >= 0 && number < 32) {
+ ex_handlers[number] = func;
+ }
+}
+
+void dbg_dump_registers(registers_t *regs) {
+ dbg_printf("/ Exception %i\n", regs->int_no);
+ dbg_printf("| EAX: 0x%p EBX: 0x%p ECX: 0x%p EDX: 0x%p\n", regs->eax, regs->ebx, regs->ecx, regs->edx);
+ dbg_printf("| EDI: 0x%p ESI: 0x%p ESP: 0x%p EBP: 0x%p\n", regs->edi, regs->esi, regs->esp, regs->ebp);
+ dbg_printf("| EIP: 0x%p CS : 0x%p DS : 0x%p SS : 0x%p\n", regs->eip, regs->cs, regs->ds, regs->ss);
+ dbg_printf("\\ EFl: 0x%p I# : 0x%p Err: 0x%p\n", regs->eflags, regs->int_no, regs->err_code);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
+
diff --git a/src/kernel/core/interrupt.s b/src/kernel/core/interrupt.s
new file mode 100644
index 0000000..d40fff0
--- /dev/null
+++ b/src/kernel/core/interrupt.s
@@ -0,0 +1,126 @@
+;************************************************************************************
+
+%macro COMMONSTUB 1
+[EXTERN idt_%1Handler]
+%1_common_stub:
+
+ pusha ; Pushes edi,esi,ebp,esp,ebx,edx,ecx,eax
+
+ mov ax, ds ; Lower 16-bits of eax = ds.
+ push eax ; save the data segment descriptor
+
+ mov ax, 0x10 ; load the kernel data segment descriptor
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+
+ ; pass the register data structure as a pointer to the function
+ ; (passing it directly results in GCC trashing the data when doing optimisations)
+ mov eax, esp
+ push eax
+ call idt_%1Handler
+ add esp, 4
+
+ pop eax ; reload the original data segment descriptor
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+
+ popa ; Pops edi,esi,ebp...
+ add esp, 8 ; Cleans up the pushed error code and pushed ISR number
+ iret
+%endmacro
+
+COMMONSTUB ex
+COMMONSTUB irq
+COMMONSTUB syscall
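+
+; The registers_t pointer handed to idt_*Handler mirrors the push order
+; above (a sketch; the actual struct is declared in include/idt.h): in
+; ascending memory order, ds, then edi/esi/ebp/esp/ebx/edx/ecx/eax (pusha),
+; then the two values pushed by the stubs below (int_no, err_code), then
+; eip/cs/eflags pushed by the CPU itself (plus useresp/ss on a privilege
+; level change).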
+
+;************************************************************************************
+
+%macro EX_NOERRCODE 1 ; define a macro, taking one parameter
+ [GLOBAL isr%1] ; %1 accesses the first parameter.
+ isr%1:
+ push byte 0
+ push byte %1
+ jmp ex_common_stub
+%endmacro
+
+%macro EX_ERRCODE 1
+ [GLOBAL isr%1]
+ isr%1:
+ push byte %1
+ jmp ex_common_stub
+%endmacro
+
+%macro IRQ 2
+ [GLOBAL irq%1]
+ irq%1:
+ push byte %1 ;push irq number
+ push byte %2 ;push int number
+ jmp irq_common_stub
+%endmacro
+
+%macro SYSCALL 1
+ [GLOBAL syscall%1]
+ syscall%1:
+ cli
+ push byte 0
+ push byte %1
+ jmp syscall_common_stub
+%endmacro
+
+EX_NOERRCODE 0
+EX_NOERRCODE 1
+EX_NOERRCODE 2
+EX_NOERRCODE 3
+EX_NOERRCODE 4
+EX_NOERRCODE 5
+EX_NOERRCODE 6
+EX_NOERRCODE 7
+EX_ERRCODE 8
+EX_NOERRCODE 9
+EX_ERRCODE 10
+EX_ERRCODE 11
+EX_ERRCODE 12
+EX_ERRCODE 13
+EX_ERRCODE 14
+EX_NOERRCODE 15
+EX_NOERRCODE 16
+EX_NOERRCODE 17
+EX_NOERRCODE 18
+EX_NOERRCODE 19
+EX_NOERRCODE 20
+EX_NOERRCODE 21
+EX_NOERRCODE 22
+EX_NOERRCODE 23
+EX_NOERRCODE 24
+EX_NOERRCODE 25
+EX_NOERRCODE 26
+EX_NOERRCODE 27
+EX_NOERRCODE 28
+EX_NOERRCODE 29
+EX_NOERRCODE 30
+EX_NOERRCODE 31
+
+IRQ 0, 32
+IRQ 1, 33
+IRQ 2, 34
+IRQ 3, 35
+IRQ 4, 36
+IRQ 5, 37
+IRQ 6, 38
+IRQ 7, 39
+IRQ 8, 40
+IRQ 9, 41
+IRQ 10, 42
+IRQ 11, 43
+IRQ 12, 44
+IRQ 13, 45
+IRQ 14, 46
+IRQ 15, 47
+
+SYSCALL 64
+
+; vim: set ts=4 sw=4 tw=0 noet :
diff --git a/src/kernel/core/kmain.c b/src/kernel/core/kmain.c
new file mode 100644
index 0000000..2169b7d
--- /dev/null
+++ b/src/kernel/core/kmain.c
@@ -0,0 +1,215 @@
+#include <multiboot.h>
+#include <config.h>
+#include <dbglog.h>
+#include <sys.h>
+#include <malloc.h>
+
+#include <gdt.h>
+#include <idt.h>
+#include <frame.h>
+#include <paging.h>
+#include <region.h>
+#include <kmalloc.h>
+
+#include <thread.h>
+
+#include <slab_alloc.h>
+#include <hashtbl.h>
+
+extern const void k_end_addr; // defined in linker script: 0xC0000000 plus kernel stuff
+
+void breakpoint_handler(registers_t *regs) {
+ dbg_printf("Breakpoint! (int3)\n");
+ BOCHS_BREAKPOINT;
+}
+
+void region_test1() {
+ void* p = region_alloc(0x1000, "Test region", 0);
+ dbg_printf("Allocated one-page region: 0x%p\n", p);
+ dbg_print_region_info();
+ void* q = region_alloc(0x1000, "Test region", 0);
+ dbg_printf("Allocated one-page region: 0x%p\n", q);
+ dbg_print_region_info();
+ void* r = region_alloc(0x2000, "Test region", 0);
+ dbg_printf("Allocated two-page region: 0x%p\n", r);
+ dbg_print_region_info();
+ void* s = region_alloc(0x10000, "Test region", 0);
+ dbg_printf("Allocated 16-page region: 0x%p\n", s);
+ dbg_print_region_info();
+ region_free(p);
+ dbg_printf("Freed region 0x%p\n", p);
+ dbg_print_region_info();
+ region_free(q);
+ dbg_printf("Freed region 0x%p\n", q);
+ dbg_print_region_info();
+ region_free(r);
+ dbg_printf("Freed region 0x%p\n", r);
+ dbg_print_region_info();
+ region_free(s);
+ dbg_printf("Freed region 0x%p\n", s);
+ dbg_print_region_info();
+}
+
+void region_test2() {
+ // allocate a big region and try to write into it
+ dbg_printf("Begin region test 2...");
+ const size_t n = 200;
+ void* p0 = region_alloc(n * PAGE_SIZE, "Test big region", default_allocator_pf_handler);
+ for (size_t i = 0; i < n; i++) {
+ uint32_t *x = (uint32_t*)(p0 + i * PAGE_SIZE);
+ x[0] = 12;
+ x[1] = (i * 20422) % 122;
+ }
+ // unmap memory
+ for (size_t i = 0; i < n; i++) {
+ void* p = p0 + i * PAGE_SIZE;
+ uint32_t *x = (uint32_t*)p;
+ ASSERT(x[1] == (i * 20422) % 122);
+
+ uint32_t f = pd_get_frame(p);
+ ASSERT(f != 0);
+ pd_unmap_page(p);
+ ASSERT(pd_get_frame(p) == 0);
+
+ frame_free(f, 1);
+ }
+ region_free(p0);
+ dbg_printf("OK\n");
+}
+
+void kmalloc_test(void* kernel_data_end) {
+ // Test kmalloc !
+ dbg_print_region_info();
+ dbg_printf("Begin kmalloc test...\n");
+ const int m = 200;
+ uint16_t** ptr = malloc(m * sizeof(uint16_t*));
+ for (int i = 0; i < m; i++) {
+ size_t s = 1 << ((i * 7) % 11 + 2);
+ ptr[i] = (uint16_t*)malloc(s);
+ ASSERT((void*)ptr[i] >= kernel_data_end && (size_t)ptr[i] < 0xFFC00000);
+ *ptr[i] = ((i * 211) % 1024);
+ }
+ dbg_printf("Fully allocated.\n");
+ dbg_print_region_info();
+ for (int i = 0; i < m; i++) {
+ for (int j = i; j < m; j++) {
+ ASSERT(*ptr[j] == (j * 211) % 1024);
+ }
+ free(ptr[i]);
+ }
+ free(ptr);
+ dbg_printf("Kmalloc test OK.\n");
+ dbg_print_region_info();
+}
+
+void test_hashtbl_1() {
+ // hashtable test
+ hashtbl_t *ht = create_hashtbl(str_key_eq_fun, str_hash_fun, 0);
+ hashtbl_add(ht, "test1", "Hello, world [test1]");
+ hashtbl_add(ht, "test2", "Hello, world [test2]");
+ dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+ dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+ dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+ dbg_printf("adding test...\n");
+ hashtbl_add(ht, "test", "Forever alone");
+ dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+ dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+ dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+ dbg_printf("removing test1...\n");
+ hashtbl_remove(ht, "test1");
+ dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+ dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+ dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+ delete_hashtbl(ht);
+}
+
+void test_hashtbl_2() {
+ hashtbl_t *ht = create_hashtbl(id_key_eq_fun, id_hash_fun, 0);
+ hashtbl_add(ht, (void*)12, "Hello, world [12]");
+ hashtbl_add(ht, (void*)777, "Hello, world [777]");
+ dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+ dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+ dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+ dbg_printf("adding 144...\n");
+ hashtbl_add(ht, (void*)144, "Forever alone");
+ dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+ dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+ dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+ dbg_printf("removing 12...\n");
+ hashtbl_remove(ht, (void*)12);
+ dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+ dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+ dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+ delete_hashtbl(ht);
+}
+
+void test_thread(void* a) {
+ for(int i = 0; i < 120; i++) {
+ dbg_printf("b");
+ for (int x = 0; x < 100000; x++) asm volatile("xor %%ebx, %%ebx":::"%ebx");
+ if (i % 8 == 0) yield();
+ }
+}
+void kernel_init_stage2(void* data) {
+ dbg_print_region_info();
+ dbg_print_frame_stats();
+
+ test_hashtbl_1();
+ test_hashtbl_2();
+
+ thread_t *tb = new_thread(test_thread, 0);
+ resume_thread(tb, false);
+
+ for (int i = 0; i < 120; i++) {
+ dbg_printf("a");
+ for (int x = 0; x < 100000; x++) asm volatile("xor %%ebx, %%ebx":::"%ebx");
+ }
+ PANIC("Reached kmain end! Falling off the edge.");
+}
+
+void kmain(struct multiboot_info_t *mbd, int32_t mb_magic) {
+ dbglog_setup();
+
+ dbg_printf("Hello, kernel world!\n");
+ dbg_printf("This is %s, version %s.\n", OS_NAME, OS_VERSION);
+
+ ASSERT(mb_magic == MULTIBOOT_BOOTLOADER_MAGIC);
+
+ gdt_init(); dbg_printf("GDT set up.\n");
+
+ idt_init(); dbg_printf("IDT set up.\n");
+ idt_set_ex_handler(EX_BREAKPOINT, breakpoint_handler);
+ asm volatile("int $0x3"); // test breakpoint
+
+ size_t total_ram = ((mbd->mem_upper + mbd->mem_lower) * 1024);
+ dbg_printf("Total ram: %d Kb\n", total_ram / 1024);
+
+ // used for allocation of data structures before malloc is set up:
+ // a pointer to this pointer is passed to the functions that might have
+ // to allocate memory; they simply advance it by the amount they allocate
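+ // (frame_init_allocator does exactly this: it places its bitmap at
+ // *kernel_data_end and bumps the pointer past it, see core/frame.c)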
+ void* kernel_data_end = (void*)&k_end_addr;
+
+ frame_init_allocator(total_ram, &kernel_data_end);
+ dbg_printf("kernel_data_end: 0x%p\n", kernel_data_end);
+ dbg_print_frame_stats();
+
+ paging_setup(kernel_data_end);
+ dbg_printf("Paging seems to be working!\n");
+
+ BOCHS_BREAKPOINT;
+
+ region_allocator_init(kernel_data_end);
+ region_test1();
+ region_test2();
+
+ kmalloc_setup();
+ kmalloc_test(kernel_data_end);
+
+ // enter multi-threading mode
+ // interrupts are enabled at this moment, so all
+ // code run from now on should be preemptible (i.e. thread-safe)
+ threading_setup(kernel_init_stage2, 0);
+ PANIC("Should never come here.");
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/kmalloc.c b/src/kernel/core/kmalloc.c
new file mode 100644
index 0000000..e15572a
--- /dev/null
+++ b/src/kernel/core/kmalloc.c
@@ -0,0 +1,52 @@
+#include <kmalloc.h>
+
+#include <slab_alloc.h>
+#include <mutex.h>
+
+#include <frame.h>
+#include <paging.h>
+#include <region.h>
+
+static void* page_alloc_fun_for_kmalloc(size_t bytes) {
+ void* addr = region_alloc(bytes, "Core kernel heap", default_allocator_pf_handler);
+ return addr;
+}
+
+static slab_type_t slab_sizes[] = {
+ { "8B kmalloc objects", 8, 2 },
+ { "16B kmalloc objects", 16, 2 },
+ { "32B kmalloc objects", 32, 2 },
+ { "64B kmalloc objects", 64, 4 },
+ { "128B kmalloc objects", 128, 4 },
+ { "256B kmalloc objects", 256, 4 },
+ { "512B kmalloc objects", 512, 8 },
+ { "1KB kmalloc objects", 1024, 8 },
+ { "2KB kmalloc objects", 2048, 16 },
+ { "4KB kmalloc objects", 4096, 16 },
+ { 0, 0, 0 }
+};
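+
+// A request is presumably served from the smallest cache that fits it,
+// e.g. malloc(100) would come from the "128B kmalloc objects" slab
+// (slab_alloc itself lives in common code, outside this diff).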
+
+static mem_allocator_t *kernel_allocator = 0;
+STATIC_MUTEX(malloc_mutex);
+
+void kmalloc_setup() {
+ kernel_allocator =
+ create_slab_allocator(slab_sizes, page_alloc_fun_for_kmalloc,
+ region_free_unmap_free);
+}
+
+void* malloc(size_t sz) {
+ void* res = 0;
+
+ mutex_lock(&malloc_mutex);
+ res = slab_alloc(kernel_allocator, sz);
+ mutex_unlock(&malloc_mutex);
+
+ return res;
+}
+
+void free(void* ptr) {
+ mutex_lock(&malloc_mutex);
+ slab_free(kernel_allocator, ptr);
+ mutex_unlock(&malloc_mutex);
+}
diff --git a/src/kernel/core/loader.s b/src/kernel/core/loader.s
new file mode 100644
index 0000000..447d82d
--- /dev/null
+++ b/src/kernel/core/loader.s
@@ -0,0 +1,86 @@
+[EXTERN kmain] ; kmain is defined in kmain.c
+[GLOBAL loader] ; making entry point visible to linker
+[GLOBAL kernel_pd] ; make kernel page directory visible
+[GLOBAL kernel_stack_protector] ; used to detect kernel stack overflow
+[GLOBAL kernel_stack_top] ; stack re-used by scheduler
+
+; higher-half kernel setup
+K_HIGHHALF_ADDR equ 0xC0000000
+K_PAGE_NUMBER equ (K_HIGHHALF_ADDR >> 22)
+
+; loader stack size
+LOADER_STACK_SIZE equ 0x8000 ; 32KB
+
+; setting up the Multiboot header - see GRUB docs for details
+MODULEALIGN equ 1<<0 ; align loaded modules on page boundaries
+MEMINFO equ 1<<1 ; provide memory map
+FLAGS equ MODULEALIGN | MEMINFO ; this is the Multiboot 'flag' field
+MAGIC equ 0x1BADB002 ; 'magic number' lets bootloader find the header
+CHECKSUM equ -(MAGIC + FLAGS) ; checksum required
+
+[section .setup]
+align 4
+multiboot_header:
+ dd MAGIC
+ dd FLAGS
+ dd CHECKSUM
+
+loader:
+ ; setup the boot page directory used for higher-half
+ mov ecx, kernel_pd
+ sub ecx, K_HIGHHALF_ADDR ; access its lower-half address
+ mov cr3, ecx
+
+ ; Set PSE bit in CR4 to enable 4MB pages.
+ mov ecx, cr4
+ or ecx, 0x00000010
+ mov cr4, ecx
+
+ ; Set PG bit in CR0 to enable paging.
+ mov ecx, cr0
+ or ecx, 0x80000000
+ mov cr0, ecx
+
+ ; long jump required
+ lea ecx, [higherhalf]
+ jmp ecx
+
+[section .data]
+align 0x1000
+kernel_pd:
+ ; uses 4MB pages
+ ; identity-maps the first 4MB of RAM, and also maps them at offset K_HIGHHALF_ADDR
+ dd 0x00000083
+ times (K_PAGE_NUMBER - 1) dd 0
+ dd 0x00000083
+ times (1024 - K_PAGE_NUMBER - 1) dd 0
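+; 0x00000083 decodes as PRESENT (bit 0) | RW (bit 1) | 4MB page (bit 7):
+; physical 0x00000000-0x003FFFFF is thus visible both at 0x0 and at
+; K_HIGHHALF_ADDR, until higherhalf: below removes the identity mapping.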
+
+[section .text]
+higherhalf: ; now we're running in higher half
+ ; unmap first 4Mb
+ mov dword [kernel_pd], 0
+ invlpg [0]
+
+ mov esp, kernel_stack_top ; set up the stack
+
+ push eax ; pass Multiboot magic number
+ add ebx, K_HIGHHALF_ADDR ; update the MB info structure so that it is in higher half
+ push ebx ; pass Multiboot info structure
+
+ call kmain ; call kernel proper
+
+hang:
+ ; halt machine should kernel return
+ cli
+ hlt
+ jmp hang
+
+[section .bss]
+align 0x1000
+kernel_stack_protector:
+ resb 0x1000 ; as soon as we have efficient paging, we WON'T map this page
+kernel_stack_bottom:
+ resb LOADER_STACK_SIZE
+kernel_stack_top:
+
+; vim: set ts=4 sw=4 tw=0 noet :
diff --git a/src/kernel/core/paging.c b/src/kernel/core/paging.c
new file mode 100644
index 0000000..e60ca53
--- /dev/null
+++ b/src/kernel/core/paging.c
@@ -0,0 +1,298 @@
+#include <paging.h>
+#include <frame.h>
+#include <idt.h>
+#include <dbglog.h>
+#include <region.h>
+#include <mutex.h>
+#include <thread.h>
+#include <malloc.h>
+
+#define PAGE_OF_ADDR(x) (((size_t)x >> PAGE_SHIFT) % N_PAGES_IN_PT)
+#define PT_OF_ADDR(x) ((size_t)x >> (PAGE_SHIFT + PT_SHIFT))
+
+#define PTE_PRESENT (1<<0)
+#define PTE_RW (1<<1)
+#define PTE_USER (1<<2)
+#define PTE_WRITE_THROUGH (1<<3)
+#define PTE_DISABLE_CACHE (1<<4)
+#define PTE_ACCESSED (1<<5)
+#define PTE_DIRTY (1<<6) // only PTE
+#define PTE_SIZE_4M (1<<7) // only PDE
+#define PTE_GLOBAL (1<<8) // only PTE
+#define PTE_FRAME_SHIFT 12
+
+typedef struct page_table {
+ uint32_t page[1024];
+} pagetable_t;
+
+struct page_directory {
+ uint32_t phys_addr; // physical address of page directory
+ // to modify a page directory, we first map it
+ // then we can use mirroring to edit it
+ // (the last 4M of the address space are mapped to the PD itself)
+
+ mutex_t mutex;
+};
+
+
+// access kernel page directory page defined in loader.s
+// (this is a correct higher-half address)
+extern pagetable_t kernel_pd;
+
+// pre-allocate a page table so that we can map the first 4M of kernel memory
+static pagetable_t __attribute__((aligned(PAGE_SIZE))) kernel_pt0;
+
+extern char kernel_stack_protector;
+
+static pagedir_t kernel_pd_d;
+
+#define current_pt ((pagetable_t*)PD_MIRROR_ADDR)
+#define current_pd ((pagetable_t*)(PD_MIRROR_ADDR + (N_PAGES_IN_PT-1)*PAGE_SIZE))
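+
+// Sketch of the mirroring trick: the PD's last entry points at the PD
+// itself, so the MMU interprets the PD as the page table for the top 4MB.
+// Page table #pt is then always mapped at current_pt[pt], and the PD shows
+// up as the last "page table" of that window, which is what current_pd is.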
+
+void page_fault_handler(registers_t *regs) {
+ void* vaddr;
+ asm volatile("movl %%cr2, %0":"=r"(vaddr));
+
+ if ((size_t)vaddr >= K_HIGHHALF_ADDR) {
+ uint32_t pt = PT_OF_ADDR(vaddr);
+
+ if (current_pd != &kernel_pd && current_pd->page[pt] != kernel_pd.page[pt]) {
+ current_pd->page[pt] = kernel_pd.page[pt];
+ invlpg(&current_pt[pt]);
+ return;
+ }
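+ // (kernel page tables are shared lazily: other page directories pick
+ // up new kernel_pd entries on first fault rather than eagerly)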
+ if (regs->eflags & EFLAGS_IF) asm volatile("sti"); // from now on we are preemptible
+
+ if (vaddr >= (void*)&kernel_stack_protector && vaddr < (void*)&kernel_stack_protector + PAGE_SIZE) {
+ dbg_printf("Kernel stack overflow at 0x%p\n", vaddr);
+ PANIC("Kernel stack overflow.");
+ }
+
+ if ((size_t)vaddr >= PD_MIRROR_ADDR) {
+ dbg_printf("Fault on access to mirrorred PD at 0x%p\n", vaddr);
+ dbg_print_region_info();
+ PANIC("Unhandled kernel space page fault");
+ }
+
+ region_info_t *i = find_region(vaddr);
+ if (i == 0) {
+ dbg_printf("Kernel pagefault in non-existing region at 0x%p\n", vaddr);
+ dbg_dump_registers(regs);
+ PANIC("Unhandled kernel space page fault");
+ }
+ if (i->pf == 0) {
+ dbg_printf("Kernel pagefault in region with no handler at 0x%p\n", vaddr);
+ dbg_dump_registers(regs);
+ dbg_print_region_info();
+ PANIC("Unhandled kernel space page fault");
+ }
+ i->pf(get_current_pagedir(), i, vaddr);
+ } else {
+ if (regs->eflags & EFLAGS_IF) asm volatile("sti"); // userspace PF handlers should always be preemptible
+
+ dbg_printf("Userspace page fault at 0x%p\n", vaddr);
+ PANIC("Unhandled userspace page fault");
+ // not handled yet
+ // TODO
+ }
+}
+
+void paging_setup(void* kernel_data_end) {
+ size_t n_kernel_pages =
+ PAGE_ALIGN_UP((size_t)kernel_data_end - K_HIGHHALF_ADDR)/PAGE_SIZE;
+
+ ASSERT(n_kernel_pages <= 1024); // we use less than 4M for kernel
+
+ // setup kernel_pd_d structure
+ kernel_pd_d.phys_addr = (size_t)&kernel_pd - K_HIGHHALF_ADDR;
+ kernel_pd_d.mutex = MUTEX_UNLOCKED;
+
+ // setup kernel_pt0
+ ASSERT(PAGE_OF_ADDR(K_HIGHHALF_ADDR) == 0); // kernel is 4M-aligned
+ ASSERT(FIRST_KERNEL_PT == 768);
+ for (size_t i = 0; i < n_kernel_pages; i++) {
+ if ((i * PAGE_SIZE) + K_HIGHHALF_ADDR == (size_t)&kernel_stack_protector) {
+ kernel_pt0.page[i] = 0; // don't map kernel stack protector page
+ frame_free(i, 1);
+ } else {
+ kernel_pt0.page[i] = (i << PTE_FRAME_SHIFT) | PTE_PRESENT | PTE_RW | PTE_GLOBAL;
+ }
+ }
+ for (size_t i = n_kernel_pages; i < 1024; i++){
+ kernel_pt0.page[i] = 0;
+ }
+
+ // replace 4M mapping by kernel_pt0
+ kernel_pd.page[FIRST_KERNEL_PT] =
+ (((size_t)&kernel_pt0 - K_HIGHHALF_ADDR) & PAGE_MASK) | PTE_PRESENT | PTE_RW;
+ // set up mirroring
+ kernel_pd.page[N_PAGES_IN_PT-1] =
+ (((size_t)&kernel_pd - K_HIGHHALF_ADDR) & PAGE_MASK) | PTE_PRESENT | PTE_RW;
+
+ invlpg((void*)K_HIGHHALF_ADDR);
+
+ // paging already enabled in loader, nothing to do.
+
+ // disable 4M pages (remove PSE bit in CR4)
+ uint32_t cr4;
+ asm volatile("movl %%cr4, %0": "=r"(cr4));
+ cr4 &= ~0x00000010;
+ asm volatile("movl %0, %%cr4":: "r"(cr4));
+
+ idt_set_ex_handler(EX_PAGE_FAULT, page_fault_handler);
+}
+
+pagedir_t *get_current_pagedir() {
+ if (current_thread == 0) return &kernel_pd_d;
+ return current_thread->current_pd_d;
+}
+
+pagedir_t *get_kernel_pagedir() {
+ return &kernel_pd_d;
+}
+
+void switch_pagedir(pagedir_t *pd) {
+ asm volatile("movl %0, %%cr3":: "r"(pd->phys_addr));
+ if (current_thread != 0) current_thread->current_pd_d = pd;
+}
+
+// ============================== //
+// Mapping and unmapping of pages //
+// ============================== //
+
+uint32_t pd_get_frame(void* vaddr) {
+ uint32_t pt = PT_OF_ADDR(vaddr);
+ uint32_t page = PAGE_OF_ADDR(vaddr);
+
+ pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+
+ if (!(pd->page[pt] & PTE_PRESENT)) return 0;
+ if (!(current_pt[pt].page[page] & PTE_PRESENT)) return 0;
+ return current_pt[pt].page[page] >> PTE_FRAME_SHIFT;
+}
+
+int pd_map_page(void* vaddr, uint32_t frame_id, bool rw) {
+ uint32_t pt = PT_OF_ADDR(vaddr);
+ uint32_t page = PAGE_OF_ADDR(vaddr);
+
+ ASSERT((size_t)vaddr < PD_MIRROR_ADDR);
+
+ pagedir_t *pdd = ((size_t)vaddr >= K_HIGHHALF_ADDR || current_thread == 0
+ ? &kernel_pd_d : current_thread->current_pd_d);
+ pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+ mutex_lock(&pdd->mutex);
+
+ if (!(pd->page[pt] & PTE_PRESENT)) {
+ uint32_t new_pt_frame = frame_alloc(1);
+ if (new_pt_frame == 0) {
+ mutex_unlock(&pdd->mutex);
+ return 1; // OOM
+ }
+
+ current_pd->page[pt] = pd->page[pt] =
+ (new_pt_frame << PTE_FRAME_SHIFT) | PTE_PRESENT | PTE_RW;
+ invlpg(&current_pt[pt]);
+ }
+ current_pt[pt].page[page] =
+ (frame_id << PTE_FRAME_SHIFT)
+ | PTE_PRESENT
+ | ((size_t)vaddr < K_HIGHHALF_ADDR ? PTE_USER : PTE_GLOBAL)
+ | (rw ? PTE_RW : 0);
+ invlpg(vaddr);
+
+ mutex_unlock(&pdd->mutex);
+ return 0;
+}
+
+void pd_unmap_page(void* vaddr) {
+ uint32_t pt = PT_OF_ADDR(vaddr);
+ uint32_t page = PAGE_OF_ADDR(vaddr);
+
+ pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+ // no need to lock the PD's mutex
+
+ if (!(pd->page[pt] & PTE_PRESENT)) return;
+ if (!(current_pt[pt].page[page] & PTE_PRESENT)) return;
+
+ current_pt[pt].page[page] = 0;
+ invlpg(vaddr);
+
+ // If the page table is completely empty we might want to free
+ // it, but we would actually lose a lot of time checking if
+ // the PT is really empty (since we don't store the
+ // number of used pages in each PT), so it's probably not worth it
+}
+
+// Creation and deletion of page directories
+
+pagedir_t *create_pagedir() {
+ uint32_t pd_phys = 0;
+ pagedir_t *pd = 0;
+ void* temp = 0;
+
+ pd_phys = frame_alloc(1);
+ if (pd_phys == 0) goto error;
+
+ pd = (pagedir_t*)malloc(sizeof(pagedir_t));
+ if (pd == 0) goto error;
+
+ temp = region_alloc(PAGE_SIZE, 0, 0);
+ if (temp == 0) goto error;
+
+ int error = pd_map_page(temp, pd_phys, true);
+ if (error) goto error;
+
+ pd->phys_addr = pd_phys * PAGE_SIZE;
+ pd->mutex = MUTEX_UNLOCKED;
+
+ // initialize PD with zeroes
+ pagetable_t *pt = (pagetable_t*)temp;
+ for (size_t i = 0; i < N_PAGES_IN_PT; i++) {
+ pt->page[i] = 0;
+ }
+ // use kernel page tables
+ for(size_t i = FIRST_KERNEL_PT; i < N_PAGES_IN_PT-1; i++) {
+ pt->page[i] = kernel_pd.page[i];
+ }
+ // set up mirroring
+ pt->page[N_PAGES_IN_PT-1] = pd->phys_addr | PTE_PRESENT | PTE_RW;
+
+ region_free_unmap(temp);
+
+ return pd;
+
+ error:
+ if (pd_phys != 0) frame_free(pd_phys, 1);
+ if (pd != 0) free(pd);
+ if (temp != 0) region_free(temp);
+ return 0;
+}
+
+void delete_pagedir(pagedir_t *pd) {
+ pagedir_t *restore_pd = get_current_pagedir();
+ if (restore_pd == pd) restore_pd = &kernel_pd_d;
+
+ // make a copy of page directory on the stack
+ switch_pagedir(pd);
+ pagetable_t backup;
+ for (size_t i = 0; i < N_PAGES_IN_PT; i++) {
+ backup.page[i] = current_pd->page[i];
+ }
+ switch_pagedir(restore_pd);
+
+ // free the page tables
+ for (size_t i = 0; i < FIRST_KERNEL_PT; i++) {
+ if (backup.page[i] & PTE_PRESENT)
+ frame_free(backup.page[i] >> PTE_FRAME_SHIFT, 1);
+ }
+ // free the page directory page
+ uint32_t pd_phys = pd->phys_addr / PAGE_SIZE;
+ ASSERT(pd_phys == (backup.page[N_PAGES_IN_PT-1] >> PTE_FRAME_SHIFT));
+ frame_free(pd_phys, 1);
+ // free the pagedir_t structure
+ free(pd);
+
+ return;
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/region.c b/src/kernel/core/region.c
new file mode 100644
index 0000000..3127048
--- /dev/null
+++ b/src/kernel/core/region.c
@@ -0,0 +1,397 @@
+#include <region.h>
+#include <dbglog.h>
+#include <frame.h>
+#include <mutex.h>
+
+typedef union region_descriptor {
+ struct {
+ union region_descriptor *next;
+ } unused_descriptor;
+ struct {
+ void* addr;
+ size_t size;
+ union region_descriptor *next_by_size, *first_bigger;
+ union region_descriptor *next_by_addr;
+ } free;
+ struct {
+ region_info_t i;
+ union region_descriptor *next_by_addr;
+ } used;
+} descriptor_t;
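+
+// A descriptor is always in exactly one of three states: on the unused-
+// descriptor list, describing a free region (threaded into both the
+// by-address and the by-size lists, with first_bigger acting as a skip
+// pointer to the next strictly larger size), or describing a used region.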
+
+#define N_RESERVE_DESCRIPTORS 2 // always keep at least 2 unused descriptors
+
+#define N_BASE_DESCRIPTORS 12 // pre-allocate memory for 12 descriptors
+static descriptor_t base_descriptors[N_BASE_DESCRIPTORS];
+
+static descriptor_t *first_unused_descriptor;
+uint32_t n_unused_descriptors;
+static descriptor_t *first_free_region_by_addr, *first_free_region_by_size;
+static descriptor_t *first_used_region;
+
+STATIC_MUTEX(ra_mutex); // region allocator mutex
+
+// ========================================================= //
+// HELPER FUNCTIONS FOR THE MANIPULATION OF THE REGION LISTS //
+// ========================================================= //
+
+static void add_unused_descriptor(descriptor_t *d) {
+ n_unused_descriptors++;
+ d->unused_descriptor.next = first_unused_descriptor;
+ first_unused_descriptor = d;
+}
+
+static descriptor_t *get_unused_descriptor() {
+ descriptor_t *r = first_unused_descriptor;
+ if (r != 0) {
+ first_unused_descriptor = r->unused_descriptor.next;
+ n_unused_descriptors--;
+ }
+ return r;
+}
+
+static void remove_free_region(descriptor_t *d) {
+ if (first_free_region_by_size == d) {
+ first_free_region_by_size = d->free.next_by_size;
+ } else {
+ for (descriptor_t *i = first_free_region_by_size; i != 0; i = i->free.next_by_size) {
+ if (i->free.next_by_size == d) {
+ i->free.next_by_size = d->free.next_by_size;
+ break;
+ }
+ }
+ }
+ if (first_free_region_by_addr == d) {
+ first_free_region_by_addr = d->free.next_by_addr;
+ } else {
+ for (descriptor_t *i = first_free_region_by_addr; i != 0; i = i->free.next_by_addr) {
+ if (i->free.next_by_addr == d) {
+ i->free.next_by_addr = d->free.next_by_addr;
+ break;
+ }
+ }
+ }
+}
+
+static void add_free_region(descriptor_t *d) {
+ /*dbg_printf("Add free region 0x%p - 0x%p\n", d->free.addr, d->free.size + d->free.addr);*/
+ // Find position of region in address-ordered list
+ // Possibly concatenate free region
+ descriptor_t *i = first_free_region_by_addr;
+ if (i == 0) {
+ ASSERT(first_free_region_by_size == 0);
+ first_free_region_by_addr = first_free_region_by_size = d;
+ d->free.next_by_size = d->free.first_bigger = d->free.next_by_addr = 0;
+ return;
+ } else if (d->free.addr + d->free.size == i->free.addr) {
+ // concatenate d . i
+ remove_free_region(i);
+ d->free.size += i->free.size;
+ add_unused_descriptor(i);
+ add_free_region(d);
+ return;
+ } else if (i->free.addr > d->free.addr) {
+ // insert before i
+ d->free.next_by_addr = i;
+ first_free_region_by_addr = d;
+ } else {
+ while (i != 0) {
+ ASSERT(d->free.addr > i->free.addr);
+ if (i->free.addr + i->free.size == d->free.addr) {
+ // concatenate i . d
+ remove_free_region(i);
+ i->free.size += d->free.size;
+ add_unused_descriptor(d);
+ add_free_region(i);
+ return;
+ } else if (i->free.next_by_addr == 0 || i->free.next_by_addr->free.addr > d->free.addr) {
+ d->free.next_by_addr = i->free.next_by_addr;
+ i->free.next_by_addr = d;
+ break;
+ } else if (d->free.addr + d->free.size == i->free.next_by_addr->free.addr) {
+ // concatenate d . i->next_by_addr
+ descriptor_t *j = i->free.next_by_addr;
+ remove_free_region(j);
+ d->free.size += j->free.size;
+ add_unused_descriptor(j);
+ add_free_region(d);
+ return;
+ } else {
+ // continue
+ i = i->free.next_by_addr;
+ }
+ }
+ }
+ // Now add it in size-ordered list
+ i = first_free_region_by_size;
+ ASSERT(i != 0);
+ if (d->free.size <= i->free.size) {
+ d->free.next_by_size = i;
+ d->free.first_bigger = (i->free.size > d->free.size ? i : i->free.first_bigger);
+ first_free_region_by_size = d;
+ } else {
+ while (i != 0) {
+ ASSERT(d->free.size > i->free.size);
+ if (i->free.next_by_size == 0) {
+ d->free.next_by_size = 0;
+ d->free.first_bigger = 0;
+ i->free.next_by_size = d;
+ if (d->free.size > i->free.size) i->free.first_bigger = d;
+ break;
+ } else if (i->free.next_by_size->free.size >= d->free.size) {
+ d->free.next_by_size = i->free.next_by_size;
+ d->free.first_bigger =
+ (i->free.next_by_size->free.size > d->free.size
+ ? i->free.next_by_size
+ : i->free.next_by_size->free.first_bigger);
+ i->free.next_by_size = d;
+ if (d->free.size > i->free.size) i->free.first_bigger = d;
+ break;
+ } else {
+ // continue
+ i = i->free.next_by_size;
+ }
+ }
+ }
+}
+
+static descriptor_t *find_used_region(void* addr) {
+ for (descriptor_t *i = first_used_region; i != 0; i = i->used.next_by_addr) {
+ if (addr >= i->used.i.addr && addr < i->used.i.addr + i->used.i.size) return i;
+ if (i->used.i.addr > addr) break;
+ }
+ return 0;
+}
+
+static void add_used_region(descriptor_t *d) {
+ descriptor_t *i = first_used_region;
+ ASSERT(i->used.i.addr < d->used.i.addr); // first region by address is never free
+
+ while (i != 0) {
+ ASSERT(i->used.i.addr < d->used.i.addr);
+ if (i->used.next_by_addr == 0 || i->used.next_by_addr->used.i.addr > d->used.i.addr) {
+ d->used.next_by_addr = i->used.next_by_addr;
+ i->used.next_by_addr = d;
+ return;
+ } else {
+ i = i->used.next_by_addr;
+ }
+ }
+ ASSERT(false);
+}
+
+static void remove_used_region(descriptor_t *d) {
+ if (first_used_region == d) {
+ first_used_region = d->used.next_by_addr;
+ } else {
+ for (descriptor_t *i = first_used_region; i != 0; i = i->used.next_by_addr) {
+ if (i->used.i.addr > d->used.i.addr) break;
+ if (i->used.next_by_addr == d) {
+ i->used.next_by_addr = d->used.next_by_addr;
+ break;
+ }
+ }
+ }
+}
+
+// =============== //
+// THE ACTUAL CODE //
+// =============== //
+
+void region_allocator_init(void* kernel_data_end) {
+ n_unused_descriptors = 0;
+ first_unused_descriptor = 0;
+ for (int i = 0; i < N_BASE_DESCRIPTORS; i++) {
+ add_unused_descriptor(&base_descriptors[i]);
+ }
+
+ descriptor_t *f0 = get_unused_descriptor();
+ f0->free.addr = (void*)PAGE_ALIGN_UP(kernel_data_end);
+ f0->free.size = ((void*)LAST_KERNEL_ADDR - f0->free.addr);
+ f0->free.next_by_size = 0;
+ f0->free.first_bigger = 0;
+ first_free_region_by_size = first_free_region_by_addr = f0;
+
+ descriptor_t *u0 = get_unused_descriptor();
+ u0->used.i.addr = (void*)K_HIGHHALF_ADDR;
+ u0->used.i.size = PAGE_ALIGN_UP(kernel_data_end) - K_HIGHHALF_ADDR;
+ u0->used.i.type = "Kernel code & data";
+ u0->used.i.pf = 0;
+ u0->used.next_by_addr = 0;
+ first_used_region = u0;
+}
+
+static void region_free_inner(void* addr) {
+ descriptor_t *d = find_used_region(addr);
+ if (d == 0) return;
+
+ region_info_t i = d->used.i;
+
+ remove_used_region(d);
+ d->free.addr = i.addr;
+ d->free.size = i.size;
+ add_free_region(d);
+}
+void region_free(void* addr) {
+ mutex_lock(&ra_mutex);
+ region_free_inner(addr);
+ mutex_unlock(&ra_mutex);
+}
+
+static void* region_alloc_inner(size_t size, char* type, page_fault_handler_t pf, bool use_reserve) {
+ size = PAGE_ALIGN_UP(size);
+
+ for (descriptor_t *i = first_free_region_by_size; i != 0; i = i->free.first_bigger) {
+ if (i->free.size >= size) {
+ // region i is the one we want to allocate in
+ descriptor_t *x = 0;
+ if (i->free.size > size) {
+ if (n_unused_descriptors <= N_RESERVE_DESCRIPTORS && !use_reserve) {
+ return 0;
+ }
+
+ // this assert basically means that the allocation function
+ // is called less than N_RESERVE_DESCRIPTORS times with
+ // the use_reserve flag before more descriptors
+ // are allocated.
+ x = get_unused_descriptor();
+ ASSERT(x != 0);
+
+ x->free.size = i->free.size - size;
+ if (size >= 0x4000) {
+ x->free.addr = i->free.addr + size;
+ } else {
+ x->free.addr = i->free.addr;
+ i->free.addr += x->free.size;
+ }
+ }
+ // do the allocation
+ remove_free_region(i);
+ if (x != 0) add_free_region(x);
+
+ void* addr = i->free.addr;
+ i->used.i.addr = addr;
+ i->used.i.size = size;
+ i->used.i.type = type;
+ i->used.i.pf = pf;
+ add_used_region(i);
+
+ return addr;
+ }
+ }
+ return 0; //No big enough block found
+}
+
+void* region_alloc(size_t size, char* type, page_fault_handler_t pf) {
+ void* result = 0;
+ mutex_lock(&ra_mutex);
+
+ if (n_unused_descriptors <= N_RESERVE_DESCRIPTORS) {
+ uint32_t frame = frame_alloc(1);
+ if (frame == 0) goto try_anyway;
+
+ void* descriptor_region = region_alloc_inner(PAGE_SIZE, "Region descriptors", 0, true);
+ ASSERT(descriptor_region != 0);
+
+ int error = pd_map_page(descriptor_region, frame, 1);
+ if (error) {
+ // this can happen if we weren't able to allocate a frame for
+ // a new pagetable
+ frame_free(frame, 1);
+ region_free_inner(descriptor_region);
+ goto try_anyway;
+ }
+
+ for (descriptor_t *d = (descriptor_t*)descriptor_region;
+ (void*)(d+1) <= (descriptor_region + PAGE_SIZE);
+ d++) {
+ add_unused_descriptor(d);
+ }
+ }
+ try_anyway:
+ // even if we don't have enough unused descriptors, we might find
+ // a free region that has exactly the right size and therefore
+ // does not require splitting, so we try the allocation in all cases
+ result = region_alloc_inner(size, type, pf, false);
+
+ mutex_unlock(&ra_mutex);
+ return result;
+}
+
+region_info_t *find_region(void* addr) {
+ region_info_t *r = 0;
+ mutex_lock(&ra_mutex);
+
+ descriptor_t *d = find_used_region(addr);
+ if (d != 0) r = &d->used.i;
+
+ mutex_unlock(&ra_mutex);
+ return r;
+}
+
+// ========================================================= //
+// HELPER FUNCTIONS : SIMPLE PF HANDLERS ; FREEING FUNCTIONS //
+// ========================================================= //
+
+void default_allocator_pf_handler(pagedir_t *pd, struct region_info *r, void* addr) {
+ ASSERT(pd_get_frame(addr) == 0); // if the error is of another type (RO, protected), we don't do anything
+
+ uint32_t f = frame_alloc(1);
+ if (f == 0) PANIC("Out Of Memory");
+
+ int error = pd_map_page(addr, f, 1);
+ if (error) PANIC("Could not map frame (OOM)");
+}
+
+void region_free_unmap_free(void* ptr) {
+ region_info_t *i = find_region(ptr);
+ ASSERT(i != 0);
+
+ for (void* x = i->addr; x < i->addr + i->size; x += PAGE_SIZE) {
+ uint32_t f = pd_get_frame(x);
+ if (f != 0) {
+ pd_unmap_page(x);
+ frame_free(f, 1);
+ }
+ }
+ region_free(ptr);
+}
+
+void region_free_unmap(void* ptr) {
+ region_info_t *i = find_region(ptr);
+ ASSERT(i != 0);
+
+ for (void* x = i->addr; x < i->addr + i->size; x += PAGE_SIZE) {
+ pd_unmap_page(x);
+ }
+ region_free(ptr);
+}
+
+// =========================== //
+// DEBUG LOG PRINTING FUNCTION //
+// =========================== //
+
+void dbg_print_region_info() {
+ mutex_lock(&ra_mutex);
+
+ dbg_printf("/ Free kernel regions, by address:\n");
+ for (descriptor_t *d = first_free_region_by_addr; d != 0; d = d->free.next_by_addr) {
+ dbg_printf("| 0x%p - 0x%p\n", d->free.addr, d->free.addr + d->free.size);
+ ASSERT(d != d->free.next_by_addr);
+ }
+ dbg_printf("- Free kernel regions, by size:\n");
+ for (descriptor_t *d = first_free_region_by_size; d != 0; d = d->free.next_by_size) {
+ dbg_printf("| 0x%p - 0x%p\n", d->free.addr, d->free.addr + d->free.size);
+ ASSERT(d != d->free.next_by_size);
+ }
+ dbg_printf("- Used kernel regions:\n");
+ for (descriptor_t *d = first_used_region; d != 0; d = d->used.next_by_addr) {
+ dbg_printf("| 0x%p - 0x%p %s\n", d->used.i.addr, d->used.i.addr + d->used.i.size, d->used.i.type);
+ ASSERT(d != d->used.next_by_addr);
+ }
+ dbg_printf("\\\n");
+
+ mutex_unlock(&ra_mutex);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/sys.c b/src/kernel/core/sys.c
new file mode 100644
index 0000000..2b77463
--- /dev/null
+++ b/src/kernel/core/sys.c
@@ -0,0 +1,25 @@
+#include <sys.h>
+#include <dbglog.h>
+
+
+// Kernel panic and kernel assert failure
+
+static void panic_do(const char* type, const char *msg, const char* file, int line) {
+ asm volatile("cli;");
+ dbg_printf("/\n| %s:\t%s\n", type, msg);
+ dbg_printf("| File: \t%s:%i\n", file, line);
+ dbg_printf("| System halted -_-'\n");
+ dbg_printf("\\---------------------------------------------------------/");
+ BOCHS_BREAKPOINT;
+ asm volatile("hlt");
+}
+
+void panic(const char* message, const char* file, int line) {
+ panic_do("PANIC", message, file, line);
+}
+
+void panic_assert(const char* assertion, const char* file, int line) {
+ panic_do("ASSERT FAILED", assertion, file, line);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/thread.c b/src/kernel/core/thread.c
new file mode 100644
index 0000000..7f0bb5b
--- /dev/null
+++ b/src/kernel/core/thread.c
@@ -0,0 +1,208 @@
+#include <thread.h>
+#include <malloc.h>
+#include <dbglog.h>
+#include <idt.h>
+
+#include <frame.h>
+#include <paging.h>
+
+void save_context_and_enter_scheduler(saved_context_t *ctx);
+void irq0_save_context_and_enter_scheduler(saved_context_t *ctx);
+void resume_context(saved_context_t *ctx);
+
+thread_t *current_thread = 0;
+
+// ====================== //
+// THE PROGRAMMABLE TIMER //
+// ====================== //
+
+void set_pit_frequency(uint32_t freq) {
+ uint32_t divisor = 1193180 / freq;
+ ASSERT(divisor < 65536); // must fit on 16 bits
+
+ uint8_t l = (divisor & 0xFF);
+ uint8_t h = ((divisor >> 8) & 0xFF);
+
+ outb(0x43, 0x36);
+ outb(0x40, l);
+ outb(0x40, h);
+}
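+
+// Worked example: asking for 100 Hz gives divisor 1193180 / 100 = 11931,
+// i.e. an actual tick rate of 1193180 / 11931 ~ 100.007 Hz (the PIT can
+// only approximate most frequencies).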
+
+// ============================= //
+// HELPER : IF FLAG MANIPULATION //
+// ============================= //
+
+static inline bool disable_interrupts() {
+ uint32_t eflags;
+ asm volatile("pushf; pop %0" : "=r"(eflags));
+ asm volatile("cli");
+ return (eflags & EFLAGS_IF) != 0;
+}
+
+static inline void resume_interrupts(bool st) {
+ if (st) asm volatile("sti");
+}
+
+// ================== //
+// THE TASK SCHEDULER //
+// ================== //
+
+static thread_t *queue_first_thread = 0, *queue_last_thread = 0;
+
+void enqueue_thread(thread_t *t, bool just_ran) {
+ ASSERT(t->state == T_STATE_RUNNING);
+ if (queue_first_thread == 0) {
+ queue_first_thread = queue_last_thread = t;
+ t->next_in_queue = 0;
+ } else if (just_ran) {
+ t->next_in_queue = 0;
+ queue_last_thread->next_in_queue = t;
+ queue_last_thread = t;
+ } else {
+ t->next_in_queue = queue_first_thread;
+ queue_first_thread = t;
+ }
+}
+
+thread_t* dequeue_thread() {
+ thread_t *t = queue_first_thread;
+ if (t == 0) return 0;
+
+ queue_first_thread = t->next_in_queue;
+ if (queue_first_thread == 0) queue_last_thread = 0;
+
+ return t;
+}
+
+// ================ //
+// THE TASKING CODE //
+// ================ //
+
+void run_scheduler() {
+ // At this point, interrupts are disabled
+ // This function is expected NEVER TO RETURN
+
+ if (current_thread != 0 && current_thread->state == T_STATE_RUNNING) {
+ enqueue_thread(current_thread, true);
+ }
+
+ current_thread = dequeue_thread();
+ if (current_thread != 0) {
+ resume_context(&current_thread->ctx);
+ } else {
+ // Wait for an IRQ
+ asm volatile("sti; hlt");
+		// At this point an IRQ has happened
+ // and has been processed. Loop around.
+ run_scheduler();
+ ASSERT(false);
+ }
+}
+
+static void run_thread(void (*entry)(void*), void* data) {
+ ASSERT(current_thread->state == T_STATE_RUNNING);
+
+ switch_pagedir(get_kernel_pagedir());
+
+ asm volatile("sti");
+ entry(data);
+
+ current_thread->state = T_STATE_FINISHED;
+	// TODO: add a job for deleting the thread, or whatever
+ yield(); // expected never to return!
+ ASSERT(false);
+}
+
+thread_t *new_thread(entry_t entry, void* data) {
+ thread_t *t = (thread_t*)malloc(sizeof(thread_t));
+ if (t == 0) return 0;
+
+ void* stack = region_alloc(KPROC_STACK_SIZE, "Stack", 0);
+ if (stack == 0) {
+ free(t);
+ return 0;
+ }
+
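+	// Note (editor's): the loop below starts at stack + PAGE_SIZE, so the first
+	// page of the region stays unmapped, presumably as a stack-overflow guard page.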
+ for (void* i = stack + PAGE_SIZE; i < stack + KPROC_STACK_SIZE; i += PAGE_SIZE) {
+ uint32_t f = frame_alloc(1);
+ if (f == 0) {
+ region_free_unmap_free(stack);
+ free(t);
+ return 0;
+ }
+ pd_map_page(i, f, true);
+ }
+
+ t->stack_region = find_region(stack);
+
+ t->ctx.esp = (uint32_t*)(t->stack_region->addr + t->stack_region->size);
+	*(--t->ctx.esp) = (uint32_t)data;	// push second argument: data
+	*(--t->ctx.esp) = (uint32_t)entry;	// push first argument: entry point
+	*(--t->ctx.esp) = 0;	// push invalid return address (the run_thread function never returns)
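+	// Stack as run_thread sees it on entry (editor's note):
+	//   [esp] = 0 (fake return address), [esp+4] = entry, [esp+8] = data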
+
+ t->ctx.eip = (void(*)())run_thread;
+ t->state = T_STATE_PAUSED;
+
+ t->current_pd_d = get_kernel_pagedir();
+
+ t->proc = 0; // used by L1 functions
+
+ return t;
+}
+
+// ========== //
+// SETUP CODE //
+// ========== //
+
+static void irq0_handler(registers_t *regs) {
+ if (current_thread != 0)
+ irq0_save_context_and_enter_scheduler(&current_thread->ctx);
+}
+
+void threading_setup(entry_t cont, void* arg) {
+ set_pit_frequency(TASK_SWITCH_FREQUENCY);
+ idt_set_irq_handler(IRQ0, irq0_handler);
+
+ thread_t *t = new_thread(cont, arg);
+ ASSERT(t != 0);
+
+ resume_thread(t, false);
+
+ run_scheduler(); // never returns
+ ASSERT(false);
+}
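+// Usage sketch (editor's note; kmain_stage2 is a hypothetical continuation):
+//   threading_setup(kmain_stage2, 0);  // never returns; cont runs as the first thread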
+
+// ======================= //
+// TASK STATE MANIPULATION //
+// ======================= //
+
+void yield() {
+ if (current_thread == 0) {
+ // might happen before threading is initialized
+ // (but should not...)
+ dbg_printf("Warning: probable deadlock.\n");
+ } else {
+ save_context_and_enter_scheduler(&current_thread->ctx);
+ }
+}
+
+void pause() {
+ bool st = disable_interrupts();
+
+ current_thread->state = T_STATE_PAUSED;
+ save_context_and_enter_scheduler(&current_thread->ctx);
+
+ resume_interrupts(st);
+}
+
+void resume_thread(thread_t *thread, bool run_at_once) {
+ bool st = disable_interrupts();
+
+ if (thread->state == T_STATE_PAUSED) {
+ thread->state = T_STATE_RUNNING;
+ enqueue_thread(thread, false);
+ }
+ if (run_at_once) yield();
+
+ resume_interrupts(st);
+}
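+// Usage sketch (editor's note; worker and arg are placeholders):
+//   thread_t *t = new_thread(worker, arg);  // created in the PAUSED state
+//   if (t != 0) resume_thread(t, true);     // enqueue and switch to it at once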
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/config.h b/src/kernel/include/config.h
new file mode 120000
index 0000000..93307d9
--- /dev/null
+++ b/src/kernel/include/config.h
@@ -0,0 +1 @@
+../config.h \ No newline at end of file
diff --git a/src/kernel/include/dbglog.h b/src/kernel/include/dbglog.h
new file mode 100644
index 0000000..8bf6962
--- /dev/null
+++ b/src/kernel/include/dbglog.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <config.h>
+#include <debug.h>
+
+void dbglog_setup();
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/frame.h b/src/kernel/include/frame.h
new file mode 100644
index 0000000..9ffafb3
--- /dev/null
+++ b/src/kernel/include/frame.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <sys.h>
+
+// frame.h: physical memory allocator
+
+void frame_init_allocator(size_t total_ram, void** kernel_data_end);
+
+uint32_t frame_alloc(size_t n); // allocate n consecutive frames (returns 0 on failure)
+void frame_free(uint32_t base, size_t n);
+
+void dbg_print_frame_stats();
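+// Usage sketch (editor's note):
+//   uint32_t f = frame_alloc(1);   // f is a frame number, not an address
+//   if (f != 0) frame_free(f, 1);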
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/gdt.h b/src/kernel/include/gdt.h
new file mode 100644
index 0000000..a62d0db
--- /dev/null
+++ b/src/kernel/include/gdt.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* The GDT is one of the x86's descriptor tables. It is used for memory segmentation.
+   Here, we don't use segmentation to separate processes from one another (that is done
+   with paging); we only use it to distinguish kernel mode (ring 0) from user mode (ring 3). */
+
+void gdt_init();
+
+#define K_CODE_SEGMENT 0x08
+#define K_DATA_SEGMENT 0x10
+#define U_CODE_SEGMENT 0x18
+#define U_DATA_SEGMENT 0x20
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/idt.h b/src/kernel/include/idt.h
new file mode 100644
index 0000000..8e84cea
--- /dev/null
+++ b/src/kernel/include/idt.h
@@ -0,0 +1,79 @@
+#pragma once
+
+/* The IDT is the system descriptor table that tells the CPU what to do when an interrupt fires.
+   There are three categories of interrupts:
+   - Exceptions: e.g. page fault, division by zero
+   - IRQs: interrupts raised by hardware devices
+   - System calls: when an application asks the system to do something */
+
+#include <config.h>
+
+#define IRQ0 0
+#define IRQ1 1
+#define IRQ2 2
+#define IRQ3 3
+#define IRQ4 4
+#define IRQ5 5
+#define IRQ6 6
+#define IRQ7 7
+#define IRQ8 8
+#define IRQ9 9
+#define IRQ10 10
+#define IRQ11 11
+#define IRQ12 12
+#define IRQ13 13
+#define IRQ14 14
+#define IRQ15 15
+
+#define EX_DIVIDE_ERROR 0 // No error code
+#define EX_DEBUG 1 // No error code
+#define EX_NMI_INTERRUPT 2 // No error code
+#define EX_BREAKPOINT 3 // No error code
+#define EX_OVERFLOW 4 // No error code
+#define EX_BOUND_RANGE_EXCEDEED 5 // No error code
+#define EX_INVALID_OPCODE 6 // No error code
+#define EX_DEVICE_NOT_AVAILABLE 7 // No error code
+#define EX_DOUBLE_FAULT 8 // Yes (Zero)
+#define EX_COPROCESSOR_SEGMENT_OVERRUN 9 // No error code
+#define EX_INVALID_TSS 10 // Yes
+#define EX_SEGMENT_NOT_PRESENT 11 // Yes
+#define EX_STACK_SEGMENT_FAULT 12 // Yes
+#define EX_GENERAL_PROTECTION 13 // Yes
+#define EX_PAGE_FAULT 14 // Yes
+#define EX_INTEL_RESERVED_1 15 // No
+#define EX_FLOATING_POINT_ERROR 16 // No
+#define EX_ALIGNEMENT_CHECK 17 // Yes (Zero)
+#define EX_MACHINE_CHECK 18 // No
+#define EX_INTEL_RESERVED_2 19 // No
+#define EX_INTEL_RESERVED_3 20 // No
+#define EX_INTEL_RESERVED_4 21 // No
+#define EX_INTEL_RESERVED_5 22 // No
+#define EX_INTEL_RESERVED_6 23 // No
+#define EX_INTEL_RESERVED_7 24 // No
+#define EX_INTEL_RESERVED_8 25 // No
+#define EX_INTEL_RESERVED_9 26 // No
+#define EX_INTEL_RESERVED_10 27 // No
+#define EX_INTEL_RESERVED_11 28 // No
+#define EX_INTEL_RESERVED_12 29 // No
+#define EX_INTEL_RESERVED_13 30 // No
+#define EX_INTEL_RESERVED_14 31 // No
+
+#define EFLAGS_IF (0x1 << 9)
+
+typedef struct registers {
+ uint32_t ds; // Data segment selector
+ uint32_t edi, esi, ebp, useless_esp, ebx, edx, ecx, eax; // Pushed by pusha.
+ uint32_t int_no, err_code; // Interrupt number and error code (if applicable)
+ uint32_t eip, cs, eflags, esp, ss; // Pushed by the processor automatically.
+} registers_t;
+
+typedef void (*isr_handler_t)(registers_t*);
+
+void idt_init();
+
+void idt_set_ex_handler(int number, isr_handler_t func); // set exception handler
+void idt_set_irq_handler(int number, isr_handler_t func); // set IRQ handler
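+// Usage sketch (editor's note; kbd_handler is hypothetical):
+//   void kbd_handler(registers_t *regs) { /* read scancode, etc. */ }
+//   ...
+//   idt_set_irq_handler(IRQ1, kbd_handler);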
+
+void dbg_dump_registers(registers_t*);
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/kmalloc.h b/src/kernel/include/kmalloc.h
new file mode 100644
index 0000000..d4a9272
--- /dev/null
+++ b/src/kernel/include/kmalloc.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Kernel memory allocator: one slab allocator for shared memory
+// Thread-safe.
+
+void kmalloc_setup();
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/multiboot.h b/src/kernel/include/multiboot.h
new file mode 100644
index 0000000..581337a
--- /dev/null
+++ b/src/kernel/include/multiboot.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#define MULTIBOOT_HEADER_MAGIC 0x1BADB002
+#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002
+
+struct multiboot_header_t {
+ unsigned long magic;
+ unsigned long flags;
+ unsigned long checksum;
+ unsigned long header_addr;
+ unsigned long load_addr;
+ unsigned long load_end_addr;
+ unsigned long bss_end_addr;
+ unsigned long entry_addr;
+};
+
+struct aout_symbol_table_t {
+ unsigned long tabsize;
+ unsigned long strsize;
+ unsigned long addr;
+ unsigned long reserved;
+};
+
+struct elf_section_header_table_t {
+ unsigned long num;
+ unsigned long size;
+ unsigned long addr;
+ unsigned long shndx;
+};
+
+struct multiboot_info_t {
+ unsigned long flags;
+ unsigned long mem_lower;
+ unsigned long mem_upper;
+ unsigned long boot_device;
+ unsigned long cmdline;
+ unsigned long mods_count;
+ unsigned long mods_addr;
+ union {
+ struct aout_symbol_table_t aout_sym;
+ struct elf_section_header_table_t elf_sec;
+ } u;
+ unsigned long mmap_length;
+ unsigned long mmap_addr;
+};
+
+struct module_t {
+ unsigned long mod_start;
+ unsigned long mod_end;
+ unsigned long string;
+ unsigned long reserved;
+};
+
+struct memory_map_t {
+ unsigned long size;
+ unsigned long base_addr_low;
+ unsigned long base_addr_high;
+ unsigned long length_low;
+ unsigned long length_high;
+ unsigned long type;
+};
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/paging.h b/src/kernel/include/paging.h
new file mode 100644
index 0000000..44014a2
--- /dev/null
+++ b/src/kernel/include/paging.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <sys.h>
+#include <stdbool.h>
+
+struct page_directory;
+typedef struct page_directory pagedir_t;
+
+
+void paging_setup(void* kernel_data_end);
+
+pagedir_t *get_current_pagedir();
+pagedir_t *get_kernel_pagedir();
+
+void switch_pagedir(pagedir_t *pd);
+
+// these functions are always relative to the currently mapped page directory
+uint32_t pd_get_frame(void* vaddr); // get physical frame for virtual address
+int pd_map_page(void* vaddr, uint32_t frame_id, bool rw); // returns nonzero on error
+void pd_unmap_page(void* vaddr); // does nothing if page not mapped
+
+// Note on concurrency : we expect that multiple threads will not try to map/unmap
+// pages in the same region at the same time. It can nevertheless happen that
+// several threads try to map pages that belong to the same 4M-section, and in that
+// case both might require the allocation of a new PT at the same location. These
+// cases are well-handled (the pagedir_t type contains a mutex used for this)
+
+pagedir_t *create_pagedir(); // returns zero on error
+void delete_pagedir(pagedir_t *pd);
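+// Usage sketch (editor's note): backing one virtual page by hand, assuming
+// frame_alloc/frame_free from <frame.h>; vaddr is a placeholder:
+//   uint32_t f = frame_alloc(1);
+//   if (f == 0 || pd_map_page(vaddr, f, true) != 0) { /* out of memory */ }
+//   ...
+//   pd_unmap_page(vaddr); frame_free(f, 1);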
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/process.h b/src/kernel/include/process.h
new file mode 100644
index 0000000..00ed1d7
--- /dev/null
+++ b/src/kernel/include/process.h
@@ -0,0 +1,43 @@
+#pragma once
+
+// Things described in this file are essentially a public interface
+// All implementation details are hidden in process.c
+
+#include <thread.h>
+
+#include <hashtbl.h>
+#include <buffer.h>
+
+#define PW_NOT_WAITING 0
+#define PW_WAIT_ANY_MSG 1
+#define PW_WAIT_MSG_ON_CHAN 2
+
+#define PROCESS_MAILBOX_SIZE 42
+
+typedef int chan_id_t;
+
+typedef struct chan_pair {
+ chan_id_t fst, snd;
+} chan_pair_t;
+
+typedef struct message {
+ buffer_t *data;
+ chan_id_t chan_id;
+} message_t;
+
+struct process;
+typedef struct process process_t;
+
+process_t *new_process(entry_t entry, void* data, chan_pair_t *give_chans);
+
+chan_pair_t new_chan(); // not used very often, but still useful
+chan_id_t unbox_chan(chan_id_t chan, chan_id_t subchan);
+void detach_chan(chan_id_t chan); // chan ID is freed
+
+int send_message(chan_id_t chan, buffer_t *msg); // nonzero on error (recipient queue is full)
+
+size_t await_message(); // returns the size of the next message to arrive
+size_t await_message_on_chan(chan_id_t chan); // returns the size of the next message to arrive on that channel
+message_t get_message(); // gets the first message in the queue (or nothing when queue is empty)
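+// Typical receive pattern (editor's sketch; c is some valid chan_id_t):
+//   size_t sz = await_message_on_chan(c); // blocks until a message arrives
+//   message_t m = get_message();          // guaranteed to be the awaited message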
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/region.h b/src/kernel/include/region.h
new file mode 100644
index 0000000..1fef582
--- /dev/null
+++ b/src/kernel/include/region.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Kernel virtual memory region allocator
+
+// This is entirely thread-safe
+
+#include <sys.h>
+#include <paging.h>
+
+struct region_info;
+typedef void (*page_fault_handler_t)(pagedir_t *pd, struct region_info *r, void* addr);
+
+typedef struct region_info {
+ void* addr;
+ size_t size;
+ char* type;
+ page_fault_handler_t pf;
+} region_info_t;
+
+void region_allocator_init(void* kernel_data_end);
+
+void* region_alloc(size_t size, char* type, page_fault_handler_t pf); // returns 0 on error
+region_info_t *find_region(void* addr);
+void region_free(void* addr);
+
+// some useful PF handlers
+// default_allocator_pf_handler: just allocates new frames on page faults
+void default_allocator_pf_handler(pagedir_t *pd, struct region_info *r, void* addr);
+
+// some functions for freeing regions and frames
+// region_free_unmap_free: deletes a region and frees all frames that were mapped in it
+void region_free_unmap_free(void* addr);
+// region_free_unmap: deletes a region and unmaps all its pages, without freeing the frames
+void region_free_unmap(void* addr);
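+// Usage sketch (editor's note): a lazily populated region, later freed together
+// with the frames the PF handler mapped into it:
+//   void* p = region_alloc(0x4000, "example", default_allocator_pf_handler);
+//   /* ... touching pages in [p, p+0x4000) faults frames in on demand ... */
+//   region_free_unmap_free(p);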
+
+void dbg_print_region_info();
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/sys.h b/src/kernel/include/sys.h
new file mode 100644
index 0000000..29735fa
--- /dev/null
+++ b/src/kernel/include/sys.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <debug.h> // common header
+#include <config.h>
+
+static inline void outb(uint16_t port, uint8_t value) {
+ asm volatile("outb %1, %0" : : "dN"(port), "a"(value));
+}
+
+static inline void outw(uint16_t port, uint16_t value) {
+ asm volatile("outw %1, %0" : : "dN"(port), "a"(value));
+}
+
+static inline uint8_t inb(uint16_t port) {
+ uint8_t ret;
+ asm volatile("inb %1, %0" : "=a"(ret) : "dN"(port));
+ return ret;
+}
+
+static inline uint16_t inw(uint16_t port) {
+ uint16_t ret;
+ asm volatile("inw %1, %0" : "=a"(ret) : "dN"(port));
+ return ret;
+}
+
+static inline void invlpg(void* addr) {
+ asm volatile("invlpg (%0)" : : "r"(addr) : "memory");
+}
+
+#define BOCHS_BREAKPOINT asm volatile("xchg %bx, %bx")
+
+
+// Utility functions for memory alignment
+
+#define PAGE_SIZE 0x1000
+#define PAGE_MASK 0xFFFFF000
+#define PAGE_ALIGN_DOWN(x) (((size_t)x) & PAGE_MASK)
+#define PAGE_ALIGN_UP(x) ((((size_t)x)&(~PAGE_MASK)) == 0 ? ((size_t)x) : (((size_t)x) & PAGE_MASK) + PAGE_SIZE)
+#define PAGE_ID(x) (((size_t)x) / PAGE_SIZE)
+#define PAGE_SHIFT 12
+#define PT_SHIFT 10
+// PAGE_SHIFT + PT_SHIFT + PT_SHIFT = 32
+#define N_PAGES_IN_PT 1024
+#define PD_MIRROR_ADDR 0xFFC00000 // last 4MB used for PD mirroring
+#define LAST_KERNEL_ADDR PD_MIRROR_ADDR
+#define FIRST_KERNEL_PT (K_HIGHHALF_ADDR >> (PAGE_SHIFT+PT_SHIFT)) // must be 768
+
+#define MASK4 0xFFFFFFFC
+#define ALIGN4_UP(x) ((((size_t)x)&(~MASK4)) == 0 ? ((size_t)x) : (((size_t)x) & MASK4) + 4)
+#define ALIGN4_DOWN(x) (((size_t)x)&MASK4)
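+// Worked examples (editor's note):
+//   PAGE_ALIGN_DOWN(0x1234) == 0x1000   PAGE_ALIGN_UP(0x1234) == 0x2000
+//   PAGE_ALIGN_UP(0x2000)   == 0x2000   PAGE_ID(0x1234)       == 1
+//   ALIGN4_UP(5) == 8                   ALIGN4_DOWN(5) == 4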
+
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/include/thread.h b/src/kernel/include/thread.h
new file mode 100644
index 0000000..757ba00
--- /dev/null
+++ b/src/kernel/include/thread.h
@@ -0,0 +1,46 @@
+#pragma once
+
+#include <sys.h>
+#include <paging.h>
+#include <region.h>
+
+#define T_STATE_RUNNING 1
+#define T_STATE_PAUSED 2
+#define T_STATE_FINISHED 3
+
+#define KPROC_STACK_SIZE 0x8000 // 32 KB
+
+#define TASK_SWITCH_FREQUENCY 100 // in Hz
+
+typedef struct saved_context {
+ uint32_t *esp;
+ void (*eip)();
+} saved_context_t;
+
+struct process;
+typedef struct thread {
+ saved_context_t ctx;
+ pagedir_t *current_pd_d;
+
+ uint32_t state;
+
+ region_info_t *stack_region;
+
+ struct process *proc; // process : L1 data structure
+
+ struct thread *next_in_queue;
+} thread_t;
+
+typedef void (*entry_t)(void*);
+
+void threading_setup(entry_t cont, void* data); // never returns
+thread_t *new_thread(entry_t entry, void* data); // thread is PAUSED, and must be resume_thread'ed
+
+extern thread_t *current_thread;
+
+void yield();
+void pause();
+
+void resume_thread(thread_t *thread, bool run_at_once);
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/linker.ld b/src/kernel/linker.ld
new file mode 100644
index 0000000..1203950
--- /dev/null
+++ b/src/kernel/linker.ld
@@ -0,0 +1,43 @@
+ENTRY (loader)
+
+SECTIONS {
+ k_highhalf_addr = 0xC0000000;
+
+ . = 0x00100000;
+
+ .setup : {
+ *(.setup)
+ }
+
+ . += k_highhalf_addr;
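+	/* Editor's note: from here on, sections are linked at high-half virtual
+	   addresses (0xC0100000 and up), while the AT() directives below make them
+	   load at their physical address, i.e. the virtual address minus
+	   k_highhalf_addr. */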
+
+ .text : AT(ADDR(.text) - k_highhalf_addr) {
+ *(.text)
+ }
+
+ .rodata ALIGN (0x1000) : AT(ADDR(.rodata) - k_highhalf_addr) {
+ *(.rodata)
+ }
+
+ .data ALIGN (0x1000) : AT(ADDR(.data) - k_highhalf_addr) {
+ start_ctors = .;
+ *(.ctor*)
+ end_ctors = .;
+ start_dtors = .;
+ *(.dtor*)
+ end_dtors = .;
+ *(.data)
+ *(.locks)
+ }
+
+ .bss : AT(ADDR(.bss) - k_highhalf_addr) {
+ sbss = .;
+ *(COMMON)
+ *(.bss)
+ ebss = .;
+ }
+
+ k_end_addr = .;
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/user/process.c b/src/kernel/user/process.c
new file mode 100644
index 0000000..7519dae
--- /dev/null
+++ b/src/kernel/user/process.c
@@ -0,0 +1,30 @@
+#include <mutex.h>
+#include <process.h>
+
+typedef struct process {
+ thread_t *thread;
+ int pid;
+
+ mutex_t com_mutex;
+
+ hashtbl_t *chans;
+ chan_id_t next_chan_id;
+
+ message_t mbox[PROCESS_MAILBOX_SIZE];
+ int mbox_size; // number of messages in queue
+ int mbox_first; // first message in queue (circular buffer)
+
+	// a process can be in one of several waiting states:
+	// - waiting for any message
+	// - waiting for a message on a particular channel;
+	//   in this case, if a message is pushed on that particular channel,
+	//   it is put at the front of the queue, so that it is the next message read
+	//   (it is guaranteed that await_message_on_chan() immediately followed by
+	//   get_message() returns the message whose size await_... reported)
+ int wait_state;
+ chan_id_t wait_chan_id; // when waiting for a message on a particular channel
+} process_t;
+
+int push_message(process_t *proc, message_t msg); // nonzero on error (process queue is full)
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/