Diffstat (limited to 'src/kernel/core')
-rw-r--r--  src/kernel/core/context_switch.s    58
-rw-r--r--  src/kernel/core/dbglog.c           159
-rw-r--r--  src/kernel/core/frame.c             85
-rw-r--r--  src/kernel/core/gdt.c               90
-rw-r--r--  src/kernel/core/idt.c              253
-rw-r--r--  src/kernel/core/interrupt.s        126
-rw-r--r--  src/kernel/core/kmain.c            215
-rw-r--r--  src/kernel/core/kmalloc.c           52
-rw-r--r--  src/kernel/core/loader.s            86
-rw-r--r--  src/kernel/core/paging.c           298
-rw-r--r--  src/kernel/core/region.c           397
-rw-r--r--  src/kernel/core/sys.c               25
-rw-r--r--  src/kernel/core/thread.c           208
13 files changed, 2052 insertions(+), 0 deletions(-)
diff --git a/src/kernel/core/context_switch.s b/src/kernel/core/context_switch.s
new file mode 100644
index 0000000..6738a03
--- /dev/null
+++ b/src/kernel/core/context_switch.s
@@ -0,0 +1,58 @@
+[EXTERN kernel_stack_top]
+[EXTERN run_scheduler]
+
+[GLOBAL save_context_and_enter_scheduler]
+; void save_context_and_enter_scheduler(struct saved_context *ctx);
+save_context_and_enter_scheduler:
+    pushf
+    cli
+    pusha                ; Pushes edi,esi,ebp,esp,ebx,edx,ecx,eax
+
+    mov eax, cr3
+    push eax
+
+    mov eax, [esp+44]    ; get address of saved_context structure
+    mov [eax], esp       ; save esp
+    mov dword [eax+4], resume_saved_context    ; save eip
+
+    mov esp, kernel_stack_top
+    jmp run_scheduler
+
+resume_saved_context:
+    pop eax
+    mov cr3, eax
+
+    popa
+    popf
+    ret
+
+[GLOBAL irq0_save_context_and_enter_scheduler]
+; meant to be called on IRQ0
+; general registers already saved by the IRQ handler stub
+; flags already saved by the interrupt, and interrupts are disabled
+; only saves CR3
+irq0_save_context_and_enter_scheduler:
+    mov eax, cr3
+    push eax
+
+    mov eax, [esp+8]     ; get address of saved_context structure
+    mov [eax], esp       ; save esp
+    mov dword [eax+4], resume_saved_irq0_context    ; save eip
+
+    mov esp, kernel_stack_top
+    jmp run_scheduler
+
+resume_saved_irq0_context:
+    pop eax
+    mov cr3, eax
+    ret
+
+
+[GLOBAL resume_context]
+resume_context:
+    mov eax, [esp+4]     ; get address of saved context
+    mov esp, [eax]       ; resume esp
+    mov ecx, [eax+4]     ; jump to specified eip
+    jmp ecx
+
+; vim: set ts=4 sw=4 tw=0 noet :
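The save routines above store exactly two words through the ctx pointer: the stack pointer at offset 0 and a resume address at offset 4. A minimal sketch of the structure they expect, with field names taken from their use in thread.c further down (the real definition lives in thread.h, which is not part of this commit):

    /* sketch only: field offsets must match the assembly ([eax] and [eax+4]) */
    typedef struct saved_context {
        uint32_t *esp;     /* saved stack pointer, written by "mov [eax], esp" */
        void (*eip)();     /* resume address, jumped to by resume_context */
    } saved_context_t;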
diff --git a/src/kernel/core/dbglog.c b/src/kernel/core/dbglog.c
new file mode 100644
index 0000000..e042625
--- /dev/null
+++ b/src/kernel/core/dbglog.c
@@ -0,0 +1,159 @@
+#include <stdarg.h>
+#include <string.h>
+#include <printf.h>
+#include <mutex.h>
+
+#include <dbglog.h>
+#include <config.h>
+#include <sys.h>
+
+// ==================================================================
+
+#ifdef DBGLOG_TO_SCREEN
+
+static const size_t VGA_WIDTH = 80;
+static const size_t VGA_HEIGHT = 25;
+
+static uint8_t vga_color = 7;
+static uint16_t* vga_buffer = 0;
+static uint16_t vga_row = 0, vga_column = 0;
+
+static uint16_t make_vgaentry(char c, uint8_t color) {
+    uint16_t c16 = c;
+    uint16_t color16 = color;
+    return c16 | color16 << 8;
+}
+
+static void vga_putentryat(char c, uint8_t color, size_t x, size_t y) {
+    const size_t index = y * VGA_WIDTH + x;
+    vga_buffer[index] = make_vgaentry(c, color);
+}
+
+static void vga_update_cursor() {
+    uint16_t cursor_location = vga_row * 80 + vga_column;
+    outb(0x3D4, 14);    //Sending high cursor byte
+    outb(0x3D5, cursor_location >> 8);
+    outb(0x3D4, 15);    //Sending low cursor byte
+    outb(0x3D5, cursor_location);
+}
+
+static void vga_init() {
+    vga_row = 0;
+    vga_column = 0;
+    vga_buffer = (uint16_t*) (K_HIGHHALF_ADDR + 0xB8000);
+
+    for (size_t y = 0; y < VGA_HEIGHT; y++) {
+        for (size_t x = 0; x < VGA_WIDTH; x++) {
+            vga_putentryat(' ', vga_color, x, y);
+        }
+    }
+
+    vga_update_cursor();
+}
+
+static void vga_scroll() {
+    for (size_t i = 0; i < VGA_WIDTH * (VGA_HEIGHT - 1); i++) {
+        vga_buffer[i] = vga_buffer[i + VGA_WIDTH];
+    }
+    for (size_t x = 0; x < VGA_WIDTH; x++) {
+        vga_putentryat(' ', vga_color, x, VGA_HEIGHT - 1);
+    }
+    vga_row--;
+}
+
+static void vga_newline() {
+    vga_column = 0;
+    if (++vga_row == VGA_HEIGHT) {
+        vga_scroll();
+    }
+}
+
+static void vga_putc(char c) {
+    if (c == '\n') {
+        vga_newline();
+    } else if (c == '\t') {
+        vga_putc(' ');
+        while (vga_column % 4 != 0) vga_putc(' ');
+    } else {
+        vga_putentryat(c, vga_color, vga_column, vga_row);
+        if (++vga_column == VGA_WIDTH) {
+            vga_newline();
+        }
+    }
+}
+
+static void vga_puts(const char* data) {
+    size_t datalen = strlen(data);
+    for (size_t i = 0; i < datalen; i++)
+        vga_putc(data[i]);
+
+    vga_update_cursor();
+}
+
+#endif // DBGLOG_TO_SCREEN
+
+// ==================================================================
+
+#ifdef DBGLOG_TO_SERIAL
+
+#define SER_PORT 0x3F8    // COM1
+
+static void serial_init() {
+    outb(SER_PORT + 1, 0x00);    // disable interrupts
+    outb(SER_PORT + 3, 0x80);    // enable DLAB so the baud rate divisor can be set
+    outb(SER_PORT + 0, 0x03);    // set divisor to 3 (38400 baud), low byte
+    outb(SER_PORT + 1, 0x00);    // divisor high byte
+    outb(SER_PORT + 3, 0x03);    // 8 bits, no parity, one stop bit
+    outb(SER_PORT + 2, 0xC7);    // enable FIFO, clear them, with 14-byte threshold
+}
+
+static void serial_putc(const char c) {
+    while (!(inb(SER_PORT + 5) & 0x20));    // wait for the transmit buffer to be empty
+    outb(SER_PORT, c);
+}
+
+static void serial_puts(const char *c) {
+    while (*c) {
+        serial_putc(*c);
+        c++;
+    }
+}
+
+#endif // DBGLOG_TO_SERIAL
+
+// ==================================================================
+
+STATIC_MUTEX(dbglog_mutex);
+
+void dbglog_setup() {
+    mutex_lock(&dbglog_mutex);
+#ifdef DBGLOG_TO_SCREEN
+    vga_init();
+#endif
+#ifdef DBGLOG_TO_SERIAL
+    serial_init();
+#endif
+    mutex_unlock(&dbglog_mutex);
+}
+
+void dbg_print(const char* str) {
+#ifdef DBGLOG_TO_SCREEN
+    vga_puts(str);
+#endif
+#ifdef DBGLOG_TO_SERIAL
+    serial_puts(str);
+#endif
+}
+
+void dbg_printf(const char* fmt, ...) {
+    va_list ap;
+    char buffer[256];
+
+    va_start(ap, fmt);
+    vsnprintf(buffer, 256, fmt, ap);
+    va_end(ap);
+
+    dbg_print(buffer);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
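Each cell of the 80x25 text buffer is one 16-bit value: the character in the low byte and a standard VGA attribute byte in the high byte (bits 0-3 foreground colour, bits 4-6 background, bit 7 blink), so the default vga_color = 7 is light grey on black. For example:

    uint16_t entry = make_vgaentry('A', 7);    /* == 0x0741: attribute 0x07, ASCII 'A' = 0x41 */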
diff --git a/src/kernel/core/frame.c b/src/kernel/core/frame.c
new file mode 100644
index 0000000..489d010
--- /dev/null
+++ b/src/kernel/core/frame.c
@@ -0,0 +1,85 @@
+#include <frame.h>
+#include <dbglog.h>
+
+#include <mutex.h>
+
+// TODO: buddy allocator
+// this is a simple bitmap allocator
+
+#define INDEX_FROM_BIT(a)  ((a)/(8*4))
+#define OFFSET_FROM_BIT(a) ((a)%(8*4))
+
+static uint32_t *frame_bitset;
+static uint32_t nframes, nused_frames;
+static uint32_t begin_search_at;
+
+void frame_init_allocator(size_t total_ram, void** kernel_data_end) {
+    nframes = PAGE_ID(total_ram);
+
+    frame_bitset = (uint32_t*)ALIGN4_UP((size_t)*kernel_data_end);
+    *kernel_data_end = (void*)frame_bitset + ALIGN4_UP(nframes / 8);
+
+    for (size_t i = 0; i < ALIGN4_UP(nframes / 8)/4; i++)
+        frame_bitset[i] = 0;
+
+    nused_frames = 0;
+
+    size_t kernel_pages = PAGE_ALIGN_UP((size_t)*kernel_data_end - K_HIGHHALF_ADDR)/PAGE_SIZE;
+    for (size_t i = 0; i < kernel_pages; i++) {
+        size_t idx = INDEX_FROM_BIT(i);
+        size_t ofs = OFFSET_FROM_BIT(i);
+        frame_bitset[idx] |= (0x1 << ofs);
+        nused_frames++;
+    }
+    begin_search_at = INDEX_FROM_BIT(kernel_pages);
+}
+
+STATIC_MUTEX(frame_allocator_mutex);
+
+uint32_t frame_alloc(size_t n) {
+    if (n > 32) return 0;
+
+    mutex_lock(&frame_allocator_mutex);
+    for (uint32_t i = begin_search_at; i < INDEX_FROM_BIT(nframes); i++) {
+        if (frame_bitset[i] == 0xFFFFFFFF) {
+            if (i == begin_search_at) begin_search_at++;
+            continue;
+        }
+
+        for (uint32_t j = 0; j < 32 - n + 1; j++) {
+            uint32_t to_test = (0xFFFFFFFF >> (32 - n)) << j;
+            if (!(frame_bitset[i]&to_test)) {
+                frame_bitset[i] |= to_test;
+                nused_frames += n;
+
+                mutex_unlock(&frame_allocator_mutex);
+                return i * 32 + j;
+            }
+        }
+    }
+    mutex_unlock(&frame_allocator_mutex);
+    return 0;
+}
+
+void frame_free(uint32_t base, size_t n) {
+    mutex_lock(&frame_allocator_mutex);
+
+    for (size_t i = 0; i < n; i++) {
+        uint32_t idx = INDEX_FROM_BIT(base + i);
+        uint32_t ofs = OFFSET_FROM_BIT(base + i);
+        if (frame_bitset[idx] & (0x1 << ofs)) {
+            frame_bitset[idx] &= ~(0x1 << ofs);
+            nused_frames--;
+        }
+    }
+    if (INDEX_FROM_BIT(base) < begin_search_at)
+        begin_search_at = INDEX_FROM_BIT(base);
+
+    mutex_unlock(&frame_allocator_mutex);
+}
+
+void dbg_print_frame_stats() {
+    dbg_printf("Used frames: %d/%d\n", nused_frames, nframes);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
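The allocator keeps one bit per 4 KB frame, packed into 32-bit words; INDEX_FROM_BIT and OFFSET_FROM_BIT simply split a frame number into a word index and a bit position. A worked example of the test frame_alloc and frame_free perform:

    /* frame 70: word 70/32 == 2, bit 70%32 == 6 */
    uint32_t idx = INDEX_FROM_BIT(70);              /* 2 */
    uint32_t ofs = OFFSET_FROM_BIT(70);             /* 6 */
    bool in_use = frame_bitset[idx] & (1 << ofs);   /* frame_bitset is file-local; shown for illustration */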
diff --git a/src/kernel/core/gdt.c b/src/kernel/core/gdt.c
new file mode 100644
index 0000000..eadde5f
--- /dev/null
+++ b/src/kernel/core/gdt.c
@@ -0,0 +1,90 @@
+#include <gdt.h>
+#include <string.h>
+
+#define GDT_ENTRIES 6    // The contents of each entry are defined in gdt_init.
+
+/* One entry of the table */
+struct gdt_entry {
+    uint16_t limit_low;
+    uint16_t base_low;
+    uint8_t base_middle;
+    uint8_t access;
+    uint8_t granularity;
+    uint8_t base_high;
+} __attribute__((packed));
+typedef struct gdt_entry gdt_entry_t;
+
+/* Structure defining the whole table: address and size (in bytes). */
+struct gdt_ptr {
+    uint16_t limit;
+    uint32_t base;
+} __attribute__((packed));
+typedef struct gdt_ptr gdt_ptr_t;
+
+/* The TSS is used for hardware multitasking.
+   We don't use that, but we still need a TSS so that user mode process exceptions
+   can be handled correctly by the kernel. */
+struct tss_entry {
+    uint32_t prev_tss;    // The previous TSS - if we used hardware task switching this would form a linked list.
+    uint32_t esp0;        // The stack pointer to load when we change to kernel mode.
+    uint32_t ss0;         // The stack segment to load when we change to kernel mode.
+    uint32_t esp1;        // Unused...
+    uint32_t ss1;
+    uint32_t esp2;
+    uint32_t ss2;
+    uint32_t cr3;
+    uint32_t eip;
+    uint32_t eflags;
+    uint32_t eax;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t ebx;
+    uint32_t esp;
+    uint32_t ebp;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t es;          // The value to load into ES when we change to kernel mode.
+    uint32_t cs;          // The value to load into CS when we change to kernel mode.
+    uint32_t ss;          // The value to load into SS when we change to kernel mode.
+    uint32_t ds;          // The value to load into DS when we change to kernel mode.
+    uint32_t fs;          // The value to load into FS when we change to kernel mode.
+    uint32_t gs;          // The value to load into GS when we change to kernel mode.
+    uint32_t ldt;         // Unused...
+    uint16_t trap;
+    uint16_t iomap_base;
+} __attribute__((packed));
+typedef struct tss_entry tss_entry_t;
+
+// ========================= //
+// Actual definitions
+
+static gdt_entry_t gdt_entries[GDT_ENTRIES];
+static gdt_ptr_t gdt_ptr;
+
+/* For internal use only. Writes one entry of the GDT with given parameters. */
+static void gdt_set_gate(int num, uint32_t base, uint32_t limit, uint8_t access, uint8_t gran) {
+    gdt_entries[num].base_low = (base & 0xFFFF);
+    gdt_entries[num].base_middle = (base >> 16) & 0xFF;
+    gdt_entries[num].base_high = (base >> 24) & 0xFF;
+
+    gdt_entries[num].limit_low = (limit & 0xFFFF);
+    gdt_entries[num].granularity = (limit >> 16) & 0x0F;
+    gdt_entries[num].granularity |= gran & 0xF0;
+    gdt_entries[num].access = access;
+}
+
+/* Write data to the GDT and enable it. */
+void gdt_init() {
+    gdt_ptr.limit = (sizeof(gdt_entry_t) * GDT_ENTRIES) - 1;
+    gdt_ptr.base = (uint32_t)&gdt_entries;
+
+    gdt_set_gate(0, 0, 0, 0, 0);                   //Null segment
+    gdt_set_gate(1, 0, 0xFFFFFFFF, 0x9A, 0xCF);    //Kernel code segment   0x08
+    gdt_set_gate(2, 0, 0xFFFFFFFF, 0x92, 0xCF);    //Kernel data segment   0x10
+    gdt_set_gate(3, 0, 0xFFFFFFFF, 0xFA, 0xCF);    //User code segment     0x18
+    gdt_set_gate(4, 0, 0xFFFFFFFF, 0xF2, 0xCF);    //User data segment     0x20
+
+    asm volatile ("lgdt %0"::"m"(gdt_ptr):"memory");
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
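The selector values noted in the comments follow from the standard x86 encoding: a selector is the GDT index shifted left by 3, ORed with the table indicator (0 for the GDT) and the requested privilege level. This is why entry 1 is selector 0x08, and why the user segments are loaded as 0x1B and 0x23 from ring 3:

    /* selector = (GDT index << 3) | table indicator | RPL */
    #define SEG_SEL(index, rpl) (((index) << 3) | (rpl))
    /* SEG_SEL(1,0) == 0x08 kernel code, SEG_SEL(2,0) == 0x10 kernel data,
       SEG_SEL(3,3) == 0x1B user code,   SEG_SEL(4,3) == 0x23 user data */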
diff --git a/src/kernel/core/idt.c b/src/kernel/core/idt.c
new file mode 100644
index 0000000..2f244e3
--- /dev/null
+++ b/src/kernel/core/idt.c
@@ -0,0 +1,253 @@
+#include <idt.h>
+#include <gdt.h>
+#include <sys.h>
+#include <string.h>
+#include <dbglog.h>
+
+struct idt_entry {
+    uint16_t base_lo;     //Low part of address to jump to
+    uint16_t sel;         //Kernel segment selector
+    uint8_t always0;
+    uint8_t type_attr;    //Type
+    uint16_t base_hi;     //High part of address to jump to
+} __attribute__((packed));
+typedef struct idt_entry idt_entry_t;
+
+struct idt_ptr {
+    uint16_t limit;
+    uint32_t base;
+} __attribute__((packed));
+typedef struct idt_ptr idt_ptr_t;
+
+#define GATE_TYPE_INTERRUPT 14    // IF is cleared on interrupt
+#define GATE_TYPE_TRAP      15    // IF stays as is
+
+#define GATE_PRESENT   (1<<7)
+#define GATE_DPL_SHIFT 5
+
+
+void isr0();
+void isr1();
+void isr2();
+void isr3();
+void isr4();
+void isr5();
+void isr6();
+void isr7();
+void isr8();
+void isr9();
+void isr10();
+void isr11();
+void isr12();
+void isr13();
+void isr14();
+void isr15();
+void isr16();
+void isr17();
+void isr18();
+void isr19();
+void isr20();
+void isr21();
+void isr22();
+void isr23();
+void isr24();
+void isr25();
+void isr26();
+void isr27();
+void isr28();
+void isr29();
+void isr30();
+void isr31();
+
+void irq0();
+void irq1();
+void irq2();
+void irq3();
+void irq4();
+void irq5();
+void irq6();
+void irq7();
+void irq8();
+void irq9();
+void irq10();
+void irq11();
+void irq12();
+void irq13();
+void irq14();
+void irq15();
+
+void syscall64();
+
+// ************************************************************
+// Handler code
+
+static idt_entry_t idt_entries[256];
+static idt_ptr_t idt_ptr;
+
+static isr_handler_t irq_handlers[16] = {0};
+static isr_handler_t ex_handlers[32] = {0};
+
+/* Called in interrupt.s when an exception fires (interrupt 0 to 31) */
+void idt_exHandler(registers_t *regs) {
+    if (ex_handlers[regs->int_no] != 0) {
+        ex_handlers[regs->int_no](regs);
+    } else {
+        //TODO: make sure all exceptions happening in userspace do not cause kernel panic...
+        dbg_printf("Unhandled exception: %i\n", regs->int_no);
+        dbg_dump_registers(regs);
+        PANIC("Unhandled exception");
+    }
+}
+
+/* Called in interrupt.s when an IRQ fires (interrupt 32 to 47) */
+void idt_irqHandler(registers_t *regs) {
+    if (regs->err_code > 7) {    // the stub puts the IRQ number in err_code; 8-15 come from the slave PIC
+        outb(0xA0, 0x20);        // send EOI to slave PIC
+    }
+    outb(0x20, 0x20);            // send EOI to master PIC
+
+    dbg_printf("IRQ %i\n", regs->err_code);
+    if (irq_handlers[regs->err_code] != 0) {
+        irq_handlers[regs->err_code](regs);
+    }
+}
+
+/* Called in interrupt.s when a syscall is called */
+void idt_syscallHandler(registers_t *regs) {
+    dbg_printf("Syscall %i\n", regs->int_no);
+    // do nothing, yet.
+}
+
+/* For internal use only. Sets up an entry of the IDT with given parameters. */
+static void idt_set_gate(uint8_t num, void (*fun)(), uint8_t type) {
+    uint32_t base = (uint32_t)fun;
+
+    idt_entries[num].base_lo = base & 0xFFFF;
+    idt_entries[num].base_hi = (base >> 16) & 0xFFFF;
+
+    idt_entries[num].sel = K_CODE_SEGMENT;
+    idt_entries[num].always0 = 0;
+    idt_entries[num].type_attr = GATE_PRESENT
+                | (3 << GATE_DPL_SHIFT)    // accessible from user mode
+                | type;
+}
+
+static const struct {
+    uint8_t num;
+    void (*fun)();
+    uint8_t type;
+} gates[] = {
+    // Most processor exceptions are traps, and handling them
+    // should be preemptible
+    { 0,  isr0,  GATE_TYPE_TRAP },
+    { 1,  isr1,  GATE_TYPE_TRAP },
+    { 2,  isr2,  GATE_TYPE_TRAP },
+    { 3,  isr3,  GATE_TYPE_TRAP },
+    { 4,  isr4,  GATE_TYPE_TRAP },
+    { 5,  isr5,  GATE_TYPE_TRAP },
+    { 6,  isr6,  GATE_TYPE_TRAP },
+    { 7,  isr7,  GATE_TYPE_TRAP },
+    { 8,  isr8,  GATE_TYPE_TRAP },
+    { 9,  isr9,  GATE_TYPE_TRAP },
+    { 10, isr10, GATE_TYPE_TRAP },
+    { 11, isr11, GATE_TYPE_TRAP },
+    { 12, isr12, GATE_TYPE_TRAP },
+    { 13, isr13, GATE_TYPE_TRAP },
+    { 14, isr14, GATE_TYPE_INTERRUPT },    // reenables interrupts later on
+    { 15, isr15, GATE_TYPE_TRAP },
+    { 16, isr16, GATE_TYPE_TRAP },
+    { 17, isr17, GATE_TYPE_TRAP },
+    { 18, isr18, GATE_TYPE_TRAP },
+    { 19, isr19, GATE_TYPE_TRAP },
+    { 20, isr20, GATE_TYPE_TRAP },
+    { 21, isr21, GATE_TYPE_TRAP },
+    { 22, isr22, GATE_TYPE_TRAP },
+    { 23, isr23, GATE_TYPE_TRAP },
+    { 24, isr24, GATE_TYPE_TRAP },
+    { 25, isr25, GATE_TYPE_TRAP },
+    { 26, isr26, GATE_TYPE_TRAP },
+    { 27, isr27, GATE_TYPE_TRAP },
+    { 28, isr28, GATE_TYPE_TRAP },
+    { 29, isr29, GATE_TYPE_TRAP },
+    { 30, isr30, GATE_TYPE_TRAP },
+    { 31, isr31, GATE_TYPE_TRAP },
+
+    // IRQs are not preemptible; an IRQ handler should do the bare minimum
+    // (communication with the hardware), and then pass a message to a worker
+    // process in order to do further processing
+    { 32, irq0,  GATE_TYPE_INTERRUPT },
+    { 33, irq1,  GATE_TYPE_INTERRUPT },
+    { 34, irq2,  GATE_TYPE_INTERRUPT },
+    { 35, irq3,  GATE_TYPE_INTERRUPT },
+    { 36, irq4,  GATE_TYPE_INTERRUPT },
+    { 37, irq5,  GATE_TYPE_INTERRUPT },
+    { 38, irq6,  GATE_TYPE_INTERRUPT },
+    { 39, irq7,  GATE_TYPE_INTERRUPT },
+    { 40, irq8,  GATE_TYPE_INTERRUPT },
+    { 41, irq9,  GATE_TYPE_INTERRUPT },
+    { 42, irq10, GATE_TYPE_INTERRUPT },
+    { 43, irq11, GATE_TYPE_INTERRUPT },
+    { 44, irq12, GATE_TYPE_INTERRUPT },
+    { 45, irq13, GATE_TYPE_INTERRUPT },
+    { 46, irq14, GATE_TYPE_INTERRUPT },
+    { 47, irq15, GATE_TYPE_INTERRUPT },
+
+    // Of course, syscalls are preemptible
+    { 64, syscall64, GATE_TYPE_TRAP },
+
+    { 0, 0, 0 }
+};
+
+/* Remaps the IRQs. Sets up the IDT. */
+void idt_init() {
+    memset((uint8_t*)&idt_entries, 0, sizeof(idt_entry_t) * 256);
+
+    //Remap the IRQ table
+    outb(0x20, 0x11);    // ICW1: initialize master PIC, expect ICW4
+    outb(0xA0, 0x11);    // ICW1: initialize slave PIC, expect ICW4
+    outb(0x21, 0x20);    // ICW2: master PIC vector offset = 32
+    outb(0xA1, 0x28);    // ICW2: slave PIC vector offset = 40
+    outb(0x21, 0x04);    // ICW3: slave PIC wired to master IRQ2
+    outb(0xA1, 0x02);    // ICW3: slave PIC cascade identity
+    outb(0x21, 0x01);    // ICW4: 8086/88 mode
+    outb(0xA1, 0x01);
+    outb(0x21, 0x0);     // unmask all IRQs on both PICs
+    outb(0xA1, 0x0);
+
+    for (int i = 0; gates[i].type != 0; i++) {
+        idt_set_gate(gates[i].num, gates[i].fun, gates[i].type);
+    }
+
+    idt_ptr.limit = (sizeof(idt_entry_t) * 256) - 1;
+    idt_ptr.base = (uint32_t)&idt_entries;
+
+    asm volatile ("lidt %0"::"m"(idt_ptr):"memory");
+
+    // Some setup calls that come later on are not preemptible,
+    // so we wait until then to enable interrupts.
+}
+
+/* Sets up an IRQ handler for a given IRQ. */
+void idt_set_irq_handler(int number, isr_handler_t func) {
+    if (number < 16 && number >= 0) {
+        irq_handlers[number] = func;
+    }
+}
+
+/* Sets up a handler for a processor exception */
+void idt_set_ex_handler(int number, isr_handler_t func) {
+    if (number >= 0 && number < 32) {
+        ex_handlers[number] = func;
+    }
+}
+
+void dbg_dump_registers(registers_t *regs) {
+    dbg_printf("/ Exception %i\n", regs->int_no);
+    dbg_printf("| EAX: 0x%p EBX: 0x%p ECX: 0x%p EDX: 0x%p\n", regs->eax, regs->ebx, regs->ecx, regs->edx);
+    dbg_printf("| EDI: 0x%p ESI: 0x%p ESP: 0x%p EBP: 0x%p\n", regs->edi, regs->esi, regs->esp, regs->ebp);
+    dbg_printf("| EIP: 0x%p CS : 0x%p DS : 0x%p SS : 0x%p\n", regs->eip, regs->cs, regs->ds, regs->ss);
+    dbg_printf("\\ EFl: 0x%p I# : 0x%p Err: 0x%p\n", regs->eflags, regs->int_no, regs->err_code);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
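The type_attr byte built in idt_set_gate ORs together the present bit, the DPL and the gate type. Since every gate here gets DPL 3 (so that int $0x40 is usable from user mode), the two resulting values are:

    uint8_t trap = GATE_PRESENT | (3 << GATE_DPL_SHIFT) | GATE_TYPE_TRAP;         /* 0xEF */
    uint8_t intr = GATE_PRESENT | (3 << GATE_DPL_SHIFT) | GATE_TYPE_INTERRUPT;    /* 0xEE */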
diff --git a/src/kernel/core/interrupt.s b/src/kernel/core/interrupt.s
new file mode 100644
index 0000000..d40fff0
--- /dev/null
+++ b/src/kernel/core/interrupt.s
@@ -0,0 +1,126 @@
+;************************************************************************************
+
+%macro COMMONSTUB 1
+[EXTERN idt_%1Handler]
+%1_common_stub:
+
+    pusha            ; Pushes edi,esi,ebp,esp,ebx,edx,ecx,eax
+
+    mov ax, ds       ; Lower 16-bits of eax = ds.
+    push eax         ; save the data segment descriptor
+
+    mov ax, 0x10     ; load the kernel data segment descriptor
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+
+    ; pass the register data structure as a pointer to the function
+    ; (passing it directly results in GCC trashing the data when doing optimisations)
+    mov eax, esp
+    push eax
+    call idt_%1Handler
+    add esp, 4
+
+    pop eax          ; reload the original data segment descriptor
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+
+    popa             ; Pops edi,esi,ebp...
+    add esp, 8       ; Cleans up the pushed error code and pushed ISR number
+    iret
+%endmacro
+
+COMMONSTUB ex
+COMMONSTUB irq
+COMMONSTUB syscall
+
+;************************************************************************************
+
+%macro EX_NOERRCODE 1    ; define a macro, taking one parameter
+    [GLOBAL isr%1]       ; %1 accesses the first parameter.
+    isr%1:
+        push byte 0
+        push byte %1
+        jmp ex_common_stub
+%endmacro
+
+%macro EX_ERRCODE 1
+    [GLOBAL isr%1]
+    isr%1:
+        push byte %1
+        jmp ex_common_stub
+%endmacro
+
+%macro IRQ 2
+    [GLOBAL irq%1]
+    irq%1:
+        push byte %1    ; push irq number (lands in the err_code slot)
+        push byte %2    ; push int number
+        jmp irq_common_stub
+%endmacro
+
+%macro SYSCALL 1
+    [GLOBAL syscall%1]
+    syscall%1:
+        cli
+        push byte 0
+        push byte %1
+        jmp syscall_common_stub
+%endmacro
+
+EX_NOERRCODE 0
+EX_NOERRCODE 1
+EX_NOERRCODE 2
+EX_NOERRCODE 3
+EX_NOERRCODE 4
+EX_NOERRCODE 5
+EX_NOERRCODE 6
+EX_NOERRCODE 7
+EX_ERRCODE 8
+EX_NOERRCODE 9
+EX_ERRCODE 10
+EX_ERRCODE 11
+EX_ERRCODE 12
+EX_ERRCODE 13
+EX_ERRCODE 14
+EX_NOERRCODE 15
+EX_NOERRCODE 16
+EX_NOERRCODE 17
+EX_NOERRCODE 18
+EX_NOERRCODE 19
+EX_NOERRCODE 20
+EX_NOERRCODE 21
+EX_NOERRCODE 22
+EX_NOERRCODE 23
+EX_NOERRCODE 24
+EX_NOERRCODE 25
+EX_NOERRCODE 26
+EX_NOERRCODE 27
+EX_NOERRCODE 28
+EX_NOERRCODE 29
+EX_NOERRCODE 30
+EX_NOERRCODE 31
+
+IRQ 0, 32
+IRQ 1, 33
+IRQ 2, 34
+IRQ 3, 35
+IRQ 4, 36
+IRQ 5, 37
+IRQ 6, 38
+IRQ 7, 39
+IRQ 8, 40
+IRQ 9, 41
+IRQ 10, 42
+IRQ 11, 43
+IRQ 12, 44
+IRQ 13, 45
+IRQ 14, 46
+IRQ 15, 47
+
+SYSCALL 64
+
+; vim: set ts=4 sw=4 tw=0 noet :
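The registers_t structure that idt.c receives is built entirely on the stack by these stubs, so its layout has to mirror the push order, lowest address first: the ds save, the eight pusha registers, the two numbers pushed by the macros (the IRQ macros put the IRQ number in the error-code slot, which is why idt_irqHandler reads regs->err_code), and finally the frame the CPU pushed. A sketch under that assumption; the actual definition is in idt.h, which is not in this commit, and the last two fields only exist when the CPU switched privilege levels:

    typedef struct registers {
        uint32_t ds;                                       /* pushed by the common stub */
        uint32_t edi, esi, ebp, esp, ebx, edx, ecx, eax;   /* pusha */
        uint32_t int_no, err_code;                         /* pushed by the isr/irq macros */
        uint32_t eip, cs, eflags, useresp, ss;             /* pushed by the CPU itself */
    } registers_t;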
diff --git a/src/kernel/core/kmain.c b/src/kernel/core/kmain.c
new file mode 100644
index 0000000..2169b7d
--- /dev/null
+++ b/src/kernel/core/kmain.c
@@ -0,0 +1,215 @@
+#include <multiboot.h>
+#include <config.h>
+#include <dbglog.h>
+#include <sys.h>
+#include <malloc.h>
+
+#include <gdt.h>
+#include <idt.h>
+#include <frame.h>
+#include <paging.h>
+#include <region.h>
+#include <kmalloc.h>
+
+#include <thread.h>
+
+#include <slab_alloc.h>
+#include <hashtbl.h>
+
+extern const void k_end_addr;    // defined in linker script: 0xC0000000 plus kernel stuff
+
+void breakpoint_handler(registers_t *regs) {
+    dbg_printf("Breakpoint! (int3)\n");
+    BOCHS_BREAKPOINT;
+}
+
+void region_test1() {
+    void* p = region_alloc(0x1000, "Test region", 0);
+    dbg_printf("Allocated one-page region: 0x%p\n", p);
+    dbg_print_region_info();
+    void* q = region_alloc(0x1000, "Test region", 0);
+    dbg_printf("Allocated one-page region: 0x%p\n", q);
+    dbg_print_region_info();
+    void* r = region_alloc(0x2000, "Test region", 0);
+    dbg_printf("Allocated two-page region: 0x%p\n", r);
+    dbg_print_region_info();
+    void* s = region_alloc(0x10000, "Test region", 0);
+    dbg_printf("Allocated 16-page region: 0x%p\n", s);
+    dbg_print_region_info();
+    region_free(p);
+    dbg_printf("Freed region 0x%p\n", p);
+    dbg_print_region_info();
+    region_free(q);
+    dbg_printf("Freed region 0x%p\n", q);
+    dbg_print_region_info();
+    region_free(r);
+    dbg_printf("Freed region 0x%p\n", r);
+    dbg_print_region_info();
+    region_free(s);
+    dbg_printf("Freed region 0x%p\n", s);
+    dbg_print_region_info();
+}
+
+void region_test2() {
+    // allocate a big region and try to write into it
+    dbg_printf("Begin region test 2...");
+    const size_t n = 200;
+    void* p0 = region_alloc(n * PAGE_SIZE, "Test big region", default_allocator_pf_handler);
+    for (size_t i = 0; i < n; i++) {
+        uint32_t *x = (uint32_t*)(p0 + i * PAGE_SIZE);
+        x[0] = 12;
+        x[1] = (i * 20422) % 122;
+    }
+    // unmap memory
+    for (size_t i = 0; i < n; i++) {
+        void* p = p0 + i * PAGE_SIZE;
+        uint32_t *x = (uint32_t*)p;
+        ASSERT(x[1] == (i * 20422) % 122);
+
+        uint32_t f = pd_get_frame(p);
+        ASSERT(f != 0);
+        pd_unmap_page(p);
+        ASSERT(pd_get_frame(p) == 0);
+
+        frame_free(f, 1);
+    }
+    region_free(p0);
+    dbg_printf("OK\n");
+}
+
+void kmalloc_test(void* kernel_data_end) {
+    // Test kmalloc!
+    dbg_print_region_info();
+    dbg_printf("Begin kmalloc test...\n");
+    const int m = 200;
+    uint16_t** ptr = malloc(m * sizeof(uint16_t*));
+    for (int i = 0; i < m; i++) {
+        size_t s = 1 << ((i * 7) % 11 + 2);
+        ptr[i] = (uint16_t*)malloc(s);
+        ASSERT((void*)ptr[i] >= kernel_data_end && (size_t)ptr[i] < 0xFFC00000);
+        *ptr[i] = ((i * 211) % 1024);
+    }
+    dbg_printf("Fully allocated.\n");
+    dbg_print_region_info();
+    for (int i = 0; i < m; i++) {
+        for (int j = i; j < m; j++) {
+            ASSERT(*ptr[j] == (j * 211) % 1024);
+        }
+        free(ptr[i]);
+    }
+    free(ptr);
+    dbg_printf("Kmalloc test OK.\n");
+    dbg_print_region_info();
+}
+
+void test_hashtbl_1() {
+    // hashtable test
+    hashtbl_t *ht = create_hashtbl(str_key_eq_fun, str_hash_fun, 0);
+    hashtbl_add(ht, "test1", "Hello, world [test1]");
+    hashtbl_add(ht, "test2", "Hello, world [test2]");
+    dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+    dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+    dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+    dbg_printf("adding test...\n");
+    hashtbl_add(ht, "test", "Forever alone");
+    dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+    dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+    dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+    dbg_printf("removing test1...\n");
+    hashtbl_remove(ht, "test1");
+    dbg_printf("ht[test1] = %s\n", hashtbl_find(ht, "test1"));
+    dbg_printf("ht[test] = %s\n", hashtbl_find(ht, "test"));
+    dbg_printf("ht[test2] = %s\n", hashtbl_find(ht, "test2"));
+    delete_hashtbl(ht);
+}
+
+void test_hashtbl_2() {
+    hashtbl_t *ht = create_hashtbl(id_key_eq_fun, id_hash_fun, 0);
+    hashtbl_add(ht, (void*)12, "Hello, world [12]");
+    hashtbl_add(ht, (void*)777, "Hello, world [777]");
+    dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+    dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+    dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+    dbg_printf("adding 144...\n");
+    hashtbl_add(ht, (void*)144, "Forever alone");
+    dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+    dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+    dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+    dbg_printf("removing 12...\n");
+    hashtbl_remove(ht, (void*)12);
+    dbg_printf("ht[12] = %s\n", hashtbl_find(ht, (void*)12));
+    dbg_printf("ht[144] = %s\n", hashtbl_find(ht, (void*)144));
+    dbg_printf("ht[777] = %s\n", hashtbl_find(ht, (void*)777));
+    delete_hashtbl(ht);
+}
+
+void test_thread(void* a) {
+    for(int i = 0; i < 120; i++) {
+        dbg_printf("b");
+        for (int x = 0; x < 100000; x++) asm volatile("xor %%ebx, %%ebx":::"%ebx");
+        if (i % 8 == 0) yield();
+    }
+}
+void kernel_init_stage2(void* data) {
+    dbg_print_region_info();
+    dbg_print_frame_stats();
+
+    test_hashtbl_1();
+    test_hashtbl_2();
+
+    thread_t *tb = new_thread(test_thread, 0);
+    resume_thread(tb, false);
+
+    for (int i = 0; i < 120; i++) {
+        dbg_printf("a");
+        for (int x = 0; x < 100000; x++) asm volatile("xor %%ebx, %%ebx":::"%ebx");
+    }
+    PANIC("Reached kmain end! Falling off the edge.");
+}
+
+void kmain(struct multiboot_info_t *mbd, int32_t mb_magic) {
+    dbglog_setup();
+
+    dbg_printf("Hello, kernel world!\n");
+    dbg_printf("This is %s, version %s.\n", OS_NAME, OS_VERSION);
+
+    ASSERT(mb_magic == MULTIBOOT_BOOTLOADER_MAGIC);
+
+    gdt_init(); dbg_printf("GDT set up.\n");
+
+    idt_init(); dbg_printf("IDT set up.\n");
+    idt_set_ex_handler(EX_BREAKPOINT, breakpoint_handler);
+    asm volatile("int $0x3");    // test breakpoint
+
+    size_t total_ram = ((mbd->mem_upper + mbd->mem_lower) * 1024);
+    dbg_printf("Total RAM: %d KB\n", total_ram / 1024);
+
+    // used for allocation of data structures before malloc is set up
+    // a pointer to this pointer is passed to the functions that might have
+    // to allocate memory; they just increment it by the amount allocated
+    void* kernel_data_end = (void*)&k_end_addr;
+
+    frame_init_allocator(total_ram, &kernel_data_end);
+    dbg_printf("kernel_data_end: 0x%p\n", kernel_data_end);
+    dbg_print_frame_stats();
+
+    paging_setup(kernel_data_end);
+    dbg_printf("Paging seems to be working!\n");
+
+    BOCHS_BREAKPOINT;
+
+    region_allocator_init(kernel_data_end);
+    region_test1();
+    region_test2();
+
+    kmalloc_setup();
+    kmalloc_test(kernel_data_end);
+
+    // enter multi-threading mode
+    // interrupts are enabled at this moment, so all
+    // code run from now on should be preemptible (i.e. thread-safe)
+    threading_setup(kernel_init_stage2, 0);
+    PANIC("Should never get here.");
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/kmalloc.c b/src/kernel/core/kmalloc.c
new file mode 100644
index 0000000..e15572a
--- /dev/null
+++ b/src/kernel/core/kmalloc.c
@@ -0,0 +1,52 @@
+#include <kmalloc.h>
+
+#include <slab_alloc.h>
+#include <mutex.h>
+
+#include <frame.h>
+#include <paging.h>
+#include <region.h>
+
+static void* page_alloc_fun_for_kmalloc(size_t bytes) {
+    void* addr = region_alloc(bytes, "Core kernel heap", default_allocator_pf_handler);
+    return addr;
+}
+
+static slab_type_t slab_sizes[] = {
+    { "8B kmalloc objects",   8,    2 },
+    { "16B kmalloc objects",  16,   2 },
+    { "32B kmalloc objects",  32,   2 },
+    { "64B kmalloc objects",  64,   4 },
+    { "128B kmalloc objects", 128,  4 },
+    { "256B kmalloc objects", 256,  4 },
+    { "512B kmalloc objects", 512,  8 },
+    { "1KB kmalloc objects",  1024, 8 },
+    { "2KB kmalloc objects",  2048, 16 },
+    { "4KB kmalloc objects",  4096, 16 },
+    { 0, 0, 0 }
+};
+
+static mem_allocator_t *kernel_allocator = 0;
+STATIC_MUTEX(malloc_mutex);
+
+void kmalloc_setup() {
+    kernel_allocator =
+        create_slab_allocator(slab_sizes, page_alloc_fun_for_kmalloc,
+                              region_free_unmap_free);
+}
+
+void* malloc(size_t sz) {
+    void* res = 0;
+
+    mutex_lock(&malloc_mutex);
+    res = slab_alloc(kernel_allocator, sz);
+    mutex_unlock(&malloc_mutex);
+
+    return res;
+}
+
+void free(void* ptr) {
+    mutex_lock(&malloc_mutex);
+    slab_free(kernel_allocator, ptr);
+    mutex_unlock(&malloc_mutex);
+}
diff --git a/src/kernel/core/loader.s b/src/kernel/core/loader.s
new file mode 100644
index 0000000..447d82d
--- /dev/null
+++ b/src/kernel/core/loader.s
@@ -0,0 +1,86 @@
+[EXTERN kmain]                    ; kmain is defined in kmain.c
+[GLOBAL loader]                   ; making entry point visible to linker
+[GLOBAL kernel_pd]                ; make kernel page directory visible
+[GLOBAL kernel_stack_protector]   ; used to detect kernel stack overflow
+[GLOBAL kernel_stack_top]         ; stack re-used by scheduler
+
+; higher-half kernel setup
+K_HIGHHALF_ADDR equ 0xC0000000
+K_PAGE_NUMBER   equ (K_HIGHHALF_ADDR >> 22)
+
+; loader stack size
+LOADER_STACK_SIZE equ 0x8000      ; 32 KB
+
+; setting up the Multiboot header - see GRUB docs for details
+MODULEALIGN equ 1<<0                   ; align loaded modules on page boundaries
+MEMINFO     equ 1<<1                   ; provide memory map
+FLAGS       equ MODULEALIGN | MEMINFO  ; this is the Multiboot 'flag' field
+MAGIC       equ 0x1BADB002             ; 'magic number' lets bootloader find the header
+CHECKSUM    equ -(MAGIC + FLAGS)       ; checksum required
+
+[section .setup]
+align 4
+multiboot_header:
+    dd MAGIC
+    dd FLAGS
+    dd CHECKSUM
+
+loader:
+    ; setup the boot page directory used for higher-half
+    mov ecx, kernel_pd
+    sub ecx, K_HIGHHALF_ADDR      ; access its lower-half address
+    mov cr3, ecx
+
+    ; Set PSE bit in CR4 to enable 4MB pages.
+    mov ecx, cr4
+    or ecx, 0x00000010
+    mov cr4, ecx
+
+    ; Set PG bit in CR0 to enable paging.
+    mov ecx, cr0
+    or ecx, 0x80000000
+    mov cr0, ecx
+
+    ; long jump required
+    lea ecx, [higherhalf]
+    jmp ecx
+
+[section .data]
+align 0x1000
+kernel_pd:
+    ; uses 4MB pages
+    ; identity-maps the first 4MB of RAM, and also maps them at offset K_HIGHHALF_ADDR
+    dd 0x00000083
+    times (K_PAGE_NUMBER - 1) dd 0
+    dd 0x00000083
+    times (1024 - K_PAGE_NUMBER - 1) dd 0
+
+[section .text]
+higherhalf:                       ; now we're running in higher half
+    ; unmap first 4MB
+    mov dword [kernel_pd], 0
+    invlpg [0]
+
+    mov esp, kernel_stack_top     ; set up the stack
+
+    push eax                      ; pass Multiboot magic number
+    add ebx, K_HIGHHALF_ADDR      ; update the MB info structure so that it is in higher half
+    push ebx                      ; pass Multiboot info structure
+
+    call kmain                    ; call kernel proper
+
+hang:
+    ; halt machine should kernel return
+    cli
+    hlt
+    jmp hang
+
+[section .bss]
+align 0x1000
+kernel_stack_protector:
+    resb 0x1000                   ; as soon as we have efficient paging, we WON'T map this page
+kernel_stack_bottom:
+    resb LOADER_STACK_SIZE
+kernel_stack_top:
+
+; vim: set ts=4 sw=4 tw=0 noet :
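With 4 MB pages enabled, the top ten bits of a virtual address select the page-directory entry, so K_PAGE_NUMBER works out to 0xC0000000 >> 22 = 768: entry 0 identity-maps the first 4 MB (so execution can continue right after paging is switched on) and entry 768 maps the same physical memory at the higher-half address. The same arithmetic in C:

    /* PD index of a virtual address under 4 MB paging (PSE) */
    #define PD_INDEX(vaddr) ((uint32_t)(vaddr) >> 22)
    /* PD_INDEX(0x00000000) == 0, PD_INDEX(0xC0000000) == 768 */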
diff --git a/src/kernel/core/paging.c b/src/kernel/core/paging.c
new file mode 100644
index 0000000..e60ca53
--- /dev/null
+++ b/src/kernel/core/paging.c
@@ -0,0 +1,298 @@
+#include <paging.h>
+#include <frame.h>
+#include <idt.h>
+#include <dbglog.h>
+#include <region.h>
+#include <mutex.h>
+#include <thread.h>
+#include <malloc.h>
+
+#define PAGE_OF_ADDR(x)     (((size_t)x >> PAGE_SHIFT) % N_PAGES_IN_PT)
+#define PT_OF_ADDR(x)       ((size_t)x >> (PAGE_SHIFT + PT_SHIFT))
+
+#define PTE_PRESENT         (1<<0)
+#define PTE_RW              (1<<1)
+#define PTE_USER            (1<<2)
+#define PTE_WRITE_THROUGH   (1<<3)
+#define PTE_DISABLE_CACHE   (1<<4)
+#define PTE_ACCESSED        (1<<5)
+#define PTE_DIRTY           (1<<6)    // only PTE
+#define PTE_SIZE_4M         (1<<7)    // only PDE
+#define PTE_GLOBAL          (1<<8)    // only PTE
+#define PTE_FRAME_SHIFT     12
+
+typedef struct page_table {
+    uint32_t page[1024];
+} pagetable_t;
+
+struct page_directory {
+    uint32_t phys_addr;    // physical address of page directory
+    // to modify a page directory, we first map it
+    // then we can use mirroring to edit it
+    // (the last 4M of the address space are mapped to the PD itself)
+
+    mutex_t mutex;
+};
+
+
+// access kernel page directory page defined in loader.s
+// (this is a correct higher-half address)
+extern pagetable_t kernel_pd;
+
+// pre-allocate a page table so that we can map the first 4M of kernel memory
+static pagetable_t __attribute__((aligned(PAGE_SIZE))) kernel_pt0;
+
+extern char kernel_stack_protector;
+
+static pagedir_t kernel_pd_d;
+
+#define current_pt ((pagetable_t*)PD_MIRROR_ADDR)
+#define current_pd ((pagetable_t*)(PD_MIRROR_ADDR + (N_PAGES_IN_PT-1)*PAGE_SIZE))
+
+void page_fault_handler(registers_t *regs) {
+    void* vaddr;
+    asm volatile("movl %%cr2, %0":"=r"(vaddr));
+
+    if ((size_t)vaddr >= K_HIGHHALF_ADDR) {
+        uint32_t pt = PT_OF_ADDR(vaddr);
+
+        if (current_pd != &kernel_pd && current_pd->page[pt] != kernel_pd.page[pt]) {
+            current_pd->page[pt] = kernel_pd.page[pt];
+            invlpg(&current_pt[pt]);
+            return;
+        }
+        if (regs->eflags & EFLAGS_IF) asm volatile("sti");    // from now on we are preemptible
+
+        if (vaddr >= (void*)&kernel_stack_protector && vaddr < (void*)&kernel_stack_protector + PAGE_SIZE) {
+            dbg_printf("Kernel stack overflow at 0x%p\n", vaddr);
+            PANIC("Kernel stack overflow.");
+        }
+
+        if ((size_t)vaddr >= PD_MIRROR_ADDR) {
+            dbg_printf("Fault on access to mirrored PD at 0x%p\n", vaddr);
+            dbg_print_region_info();
+            PANIC("Unhandled kernel space page fault");
+        }
+
+        region_info_t *i = find_region(vaddr);
+        if (i == 0) {
+            dbg_printf("Kernel pagefault in non-existing region at 0x%p\n", vaddr);
+            dbg_dump_registers(regs);
+            PANIC("Unhandled kernel space page fault");
+        }
+        if (i->pf == 0) {
+            dbg_printf("Kernel pagefault in region with no handler at 0x%p\n", vaddr);
+            dbg_dump_registers(regs);
+            dbg_print_region_info();
+            PANIC("Unhandled kernel space page fault");
+        }
+        i->pf(get_current_pagedir(), i, vaddr);
+    } else {
+        if (regs->eflags & EFLAGS_IF) asm volatile("sti");    // userspace PF handlers should always be preemptible
+
+        dbg_printf("Userspace page fault at 0x%p\n", vaddr);
+        PANIC("Unhandled userspace page fault");
+        // not handled yet
+        // TODO
+    }
+}
+
+void paging_setup(void* kernel_data_end) {
+    size_t n_kernel_pages =
+        PAGE_ALIGN_UP((size_t)kernel_data_end - K_HIGHHALF_ADDR)/PAGE_SIZE;
+
+    ASSERT(n_kernel_pages <= 1024);    // we use less than 4M for kernel
+
+    // setup kernel_pd_d structure
+    kernel_pd_d.phys_addr = (size_t)&kernel_pd - K_HIGHHALF_ADDR;
+    kernel_pd_d.mutex = MUTEX_UNLOCKED;
+
+    // setup kernel_pt0
+    ASSERT(PAGE_OF_ADDR(K_HIGHHALF_ADDR) == 0);    // kernel is 4M-aligned
+    ASSERT(FIRST_KERNEL_PT == 768);
+    for (size_t i = 0; i < n_kernel_pages; i++) {
+        if ((i * PAGE_SIZE) + K_HIGHHALF_ADDR == (size_t)&kernel_stack_protector) {
+            kernel_pt0.page[i] = 0;    // don't map kernel stack protector page
+            frame_free(i, 1);
+        } else {
+            kernel_pt0.page[i] = (i << PTE_FRAME_SHIFT) | PTE_PRESENT | PTE_RW | PTE_GLOBAL;
+        }
+    }
+    for (size_t i = n_kernel_pages; i < 1024; i++){
+        kernel_pt0.page[i] = 0;
+    }
+
+    // replace 4M mapping by kernel_pt0
+    kernel_pd.page[FIRST_KERNEL_PT] =
+        (((size_t)&kernel_pt0 - K_HIGHHALF_ADDR) & PAGE_MASK) | PTE_PRESENT | PTE_RW;
+    // set up mirroring
+    kernel_pd.page[N_PAGES_IN_PT-1] =
+        (((size_t)&kernel_pd - K_HIGHHALF_ADDR) & PAGE_MASK) | PTE_PRESENT | PTE_RW;
+
+    invlpg((void*)K_HIGHHALF_ADDR);
+
+    // paging already enabled in loader, nothing to do.
+
+    // disable 4M pages (remove PSE bit in CR4)
+    uint32_t cr4;
+    asm volatile("movl %%cr4, %0": "=r"(cr4));
+    cr4 &= ~0x00000010;
+    asm volatile("movl %0, %%cr4":: "r"(cr4));
+
+    idt_set_ex_handler(EX_PAGE_FAULT, page_fault_handler);
+}
+
+pagedir_t *get_current_pagedir() {
+    if (current_thread == 0) return &kernel_pd_d;
+    return current_thread->current_pd_d;
+}
+
+pagedir_t *get_kernel_pagedir() {
+    return &kernel_pd_d;
+}
+
+void switch_pagedir(pagedir_t *pd) {
+    asm volatile("movl %0, %%cr3":: "r"(pd->phys_addr));
+    if (current_thread != 0) current_thread->current_pd_d = pd;
+}
+
+// ============================== //
+// Mapping and unmapping of pages //
+// ============================== //
+
+uint32_t pd_get_frame(void* vaddr) {
+    uint32_t pt = PT_OF_ADDR(vaddr);
+    uint32_t page = PAGE_OF_ADDR(vaddr);
+
+    pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+
+    if (!(pd->page[pt] & PTE_PRESENT)) return 0;
+    if (!(current_pt[pt].page[page] & PTE_PRESENT)) return 0;
+    return current_pt[pt].page[page] >> PTE_FRAME_SHIFT;
+}
+
+int pd_map_page(void* vaddr, uint32_t frame_id, bool rw) {
+    uint32_t pt = PT_OF_ADDR(vaddr);
+    uint32_t page = PAGE_OF_ADDR(vaddr);
+
+    ASSERT((size_t)vaddr < PD_MIRROR_ADDR);
+
+    pagedir_t *pdd = ((size_t)vaddr >= K_HIGHHALF_ADDR || current_thread == 0
+                        ? &kernel_pd_d : current_thread->current_pd_d);
+    pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+    mutex_lock(&pdd->mutex);
+
+    if (!(pd->page[pt] & PTE_PRESENT)) {
+        uint32_t new_pt_frame = frame_alloc(1);
+        if (new_pt_frame == 0) {
+            mutex_unlock(&pdd->mutex);
+            return 1;    // OOM
+        }
+
+        current_pd->page[pt] = pd->page[pt] =
+            (new_pt_frame << PTE_FRAME_SHIFT) | PTE_PRESENT | PTE_RW;
+        invlpg(&current_pt[pt]);
+    }
+    current_pt[pt].page[page] =
+        (frame_id << PTE_FRAME_SHIFT)
+            | PTE_PRESENT
+            | ((size_t)vaddr < K_HIGHHALF_ADDR ? PTE_USER : PTE_GLOBAL)
+            | (rw ? PTE_RW : 0);
+    invlpg(vaddr);
+
+    mutex_unlock(&pdd->mutex);
+    return 0;
+}
+
+void pd_unmap_page(void* vaddr) {
+    uint32_t pt = PT_OF_ADDR(vaddr);
+    uint32_t page = PAGE_OF_ADDR(vaddr);
+
+    pagetable_t *pd = ((size_t)vaddr >= K_HIGHHALF_ADDR ? &kernel_pd : current_pd);
+    // no need to lock the PD's mutex
+
+    if (!(pd->page[pt] & PTE_PRESENT)) return;
+    if (!(current_pt[pt].page[page] & PTE_PRESENT)) return;
+
+    current_pt[pt].page[page] = 0;
+    invlpg(vaddr);
+
+    // If the page table is completely empty we might want to free
+    // it, but we would actually lose a lot of time checking if
+    // the PT is really empty (since we don't store the
+    // number of used pages in each PT), so it's probably not worth it
+}
+
+// Creation and deletion of page directories
+
+pagedir_t *create_pagedir() {
+    uint32_t pd_phys = 0;
+    pagedir_t *pd = 0;
+    void* temp = 0;
+
+    pd_phys = frame_alloc(1);
+    if (pd_phys == 0) goto error;
+
+    pd = (pagedir_t*)malloc(sizeof(pagedir_t));
+    if (pd == 0) goto error;
+
+    temp = region_alloc(PAGE_SIZE, 0, 0);
+    if (temp == 0) goto error;
+
+    int error = pd_map_page(temp, pd_phys, true);
+    if (error) goto error;
+
+    pd->phys_addr = pd_phys * PAGE_SIZE;
+    pd->mutex = MUTEX_UNLOCKED;
+
+    // initialize PD with zeroes
+    pagetable_t *pt = (pagetable_t*)temp;
+    for (size_t i = 0; i < N_PAGES_IN_PT; i++) {
+        pt->page[i] = 0;
+    }
+    // use kernel page tables
+    for(size_t i = FIRST_KERNEL_PT; i < N_PAGES_IN_PT-1; i++) {
+        pt->page[i] = kernel_pd.page[i];
+    }
+    // set up mirroring
+    pt->page[N_PAGES_IN_PT-1] = pd->phys_addr | PTE_PRESENT | PTE_RW;
+
+    region_free_unmap(temp);
+
+    return pd;
+
+    error:
+    if (pd_phys != 0) frame_free(pd_phys, 1);
+    if (pd != 0) free(pd);
+    if (temp != 0) region_free(temp);
+    return 0;
+}
+
+void delete_pagedir(pagedir_t *pd) {
+    pagedir_t *restore_pd = get_current_pagedir();
+    if (restore_pd == pd) restore_pd = &kernel_pd_d;
+
+    // make a copy of page directory on the stack
+    switch_pagedir(pd);
+    pagetable_t backup;
+    for (size_t i = 0; i < N_PAGES_IN_PT; i++) {
+        backup.page[i] = current_pd->page[i];
+    }
+    switch_pagedir(restore_pd);
+
+    // free the page tables
+    for (size_t i = 0; i < FIRST_KERNEL_PT; i++) {
+        if (backup.page[i] & PTE_PRESENT)
+            frame_free(backup.page[i] >> PTE_FRAME_SHIFT, 1);
+    }
+    // free the page directory page
+    uint32_t pd_phys = pd->phys_addr / PAGE_SIZE;
+    ASSERT(pd_phys == (backup.page[N_PAGES_IN_PT-1] >> PTE_FRAME_SHIFT));
+    frame_free(pd_phys, 1);
+    // free the pagedir_t structure
+    free(pd);
+
+    return;
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
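The mirroring set up above (the last PD entry points back at the PD itself) is what makes the current_pt and current_pd macros work: every page table, and the PD, becomes visible at a fixed spot in the top 4 MB of the address space. A sketch of the address arithmetic, assuming PD_MIRROR_ADDR = 0xFFC00000 as the macros and the checks in page_fault_handler imply (its definition lives in a header outside this commit):

    uint32_t pt   = (uint32_t)vaddr >> 22;            /* PT_OF_ADDR */
    uint32_t page = ((uint32_t)vaddr >> 12) & 0x3FF;  /* PAGE_OF_ADDR */
    /* the PTE that maps vaddr, reachable through the mirror */
    uint32_t *pte = &((pagetable_t*)(PD_MIRROR_ADDR + pt * PAGE_SIZE))->page[page];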
diff --git a/src/kernel/core/region.c b/src/kernel/core/region.c
new file mode 100644
index 0000000..3127048
--- /dev/null
+++ b/src/kernel/core/region.c
@@ -0,0 +1,397 @@
+#include <region.h>
+#include <dbglog.h>
+#include <frame.h>
+#include <mutex.h>
+
+typedef union region_descriptor {
+    struct {
+        union region_descriptor *next;
+    } unused_descriptor;
+    struct {
+        void* addr;
+        size_t size;
+        union region_descriptor *next_by_size, *first_bigger;
+        union region_descriptor *next_by_addr;
+    } free;
+    struct {
+        region_info_t i;
+        union region_descriptor *next_by_addr;
+    } used;
+} descriptor_t;
+
+#define N_RESERVE_DESCRIPTORS 2    // always keep at least 2 unused descriptors
+
+#define N_BASE_DESCRIPTORS 12      // pre-allocate memory for 12 descriptors
+static descriptor_t base_descriptors[N_BASE_DESCRIPTORS];
+
+static descriptor_t *first_unused_descriptor;
+uint32_t n_unused_descriptors;
+static descriptor_t *first_free_region_by_addr, *first_free_region_by_size;
+static descriptor_t *first_used_region;
+
+STATIC_MUTEX(ra_mutex);    // region allocator mutex
+
+// ========================================================= //
+// HELPER FUNCTIONS FOR THE MANIPULATION OF THE REGION LISTS //
+// ========================================================= //
+
+static void add_unused_descriptor(descriptor_t *d) {
+    n_unused_descriptors++;
+    d->unused_descriptor.next = first_unused_descriptor;
+    first_unused_descriptor = d;
+}
+
+static descriptor_t *get_unused_descriptor() {
+    descriptor_t *r = first_unused_descriptor;
+    if (r != 0) {
+        first_unused_descriptor = r->unused_descriptor.next;
+        n_unused_descriptors--;
+    }
+    return r;
+}
+
+static void remove_free_region(descriptor_t *d) {
+    if (first_free_region_by_size == d) {
+        first_free_region_by_size = d->free.next_by_size;
+    } else {
+        for (descriptor_t *i = first_free_region_by_size; i != 0; i = i->free.next_by_size) {
+            if (i->free.next_by_size == d) {
+                i->free.next_by_size = d->free.next_by_size;
+                break;
+            }
+        }
+    }
+    if (first_free_region_by_addr == d) {
+        first_free_region_by_addr = d->free.next_by_addr;
+    } else {
+        for (descriptor_t *i = first_free_region_by_addr; i != 0; i = i->free.next_by_addr) {
+            if (i->free.next_by_addr == d) {
+                i->free.next_by_addr = d->free.next_by_addr;
+                break;
+            }
+        }
+    }
+}
+
+static void add_free_region(descriptor_t *d) {
+    /*dbg_printf("Add free region 0x%p - 0x%p\n", d->free.addr, d->free.size + d->free.addr);*/
+    // Find position of region in address-ordered list
+    // Possibly concatenate free region
+    descriptor_t *i = first_free_region_by_addr;
+    if (i == 0) {
+        ASSERT(first_free_region_by_size == 0);
+        first_free_region_by_addr = first_free_region_by_size = d;
+        d->free.next_by_size = d->free.first_bigger = d->free.next_by_addr = 0;
+        return;
+    } else if (d->free.addr + d->free.size == i->free.addr) {
+        // concatenate d . i
+        remove_free_region(i);
+        d->free.size += i->free.size;
+        add_unused_descriptor(i);
+        add_free_region(d);
+        return;
+    } else if (i->free.addr > d->free.addr) {
+        // insert before i
+        d->free.next_by_addr = i;
+        first_free_region_by_addr = d;
+    } else {
+        while (i != 0) {
+            ASSERT(d->free.addr > i->free.addr);
+            if (i->free.addr + i->free.size == d->free.addr) {
+                // concatenate i . d
+                remove_free_region(i);
+                i->free.size += d->free.size;
+                add_unused_descriptor(d);
+                add_free_region(i);
+                return;
+            } else if (i->free.next_by_addr == 0 || i->free.next_by_addr->free.addr > d->free.addr) {
+                d->free.next_by_addr = i->free.next_by_addr;
+                i->free.next_by_addr = d;
+                break;
+            } else if (d->free.addr + d->free.size == i->free.next_by_addr->free.addr) {
+                // concatenate d . i->next_by_addr
+                descriptor_t *j = i->free.next_by_addr;
+                remove_free_region(j);
+                d->free.size += j->free.size;
+                add_unused_descriptor(j);
+                add_free_region(d);
+                return;
+            } else {
+                // continue
+                i = i->free.next_by_addr;
+            }
+        }
+    }
+    // Now add it in size-ordered list
+    i = first_free_region_by_size;
+    ASSERT(i != 0);
+    if (d->free.size <= i->free.size) {
+        d->free.next_by_size = i;
+        d->free.first_bigger = (i->free.size > d->free.size ? i : i->free.first_bigger);
+        first_free_region_by_size = d;
+    } else {
+        while (i != 0) {
+            ASSERT(d->free.size > i->free.size);
+            if (i->free.next_by_size == 0) {
+                d->free.next_by_size = 0;
+                d->free.first_bigger = 0;
+                i->free.next_by_size = d;
+                if (d->free.size > i->free.size) i->free.first_bigger = d;
+                break;
+            } else if (i->free.next_by_size->free.size >= d->free.size) {
+                d->free.next_by_size = i->free.next_by_size;
+                d->free.first_bigger =
+                    (i->free.next_by_size->free.size > d->free.size
+                        ? i->free.next_by_size
+                        : i->free.next_by_size->free.first_bigger);
+                i->free.next_by_size = d;
+                if (d->free.size > i->free.size) i->free.first_bigger = d;
+                break;
+            } else {
+                // continue
+                i = i->free.next_by_size;
+            }
+        }
+    }
+}
+
+static descriptor_t *find_used_region(void* addr) {
+    for (descriptor_t *i = first_used_region; i != 0; i = i->used.next_by_addr) {
+        if (addr >= i->used.i.addr && addr < i->used.i.addr + i->used.i.size) return i;
+        if (i->used.i.addr > addr) break;
+    }
+    return 0;
+}
+
+static void add_used_region(descriptor_t *d) {
+    descriptor_t *i = first_used_region;
+    ASSERT(i->used.i.addr < d->used.i.addr);    // first region by address is never free
+
+    while (i != 0) {
+        ASSERT(i->used.i.addr < d->used.i.addr);
+        if (i->used.next_by_addr == 0 || i->used.next_by_addr->used.i.addr > d->used.i.addr) {
+            d->used.next_by_addr = i->used.next_by_addr;
+            i->used.next_by_addr = d;
+            return;
+        } else {
+            i = i->used.next_by_addr;
+        }
+    }
+    ASSERT(false);
+}
+
+static void remove_used_region(descriptor_t *d) {
+    if (first_used_region == d) {
+        first_used_region = d->used.next_by_addr;
+    } else {
+        for (descriptor_t *i = first_used_region; i != 0; i = i->used.next_by_addr) {
+            if (i->used.i.addr > d->used.i.addr) break;
+            if (i->used.next_by_addr == d) {
+                i->used.next_by_addr = d->used.next_by_addr;
+                break;
+            }
+        }
+    }
+}
+
+// =============== //
+// THE ACTUAL CODE //
+// =============== //
+
+void region_allocator_init(void* kernel_data_end) {
+    n_unused_descriptors = 0;
+    first_unused_descriptor = 0;
+    for (int i = 0; i < N_BASE_DESCRIPTORS; i++) {
+        add_unused_descriptor(&base_descriptors[i]);
+    }
+
+    descriptor_t *f0 = get_unused_descriptor();
+    f0->free.addr = (void*)PAGE_ALIGN_UP(kernel_data_end);
+    f0->free.size = ((void*)LAST_KERNEL_ADDR - f0->free.addr);
+    f0->free.next_by_size = 0;
+    f0->free.first_bigger = 0;
+    first_free_region_by_size = first_free_region_by_addr = f0;
+
+    descriptor_t *u0 = get_unused_descriptor();
+    u0->used.i.addr = (void*)K_HIGHHALF_ADDR;
+    u0->used.i.size = PAGE_ALIGN_UP(kernel_data_end) - K_HIGHHALF_ADDR;
+    u0->used.i.type = "Kernel code & data";
+    u0->used.i.pf = 0;
+    u0->used.next_by_addr = 0;
+    first_used_region = u0;
+}
+
+static void region_free_inner(void* addr) {
+    descriptor_t *d = find_used_region(addr);
+    if (d == 0) return;
+
+    region_info_t i = d->used.i;
+
+    remove_used_region(d);
+    d->free.addr = i.addr;
+    d->free.size = i.size;
+    add_free_region(d);
+}
+void region_free(void* addr) {
+    mutex_lock(&ra_mutex);
+    region_free_inner(addr);
+    mutex_unlock(&ra_mutex);
+}
+
+static void* region_alloc_inner(size_t size, char* type, page_fault_handler_t pf, bool use_reserve) {
+    size = PAGE_ALIGN_UP(size);
+
+    for (descriptor_t *i = first_free_region_by_size; i != 0; i = i->free.first_bigger) {
+        if (i->free.size >= size) {
+            // region i is the one we want to allocate in
+            descriptor_t *x = 0;
+            if (i->free.size > size) {
+                if (n_unused_descriptors <= N_RESERVE_DESCRIPTORS && !use_reserve) {
+                    return 0;
+                }
+
+                // this assert basically means that the allocation function
+                // is called less than N_RESERVE_DESCRIPTORS times with
+                // the use_reserve flag before more descriptors
+                // are allocated.
+                x = get_unused_descriptor();
+                ASSERT(x != 0);
+
+                x->free.size = i->free.size - size;
+                if (size >= 0x4000) {
+                    x->free.addr = i->free.addr + size;
+                } else {
+                    x->free.addr = i->free.addr;
+                    i->free.addr += x->free.size;
+                }
+            }
+            // do the allocation
+            remove_free_region(i);
+            if (x != 0) add_free_region(x);
+
+            void* addr = i->free.addr;
+            i->used.i.addr = addr;
+            i->used.i.size = size;
+            i->used.i.type = type;
+            i->used.i.pf = pf;
+            add_used_region(i);
+
+            return addr;
+        }
+    }
+    return 0;    //No big enough block found
+}
+
+void* region_alloc(size_t size, char* type, page_fault_handler_t pf) {
+    void* result = 0;
+    mutex_lock(&ra_mutex);
+
+    if (n_unused_descriptors <= N_RESERVE_DESCRIPTORS) {
+        uint32_t frame = frame_alloc(1);
+        if (frame == 0) goto try_anyway;
+
+        void* descriptor_region = region_alloc_inner(PAGE_SIZE, "Region descriptors", 0, true);
+        ASSERT(descriptor_region != 0);
+
+        int error = pd_map_page(descriptor_region, frame, 1);
+        if (error) {
+            // this can happen if we weren't able to allocate a frame for
+            // a new pagetable
+            frame_free(frame, 1);
+            region_free_inner(descriptor_region);
+            goto try_anyway;
+        }
+
+        for (descriptor_t *d = (descriptor_t*)descriptor_region;
+                (void*)(d+1) <= (descriptor_region + PAGE_SIZE);
+                d++) {
+            add_unused_descriptor(d);
+        }
+    }
+    try_anyway:
+    // even if we don't have enough unused descriptors, we might find
+    // a free region that has exactly the right size and therefore
+    // does not require splitting, so we try the allocation in all cases
+    result = region_alloc_inner(size, type, pf, false);
+
+    mutex_unlock(&ra_mutex);
+    return result;
+}
+
+region_info_t *find_region(void* addr) {
+    region_info_t *r = 0;
+    mutex_lock(&ra_mutex);
+
+    descriptor_t *d = find_used_region(addr);
+    if (d != 0) r = &d->used.i;
+
+    mutex_unlock(&ra_mutex);
+    return r;
+}
+
+// ========================================================= //
+// HELPER FUNCTIONS : SIMPLE PF HANDLERS ; FREEING FUNCTIONS //
+// ========================================================= //
+
+void default_allocator_pf_handler(pagedir_t *pd, struct region_info *r, void* addr) {
+    ASSERT(pd_get_frame(addr) == 0);    // if error is of another type (RO, protected), we don't do anything
+
+    uint32_t f = frame_alloc(1);
+    if (f == 0) PANIC("Out Of Memory");
+
+    int error = pd_map_page(addr, f, 1);
+    if (error) PANIC("Could not map frame (OOM)");
+}
+
+void region_free_unmap_free(void* ptr) {
+    region_info_t *i = find_region(ptr);
+    ASSERT(i != 0);
+
+    for (void* x = i->addr; x < i->addr + i->size; x += PAGE_SIZE) {
+        uint32_t f = pd_get_frame(x);
+        if (f != 0) {
+            pd_unmap_page(x);
+            frame_free(f, 1);
+        }
+    }
+    region_free(ptr);
+}
+
+void region_free_unmap(void* ptr) {
+    region_info_t *i = find_region(ptr);
+    ASSERT(i != 0);
+
+    for (void* x = i->addr; x < i->addr + i->size; x += PAGE_SIZE) {
+        pd_unmap_page(x);
+    }
+    region_free(ptr);
+}
+
+// =========================== //
+// DEBUG LOG PRINTING FUNCTION //
+// =========================== //
+
+void dbg_print_region_info() {
+    mutex_lock(&ra_mutex);
+
+    dbg_printf("/ Free kernel regions, by address:\n");
+    for (descriptor_t *d = first_free_region_by_addr; d != 0; d = d->free.next_by_addr) {
+        dbg_printf("| 0x%p - 0x%p\n", d->free.addr, d->free.addr + d->free.size);
+        ASSERT(d != d->free.next_by_addr);
+    }
+    dbg_printf("- Free kernel regions, by size:\n");
+    for (descriptor_t *d = first_free_region_by_size; d != 0; d = d->free.next_by_size) {
+        dbg_printf("| 0x%p - 0x%p\n", d->free.addr, d->free.addr + d->free.size);
+        ASSERT(d != d->free.next_by_size);
+    }
+    dbg_printf("- Used kernel regions:\n");
+    for (descriptor_t *d = first_used_region; d != 0; d = d->used.next_by_addr) {
+        dbg_printf("| 0x%p - 0x%p %s\n", d->used.i.addr, d->used.i.addr + d->used.i.size, d->used.i.type);
+        ASSERT(d != d->used.next_by_addr);
+    }
+    dbg_printf("\\\n");
+
+    mutex_unlock(&ra_mutex);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
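Typical use of this allocator, matching what region_test2 and the kmalloc glue do in kmain.c and kmalloc.c: reserve address space with a page-fault handler attached, let default_allocator_pf_handler map frames lazily on first touch, and tear everything down in one call. A condensed sketch:

    void* buf = region_alloc(4 * PAGE_SIZE, "Example buffer", default_allocator_pf_handler);
    ASSERT(buf != 0);
    ((uint32_t*)buf)[0] = 42;       /* first touch faults and maps one frame */
    region_free_unmap_free(buf);    /* unmap pages, free their frames, release the region */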
diff --git a/src/kernel/core/sys.c b/src/kernel/core/sys.c
new file mode 100644
index 0000000..2b77463
--- /dev/null
+++ b/src/kernel/core/sys.c
@@ -0,0 +1,25 @@
+#include <sys.h>
+#include <dbglog.h>
+
+
+// Kernel panic and kernel assert failure
+
+static void panic_do(const char* type, const char *msg, const char* file, int line) {
+    asm volatile("cli;");
+    dbg_printf("/\n| %s:\t%s\n", type, msg);
+    dbg_printf("| File: \t%s:%i\n", file, line);
+    dbg_printf("| System halted -_-'\n");
+    dbg_printf("\\---------------------------------------------------------/");
+    BOCHS_BREAKPOINT;
+    asm volatile("hlt");
+}
+
+void panic(const char* message, const char* file, int line) {
+    panic_do("PANIC", message, file, line);
+}
+
+void panic_assert(const char* assertion, const char* file, int line) {
+    panic_do("ASSERT FAILED", assertion, file, line);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/
diff --git a/src/kernel/core/thread.c b/src/kernel/core/thread.c
new file mode 100644
index 0000000..7f0bb5b
--- /dev/null
+++ b/src/kernel/core/thread.c
@@ -0,0 +1,208 @@
+#include <thread.h>
+#include <malloc.h>
+#include <dbglog.h>
+#include <idt.h>
+
+#include <frame.h>
+#include <paging.h>
+
+void save_context_and_enter_scheduler(saved_context_t *ctx);
+void irq0_save_context_and_enter_scheduler(saved_context_t *ctx);
+void resume_context(saved_context_t *ctx);
+
+thread_t *current_thread = 0;
+
+// ====================== //
+// THE PROGRAMMABLE TIMER //
+// ====================== //
+
+void set_pit_frequency(uint32_t freq) {
+    uint32_t divisor = 1193180 / freq;
+    ASSERT(divisor < 65536);    // must fit on 16 bits
+
+    uint8_t l = (divisor & 0xFF);
+    uint8_t h = ((divisor >> 8) & 0xFF);
+
+    outb(0x43, 0x36);
+    outb(0x40, l);
+    outb(0x40, h);
+}
+
+// ============================= //
+// HELPER : IF FLAG MANIPULATION //
+// ============================= //
+
+static inline bool disable_interrupts() {
+    uint32_t eflags;
+    asm volatile("pushf; pop %0" : "=r"(eflags));
+    asm volatile("cli");
+    return (eflags & EFLAGS_IF) != 0;
+}
+
+static inline void resume_interrupts(bool st) {
+    if (st) asm volatile("sti");
+}
+
+// ================== //
+// THE TASK SCHEDULER //
+// ================== //
+
+static thread_t *queue_first_thread = 0, *queue_last_thread = 0;
+
+void enqueue_thread(thread_t *t, bool just_ran) {
+    ASSERT(t->state == T_STATE_RUNNING);
+    if (queue_first_thread == 0) {
+        queue_first_thread = queue_last_thread = t;
+        t->next_in_queue = 0;
+    } else if (just_ran) {
+        t->next_in_queue = 0;
+        queue_last_thread->next_in_queue = t;
+        queue_last_thread = t;
+    } else {
+        t->next_in_queue = queue_first_thread;
+        queue_first_thread = t;
+    }
+}
+
+thread_t* dequeue_thread() {
+    thread_t *t = queue_first_thread;
+    if (t == 0) return 0;
+
+    queue_first_thread = t->next_in_queue;
+    if (queue_first_thread == 0) queue_last_thread = 0;
+
+    return t;
+}
+
+// ================ //
+// THE TASKING CODE //
+// ================ //
+
+void run_scheduler() {
+    // At this point, interrupts are disabled
+    // This function is expected NEVER TO RETURN
+
+    if (current_thread != 0 && current_thread->state == T_STATE_RUNNING) {
+        enqueue_thread(current_thread, true);
+    }
+
+    current_thread = dequeue_thread();
+    if (current_thread != 0) {
+        resume_context(&current_thread->ctx);
+    } else {
+        // Wait for an IRQ
+        asm volatile("sti; hlt");
+        // At this point an IRQ has happened
+        // and has been processed. Loop around.
+        run_scheduler();
+        ASSERT(false);
+    }
+}
+
+static void run_thread(void (*entry)(void*), void* data) {
+    ASSERT(current_thread->state == T_STATE_RUNNING);
+
+    switch_pagedir(get_kernel_pagedir());
+
+    asm volatile("sti");
+    entry(data);
+
+    current_thread->state = T_STATE_FINISHED;
+    // TODO: add job for deleting the thread, or whatever
+    yield();    // expected never to return!
+    ASSERT(false);
+}
+thread_t *new_thread(entry_t entry, void* data) {
+    thread_t *t = (thread_t*)malloc(sizeof(thread_t));
+    if (t == 0) return 0;
+
+    void* stack = region_alloc(KPROC_STACK_SIZE, "Stack", 0);
+    if (stack == 0) {
+        free(t);
+        return 0;
+    }
+
+    // map all pages except the first one, which stays unmapped as a stack guard
+    for (void* i = stack + PAGE_SIZE; i < stack + KPROC_STACK_SIZE; i += PAGE_SIZE) {
+        uint32_t f = frame_alloc(1);
+        if (f == 0) {
+            region_free_unmap_free(stack);
+            free(t);
+            return 0;
+        }
+        pd_map_page(i, f, true);
+    }
+
+    t->stack_region = find_region(stack);
+
+    t->ctx.esp = (uint32_t*)(t->stack_region->addr + t->stack_region->size);
+    *(--t->ctx.esp) = (uint32_t)data;     // push second argument : data
+    *(--t->ctx.esp) = (uint32_t)entry;    // push first argument : entry point
+    *(--t->ctx.esp) = 0;                  // push invalid return address (the run_thread function never returns)
+
+    t->ctx.eip = (void(*)())run_thread;
+    t->state = T_STATE_PAUSED;
+
+    t->current_pd_d = get_kernel_pagedir();
+
+    t->proc = 0;    // used by L1 functions
+
+    return t;
+}
+
+// ========== //
+// SETUP CODE //
+// ========== //
+
+static void irq0_handler(registers_t *regs) {
+    if (current_thread != 0)
+        irq0_save_context_and_enter_scheduler(&current_thread->ctx);
+}
+void threading_setup(entry_t cont, void* arg) {
+    set_pit_frequency(TASK_SWITCH_FREQUENCY);
+    idt_set_irq_handler(IRQ0, irq0_handler);
+
+    thread_t *t = new_thread(cont, arg);
+    ASSERT(t != 0);
+
+    resume_thread(t, false);
+
+    run_scheduler();    // never returns
+    ASSERT(false);
+}
+
+// ======================= //
+// TASK STATE MANIPULATION //
+// ======================= //
+
+void yield() {
+    if (current_thread == 0) {
+        // might happen before threading is initialized
+        // (but should not...)
+        dbg_printf("Warning: probable deadlock.\n");
+    } else {
+        save_context_and_enter_scheduler(&current_thread->ctx);
+    }
+}
+
+void pause() {
+    bool st = disable_interrupts();
+
+    current_thread->state = T_STATE_PAUSED;
+    save_context_and_enter_scheduler(&current_thread->ctx);
+
+    resume_interrupts(st);
+}
+
+void resume_thread(thread_t *thread, bool run_at_once) {
+    bool st = disable_interrupts();
+
+    if (thread->state == T_STATE_PAUSED) {
+        thread->state = T_STATE_RUNNING;
+        enqueue_thread(thread, false);
+    }
+    if (run_at_once) yield();
+
+    resume_interrupts(st);
+}
+
+/* vim: set ts=4 sw=4 tw=0 noet :*/