diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 6f3bc6f84..5a8642d1b 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -24,7 +24,14 @@ int __cdecl main(int argc, char **argv) {
 
     System::Init(emu_window);
 
-    std::string boot_filename = "homebrew.elf";
+    std::string boot_filename;
+
+    if (argc < 2) {
+        ERROR_LOG(BOOT, "Failed to load ROM: No ROM specified");
+    }
+    else {
+        boot_filename = argv[1];
+    }
     std::string error_str;
 
     bool res = Loader::LoadFile(boot_filename, &error_str);
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 76e0c68c3..9be982909 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -142,7 +142,7 @@ void GMainWindow::BootGame(const char* filename)
 
 void GMainWindow::OnMenuLoadFile()
 {
-    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS homebrew (*.elf *.dat *.bin)"));
+    QString filename = QFileDialog::getOpenFileName(this, tr("Load file"), QString(), tr("3DS homebrew (*.elf *.axf *.dat *.bin)"));
     if (filename.size())
        BootGame(filename.toLatin1().data());
 }
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index 5dc6ff790..86295a480 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -190,6 +190,7 @@
     <ClInclude Include="swap.h" />
     <ClInclude Include="symbols.h" />
     <ClInclude Include="thread.h" />
+    <ClInclude Include="thread_queue_list.h" />
     <ClInclude Include="thunk.h" />
     <ClInclude Include="timer.h" />
     <ClInclude Include="utf8.h" />
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index 268730228..84cfa8837 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -40,6 +40,7 @@
     <ClInclude Include="symbols.h" />
     <ClInclude Include="scm_rev.h" />
     <ClInclude Include="bit_field.h" />
+    <ClInclude Include="thread_queue_list.h" />
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="break_points.cpp" />
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index f8d10eb3e..dca4dc47f 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -22,6 +22,11 @@ template<> struct CompileTimeAssert<true> {};
 #define b32(x)  (b16(x) | (b16(x) >>16) )
 #define ROUND_UP_POW2(x)    (b32(x - 1) + 1)
 
+#define MIN(a, b)   ((a)<(b)?(a):(b))
+#define MAX(a, b)   ((a)>(b)?(a):(b))
+// NOTE: MIN, MAX and CLAMP evaluate their arguments more than once; do not pass expressions with side effects.
+#define CLAMP(x, min, max)  (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)))
+
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 
 #ifndef _WIN32
diff --git a/src/common/log.h b/src/common/log.h
index d95f51f56..8b39b03a1 100644
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -5,6 +5,8 @@
 #ifndef _LOG_H_
 #define _LOG_H_
 
+#define LOGGING
+
 #define    NOTICE_LEVEL  1  // VERY important information that is NOT errors. Like startup and OSReports.
 #define    ERROR_LEVEL   2  // Critical errors 
 #define    WARNING_LEVEL 3  // Something is suspicious.
@@ -53,7 +55,7 @@ enum LOG_TYPE {
     WII_IPC_ES,
     WII_IPC_FILEIO,
     WII_IPC_HID,
-    WII_IPC_HLE,
+    KERNEL,
     SVC,
     NDMA,
     HLE,
diff --git a/src/common/log_manager.cpp b/src/common/log_manager.cpp
index 80fd473b9..146472888 100644
--- a/src/common/log_manager.cpp
+++ b/src/common/log_manager.cpp
@@ -60,13 +60,13 @@ LogManager::LogManager()
     m_Log[LogTypes::LOADER]             = new LogContainer("Loader",            "Loader");
     m_Log[LogTypes::FILESYS]            = new LogContainer("FileSys",           "File System");
     m_Log[LogTypes::WII_IPC_HID]        = new LogContainer("WII_IPC_HID",       "WII IPC HID");
-    m_Log[LogTypes::WII_IPC_HLE]        = new LogContainer("WII_IPC_HLE",       "WII IPC HLE");
+    m_Log[LogTypes::KERNEL]             = new LogContainer("KERNEL",            "KERNEL HLE");
     m_Log[LogTypes::WII_IPC_DVD]        = new LogContainer("WII_IPC_DVD",       "WII IPC DVD");
     m_Log[LogTypes::WII_IPC_ES]         = new LogContainer("WII_IPC_ES",        "WII IPC ES");
     m_Log[LogTypes::WII_IPC_FILEIO]     = new LogContainer("WII_IPC_FILEIO",    "WII IPC FILEIO");
     m_Log[LogTypes::RENDER]             = new LogContainer("RENDER",            "RENDER");
     m_Log[LogTypes::LCD]                = new LogContainer("LCD",               "LCD");
-    m_Log[LogTypes::SVC]                = new LogContainer("SVC",               "Supervisor Call");
+    m_Log[LogTypes::SVC]                = new LogContainer("SVC",               "Supervisor Call HLE");
     m_Log[LogTypes::NDMA]               = new LogContainer("NDMA",              "NDMA");
     m_Log[LogTypes::HLE]                = new LogContainer("HLE",               "High Level Emulation");
     m_Log[LogTypes::HW]                 = new LogContainer("HW",                "Hardware");
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
new file mode 100644
index 000000000..4a89572f6
--- /dev/null
+++ b/src/common/thread_queue_list.h
@@ -0,0 +1,216 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common.h"
+
+namespace Common {
+
+/**
+ * Set of FIFO thread queues, one per priority level (a lower index is a better priority).
+ * A level must be prepare()d before ids are pushed to it. Each prepared queue keeps its ids in
+ * a malloc'd array with slack at both ends; rebalance() recentres or grows that array.
+ */
+template<class IdType>
+struct ThreadQueueList {
+    // Number of queues (number of priority levels starting at 0.)
+    static const int NUM_QUEUES = 128;
+
+    // Initial number of threads a single queue can handle.
+    static const int INITIAL_CAPACITY = 32;
+
+    struct Queue {
+        Queue *next;    ///< Next ever-been-used queue (worse priority.)
+        int first;      ///< First valid item in data.
+        int end;        ///< One after last valid item in data.
+        IdType *data;   ///< A too-large array with room on the front and end.
+        int capacity;   ///< Size of data array.
+    };
+
+    ThreadQueueList() {
+        memset(queues, 0, sizeof(queues));
+        first = invalid();
+    }
+
+    ~ThreadQueueList() {
+        free_all();
+    }
+
+    /// Only for debugging: returns the priority level holding uid, or -1 when it is not queued.
+    int contains(const IdType uid) const {
+        for (int i = 0; i < NUM_QUEUES; ++i) {
+            const Queue *cur = &queues[i];
+            if (cur->data == NULL)
+                continue;
+            for (int j = cur->first; j < cur->end; ++j) {
+                if (cur->data[j] == uid)
+                    return i;
+            }
+        }
+        return -1;
+    }
+
+    /// Pops the front id of the best-priority non-empty queue; returns 0 when every queue is empty.
+    inline IdType pop_first() {
+        Queue *cur = first;
+        while (cur != invalid()) {
+            if (cur->end - cur->first > 0)
+                return cur->data[cur->first++];
+            cur = cur->next;
+        }
+        //_dbg_assert_msg_(SCEKERNEL, false, "ThreadQueueList should not be empty.");
+        return 0;
+    }
+
+    /// Like pop_first(), but only considers queues strictly better than priority; returns 0 if none has an id.
+    inline IdType pop_first_better(u32 priority) {
+        Queue *cur = first;
+        Queue *stop = &queues[priority];
+        // NOTE(review): ending at the list tail depends on invalid() not comparing below stop.
+        while (cur < stop) {
+            if (cur->end - cur->first > 0)
+                return cur->data[cur->first++];
+            cur = cur->next;
+        }
+        return 0;
+    }
+
+    /// Inserts threadID at the front of the (already prepared) queue for priority.
+    inline void push_front(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        cur->data[--cur->first] = threadID;
+        if (cur->first == 0)
+            rebalance(priority);
+    }
+
+    /// Appends threadID to the back of the (already prepared) queue for priority.
+    inline void push_back(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        cur->data[cur->end++] = threadID;
+        if (cur->end == cur->capacity)
+            rebalance(priority);
+    }
+
+    /// Removes the first occurrence of threadID from the queue for priority; no-op when it is absent.
+    inline void remove(u32 priority, const IdType threadID) {
+        Queue *cur = &queues[priority];
+        //_dbg_assert_msg_(SCEKERNEL, cur->next != NULL, "ThreadQueueList::Queue should already be linked up.");
+        for (int i = cur->first; i < cur->end; ++i) {
+            if (!(cur->data[i] == threadID))
+                continue;
+            // Close the gap by shifting the tail down one slot.
+            int remaining = --cur->end - i;
+            if (remaining > 0)
+                memmove(&cur->data[i], &cur->data[i + 1], remaining * sizeof(IdType));
+            return;
+        }
+        // Wasn't there.
+    }
+
+    /// Moves the front id of the queue for priority to its back (does nothing with fewer than two ids).
+    inline void rotate(u32 priority) {
+        Queue *cur = &queues[priority];
+        //_dbg_assert_msg_(SCEKERNEL, cur->next != NULL, "ThreadQueueList::Queue should already be linked up.");
+        if (cur->end - cur->first > 1) {
+            cur->data[cur->end++] = cur->data[cur->first++];
+            if (cur->end == cur->capacity)
+                rebalance(priority);
+        }
+    }
+
+    /// Frees every queue and returns the list to its freshly constructed state.
+    inline void clear() {
+        free_all();
+        memset(queues, 0, sizeof(queues));
+        first = invalid();
+    }
+
+    /// Returns true when the queue for priority holds no ids (also true for unprepared levels).
+    inline bool empty(u32 priority) const {
+        const Queue *cur = &queues[priority];
+        return cur->first == cur->end;
+    }
+
+    /// Allocates and links the queue for priority on first use; later calls are no-ops.
+    inline void prepare(u32 priority) {
+        Queue *cur = &queues[priority];
+        if (cur->next == NULL)
+            link(priority, INITIAL_CAPACITY);
+    }
+
+private:
+    // Not copyable: a member-wise copy would free the malloc'd arrays twice (declared, never defined).
+    ThreadQueueList(const ThreadQueueList &);
+    ThreadQueueList &operator=(const ThreadQueueList &);
+
+    /// Sentinel that terminates the linked list of used queues.
+    Queue *invalid() const {
+        return (Queue *) -1;
+    }
+
+    /// Releases the data array of every queue; resetting the bookkeeping is left to the caller.
+    void free_all() {
+        for (int i = 0; i < NUM_QUEUES; ++i) {
+            if (queues[i].data != NULL)
+                free(queues[i].data);
+        }
+    }
+
+    /// Allocates the data array for priority and splices the queue into the list in priority order.
+    void link(u32 priority, int size) {
+        //_dbg_assert_msg_(SCEKERNEL, queues[priority].data == NULL, "ThreadQueueList::Queue should only be initialized once.");
+        // Round the requested size up to INITIAL_CAPACITY times a power of two.
+        int goal = size;
+        size = INITIAL_CAPACITY;
+        while (size < goal)
+            size *= 2;
+
+        Queue *cur = &queues[priority];
+        cur->data = (IdType *) malloc(sizeof(IdType) * size);
+        cur->capacity = size;
+        cur->first = size / 2;
+        cur->end = size / 2;
+
+        // Link after the nearest better-priority queue that has ever been used, if there is one.
+        for (int i = (int) priority - 1; i >= 0; --i) {
+            if (queues[i].next != NULL) {
+                cur->next = queues[i].next;
+                queues[i].next = cur;
+                return;
+            }
+        }
+        // No better queue is linked yet, so this one becomes the new head.
+        cur->next = first;
+        first = cur;
+    }
+
+    /// Recentres the ids of the queue for priority, doubling its array first when it is nearly full.
+    void rebalance(u32 priority) {
+        Queue *cur = &queues[priority];
+        int size = cur->end - cur->first;
+        if (size >= cur->capacity - 2)  {
+            IdType *new_data = (IdType *)realloc(cur->data, cur->capacity * 2 * sizeof(IdType));
+            // On allocation failure the old array is kept and only the recentring below happens.
+            if (new_data != NULL)  {
+                cur->capacity *= 2;
+                cur->data = new_data;
+            }
+        }
+
+        int newFirst = (cur->capacity - size) / 2;
+        if (newFirst != cur->first) {
+            memmove(&cur->data[newFirst], &cur->data[cur->first], size * sizeof(IdType));
+            cur->first = newFirst;
+            cur->end = newFirst + size;
+        }
+    }
+
+    // The first queue that's ever been used.
+    Queue *first;
+    // The priority level queues of thread ids.
+    Queue queues[NUM_QUEUES];
+};
+
+} // namespace
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 14c598bf3..4086b415b 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -33,7 +33,10 @@ set(SRCS    core.cpp
             hle/hle.cpp
             hle/config_mem.cpp
             hle/coprocessor.cpp
-            hle/syscall.cpp
+            hle/svc.cpp
+            hle/kernel/kernel.cpp
+            hle/kernel/mutex.cpp
+            hle/kernel/thread.cpp
             hle/service/apt.cpp
             hle/service/gsp.cpp
             hle/service/hid.cpp
@@ -75,7 +78,10 @@ set(HEADERS core.h
             hle/config_mem.h
             hle/coprocessor.h
             hle/hle.h
-            hle/syscall.h
+            hle/svc.h
+            hle/kernel/kernel.h
+            hle/kernel/mutex.h
+            hle/kernel/thread.h
             hle/function_wrappers.h
             hle/service/apt.h
             hle/service/gsp.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 9fdc7ba3c..b73786ccd 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,11 +7,13 @@
 #include "common/common.h"
 #include "common/common_types.h"
 
+#include "core/hle/svc.h"
+
 /// Generic ARM11 CPU interface
 class ARM_Interface : NonCopyable {
 public:
     ARM_Interface() {
-        m_num_instructions = 0;
+        num_instructions = 0;
     }
 
     ~ARM_Interface() {
@@ -23,7 +25,7 @@ public:
      */
     void Run(int num_instructions) {
         ExecuteInstructions(num_instructions);
-        m_num_instructions += num_instructions;
+        this->num_instructions += num_instructions; // the parameter shadows the member, so the counter must be qualified with this->
     }
 
     /// Step CPU by one instruction
@@ -63,15 +65,33 @@ public:
      */
     virtual u32 GetCPSR() const = 0;  
 
+    /**
+     * Set the current CPSR register
+     * @param cpsr Value to set CPSR to
+     */
+    virtual void SetCPSR(u32 cpsr) = 0;
+
     /**
      * Returns the number of clock ticks since the last rese
      * @return Returns number of clock ticks
      */
     virtual u64 GetTicks() const = 0;
 
-    /// Getter for m_num_instructions
+    /**
+     * Saves the current CPU context
+     * @param ctx Thread context to save
+     */
+    virtual void SaveContext(ThreadContext& ctx) = 0;
+
+    /**
+     * Loads a CPU context
+     * @param ctx Thread context to load
+     */
+    virtual void LoadContext(const ThreadContext& ctx) = 0;
+
+    /// Getter for num_instructions
     u64 GetNumInstructions() {
-        return m_num_instructions;
+        return num_instructions;
     }
 
 protected:
@@ -84,6 +104,6 @@ protected:
 
 private:
 
-    u64 m_num_instructions;                     ///< Number of instructions executed
+    u64 num_instructions; ///< Number of instructions executed
 
 };
diff --git a/src/core/arm/interpreter/arm_interpreter.cpp b/src/core/arm/interpreter/arm_interpreter.cpp
index 23d96d292..17f787b86 100644
--- a/src/core/arm/interpreter/arm_interpreter.cpp
+++ b/src/core/arm/interpreter/arm_interpreter.cpp
@@ -9,30 +9,30 @@ const static cpu_config_t s_arm11_cpu_info = {
 };
 
 ARM_Interpreter::ARM_Interpreter()  {
-    m_state = new ARMul_State;
+    state = new ARMul_State;
 
     ARMul_EmulateInit();
-    ARMul_NewState(m_state);
+    ARMul_NewState(state);
 
-    m_state->abort_model = 0;
-    m_state->cpu = (cpu_config_t*)&s_arm11_cpu_info;
-    m_state->bigendSig = LOW;
+    state->abort_model = 0;
+    state->cpu = (cpu_config_t*)&s_arm11_cpu_info;
+    state->bigendSig = LOW;
 
-    ARMul_SelectProcessor(m_state, ARM_v6_Prop | ARM_v5_Prop | ARM_v5e_Prop);
-    m_state->lateabtSig = LOW;
-    mmu_init(m_state);
+    ARMul_SelectProcessor(state, ARM_v6_Prop | ARM_v5_Prop | ARM_v5e_Prop);
+    state->lateabtSig = LOW;
+    mmu_init(state);
 
     // Reset the core to initial state
-    ARMul_Reset(m_state);
-    m_state->NextInstr = 0;
-    m_state->Emulate = 3;
+    ARMul_Reset(state);
+    state->NextInstr = 0;
+    state->Emulate = 3;
 
-    m_state->pc = m_state->Reg[15] = 0x00000000;
-    m_state->Reg[13] = 0x10000000; // Set stack pointer to the top of the stack
+    state->pc = state->Reg[15] = 0x00000000;
+    state->Reg[13] = 0x10000000; // Set stack pointer to the top of the stack
 }
 
 ARM_Interpreter::~ARM_Interpreter() {
-    delete m_state;
+    delete state;
 }
 
 /**
@@ -40,7 +40,7 @@ ARM_Interpreter::~ARM_Interpreter() {
  * @param addr Address to set PC to
  */
 void ARM_Interpreter::SetPC(u32 pc) {
-    m_state->pc = m_state->Reg[15] = pc;
+    state->pc = state->Reg[15] = pc;
 }
 
 /*
@@ -48,7 +48,7 @@ void ARM_Interpreter::SetPC(u32 pc) {
  * @return Returns current PC
  */
 u32 ARM_Interpreter::GetPC() const {
-    return m_state->pc;
+    return state->pc;
 }
 
 /**
@@ -57,7 +57,7 @@ u32 ARM_Interpreter::GetPC() const {
  * @return Returns the value in the register
  */
 u32 ARM_Interpreter::GetReg(int index) const {
-    return m_state->Reg[index];
+    return state->Reg[index];
 }
 
 /**
@@ -66,7 +66,7 @@ u32 ARM_Interpreter::GetReg(int index) const {
  * @param value Value to set register to
  */
 void ARM_Interpreter::SetReg(int index, u32 value) {
-    m_state->Reg[index] = value;
+    state->Reg[index] = value;
 }
 
 /**
@@ -74,7 +74,15 @@ void ARM_Interpreter::SetReg(int index, u32 value) {
  * @return Returns the value of the CPSR register
  */
 u32 ARM_Interpreter::GetCPSR() const {
-    return m_state->Cpsr;
+    return state->Cpsr;
+}
+
+/**
+ * Set the current CPSR register
+ * @param cpsr Value to set CPSR to
+ */
+void ARM_Interpreter::SetCPSR(u32 cpsr) {
+    state->Cpsr = cpsr;
 }
 
 /**
@@ -82,7 +90,7 @@ u32 ARM_Interpreter::GetCPSR() const {
  * @return Returns number of clock ticks
  */
 u64 ARM_Interpreter::GetTicks() const {
-    return ARMul_Time(m_state);
+    return ARMul_Time(state);
 }
 
 /**
@@ -90,6 +98,45 @@ u64 ARM_Interpreter::GetTicks() const {
  * @param num_instructions Number of instructions to executes
  */
 void ARM_Interpreter::ExecuteInstructions(int num_instructions) {
-    m_state->NumInstrsToExecute = num_instructions;
-    ARMul_Emulate32(m_state);
+    state->NumInstrsToExecute = num_instructions;
+    ARMul_Emulate32(state);
+}
+
+/**
+ * Saves the current CPU context
+ * @param ctx Thread context to save
+ * @todo Do we need to save Reg[15] and NextInstr?
+ */
+void ARM_Interpreter::SaveContext(ThreadContext& ctx) {
+    memcpy(ctx.cpu_registers, state->Reg, sizeof(ctx.cpu_registers)); // copy length is set by the context's array, not by state->Reg
+    memcpy(ctx.fpu_registers, state->ExtReg, sizeof(ctx.fpu_registers));
+    // SP and LR are read from Reg[13] and Reg[14]; the PC is read from state->pc, not from Reg[15].
+    ctx.sp = state->Reg[13];
+    ctx.lr = state->Reg[14];
+    ctx.pc = state->pc;
+    ctx.cpsr = state->Cpsr;
+    // The two saved VFP words come from VFP[1] (fpscr) and VFP[2] (fpexc).
+    ctx.fpscr = state->VFP[1];
+    ctx.fpexc = state->VFP[2];
+}
+
+/**
+ * Loads a CPU context
+ * @param ctx Thread context to load
+ * @todo Do we need to load Reg[15] and NextInstr?
+ */
+void ARM_Interpreter::LoadContext(const ThreadContext& ctx) {
+    memcpy(state->Reg, ctx.cpu_registers, sizeof(ctx.cpu_registers));
+    memcpy(state->ExtReg, ctx.fpu_registers, sizeof(ctx.fpu_registers));
+    // These stores run after the bulk copy, so ctx.sp and ctx.lr win over the copied Reg[13]/Reg[14].
+    state->Reg[13] = ctx.sp;
+    state->Reg[14] = ctx.lr;
+    state->pc = ctx.pc;
+    state->Cpsr = ctx.cpsr;
+    // Restore the VFP words that SaveContext() stored as fpscr and fpexc.
+    state->VFP[1] = ctx.fpscr;
+    state->VFP[2] = ctx.fpexc;
+    // Keep Reg[15] equal to the restored PC and set NextInstr to RESUME (its exact effect is defined by the interpreter core, not visible here).
+    state->Reg[15] = ctx.pc;
+    state->NextInstr = RESUME;
+}
diff --git a/src/core/arm/interpreter/arm_interpreter.h b/src/core/arm/interpreter/arm_interpreter.h
index 509025080..6a531e497 100644
--- a/src/core/arm/interpreter/arm_interpreter.h
+++ b/src/core/arm/interpreter/arm_interpreter.h
@@ -48,12 +48,30 @@ public:
      */
     u32 GetCPSR() const;
 
+    /**
+     * Set the current CPSR register
+     * @param cpsr Value to set CPSR to
+     */
+    void SetCPSR(u32 cpsr);
+
     /**
      * Returns the number of clock ticks since the last reset
      * @return Returns number of clock ticks
      */
     u64 GetTicks() const;
 
+    /**
+     * Saves the current CPU context
+     * @param ctx Thread context to save
+     */
+    void SaveContext(ThreadContext& ctx);
+
+    /**
+     * Loads a CPU context
+     * @param ctx Thread context to load
+     */
+    void LoadContext(const ThreadContext& ctx);
+
 protected:
 
     /**
@@ -64,6 +82,6 @@ protected:
 
 private:
 
-    ARMul_State* m_state;
+    ARMul_State* state;
 
 };
diff --git a/src/core/arm/interpreter/armdefs.h b/src/core/arm/interpreter/armdefs.h
index 5b2abc7f7..d8eae4d3f 100644
--- a/src/core/arm/interpreter/armdefs.h
+++ b/src/core/arm/interpreter/armdefs.h
@@ -24,10 +24,6 @@
 
 #include "common/platform.h"
 
-#if EMU_PLATFORM == PLATFORM_WINDOWS
-#include <windows.h>
-#endif
-
 //teawater add for arm2x86 2005.02.14-------------------------------------------
 // koodailar remove it for mingw 2005.12.18----------------
 //anthonylee modify it for portable 2007.01.30
diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp
index 32e315f4b..e5dc7bd44 100644
--- a/src/core/arm/interpreter/armemu.cpp
+++ b/src/core/arm/interpreter/armemu.cpp
@@ -4478,8 +4478,7 @@ ARMul_Emulate26 (ARMul_State * state)
                                  isize) &
                                 R15PCBITS));
 #endif
-                    }
-                    else
+                    } else if (instr != 0xDEADC0DE) // thumbemu uses 0xDEADC0DE for debugging to catch non updates
                         ARMul_MCR (state, instr,
                                DEST);
                 }
@@ -4549,7 +4548,7 @@ ARMul_Emulate26 (ARMul_State * state)
                 //    ARMul_OSHandleSWI (state, BITS (0, 23));
                 //    break;
                 //}
-                HLE::CallSyscall(instr);
+                HLE::CallSVC(instr);
                 ARMul_Abort (state, ARMul_SWIV);
                 break;
             }
diff --git a/src/core/arm/interpreter/arminit.cpp b/src/core/arm/interpreter/arminit.cpp
index 2c771cdda..e05667bea 100644
--- a/src/core/arm/interpreter/arminit.cpp
+++ b/src/core/arm/interpreter/arminit.cpp
@@ -17,8 +17,11 @@
 
 
 #include "common/platform.h"
+
 #if EMU_PLATFORM == PLATFORM_LINUX
 #include <unistd.h>
+#elif EMU_PLATFORM == PLATFORM_WINDOWS
+#include <windows.h>
 #endif
 
 #include <math.h>
diff --git a/src/core/arm/interpreter/vfp/vfp.h b/src/core/arm/interpreter/vfp/vfp.h
index f738a615b..bbf4caeb0 100644
--- a/src/core/arm/interpreter/vfp/vfp.h
+++ b/src/core/arm/interpreter/vfp/vfp.h
@@ -21,7 +21,7 @@
 #ifndef __VFP_H__
 #define __VFP_H__
 
-#define DBG(...) DEBUG_LOG(ARM11, __VA_ARGS__)
+#define DBG(...) //DEBUG_LOG(ARM11, __VA_ARGS__)
 
 #define vfpdebug //printf
 
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 61c237b2c..f88bcd704 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -12,6 +12,8 @@
 #include "core/arm/disassembler/arm_disasm.h"
 #include "core/arm/interpreter/arm_interpreter.h"
 
+#include "core/hle/kernel/thread.h"
+
 namespace Core {
 
 ARM_Disasm*     g_disasm    = NULL; ///< ARM disassembler
@@ -21,14 +23,17 @@ ARM_Interface*  g_sys_core  = NULL; ///< ARM11 system (OS) core
 /// Run the core CPU loop
 void RunLoop() {
     for (;;){
-        g_app_core->Run(10000);
+        g_app_core->Run(100);
         HW::Update();
+        Kernel::Reschedule();
     }
 }
 
 /// Step the CPU one instruction
 void SingleStep() {
     g_app_core->Step();
+    HW::Update();
+    Kernel::Reschedule();
 }
 
 /// Halt the core
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index 41af5801d..f271d336e 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -168,12 +168,15 @@
     <ClCompile Include="hle\config_mem.cpp" />
     <ClCompile Include="hle\coprocessor.cpp" />
     <ClCompile Include="hle\hle.cpp" />
+    <ClCompile Include="hle\kernel\kernel.cpp" />
+    <ClCompile Include="hle\kernel\mutex.cpp" />
+    <ClCompile Include="hle\kernel\thread.cpp" />
     <ClCompile Include="hle\service\apt.cpp" />
     <ClCompile Include="hle\service\gsp.cpp" />
     <ClCompile Include="hle\service\hid.cpp" />
     <ClCompile Include="hle\service\service.cpp" />
     <ClCompile Include="hle\service\srv.cpp" />
-    <ClCompile Include="hle\syscall.cpp" />
+    <ClCompile Include="hle\svc.cpp" />
     <ClCompile Include="hw\hw.cpp" />
     <ClCompile Include="hw\lcd.cpp" />
     <ClCompile Include="hw\ndma.cpp" />
@@ -214,12 +217,15 @@
     <ClInclude Include="hle\coprocessor.h" />
     <ClInclude Include="hle\function_wrappers.h" />
     <ClInclude Include="hle\hle.h" />
+    <ClInclude Include="hle\kernel\kernel.h" />
+    <ClInclude Include="hle\kernel\mutex.h" />
+    <ClInclude Include="hle\kernel\thread.h" />
     <ClInclude Include="hle\service\apt.h" />
     <ClInclude Include="hle\service\gsp.h" />
     <ClInclude Include="hle\service\hid.h" />
     <ClInclude Include="hle\service\service.h" />
     <ClInclude Include="hle\service\srv.h" />
-    <ClInclude Include="hle\syscall.h" />
+    <ClInclude Include="hle\svc.h" />
     <ClInclude Include="hw\hw.h" />
     <ClInclude Include="hw\lcd.h" />
     <ClInclude Include="hw\ndma.h" />
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index edf34ce2f..b6c1d5b93 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -31,6 +31,9 @@
     <Filter Include="arm\interpreter\mmu">
       <UniqueIdentifier>{13ef9860-2ba0-47e9-a93d-b4052adab269}</UniqueIdentifier>
     </Filter>
+    <Filter Include="hle\kernel">
+      <UniqueIdentifier>{8089d94b-5faa-43dc-854b-ffd2fa2e7fe3}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="arm\disassembler\arm_disasm.cpp">
@@ -81,9 +84,6 @@
     <ClCompile Include="hle\hle.cpp">
       <Filter>hle</Filter>
     </ClCompile>
-    <ClCompile Include="hle\syscall.cpp">
-      <Filter>hle</Filter>
-    </ClCompile>
     <ClCompile Include="hle\service\service.cpp">
       <Filter>hle\service</Filter>
     </ClCompile>
@@ -147,12 +147,24 @@
     <ClCompile Include="arm\interpreter\mmu\wb.cpp">
       <Filter>arm\interpreter\mmu</Filter>
     </ClCompile>
-    <ClCompile Include="arm\interpreter\armcopro.cpp">
-      <Filter>arm</Filter>
-    </ClCompile>
     <ClCompile Include="arm\interpreter\mmu\maverick.cpp">
       <Filter>arm\interpreter\mmu</Filter>
     </ClCompile>
+    <ClCompile Include="hle\kernel\kernel.cpp">
+      <Filter>hle\kernel</Filter>
+    </ClCompile>
+    <ClCompile Include="hle\kernel\thread.cpp">
+      <Filter>hle\kernel</Filter>
+    </ClCompile>
+    <ClCompile Include="hle\svc.cpp">
+      <Filter>hle</Filter>
+    </ClCompile>
+    <ClCompile Include="hle\kernel\mutex.cpp">
+      <Filter>hle\kernel</Filter>
+    </ClCompile>
+    <ClCompile Include="arm\interpreter\armcopro.cpp">
+      <Filter>arm\interpreter</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="arm\disassembler\arm_disasm.h">
@@ -217,9 +229,6 @@
     <ClInclude Include="hle\service\service.h">
       <Filter>hle\service</Filter>
     </ClInclude>
-    <ClInclude Include="hle\syscall.h">
-      <Filter>hle</Filter>
-    </ClInclude>
     <ClInclude Include="hle\service\apt.h">
       <Filter>hle\service</Filter>
     </ClInclude>
@@ -274,6 +283,18 @@
     <ClInclude Include="arm\interpreter\mmu\sa_mmu.h">
       <Filter>arm\interpreter\mmu</Filter>
     </ClInclude>
+    <ClInclude Include="hle\kernel\kernel.h">
+      <Filter>hle\kernel</Filter>
+    </ClInclude>
+    <ClInclude Include="hle\kernel\thread.h">
+      <Filter>hle\kernel</Filter>
+    </ClInclude>
+    <ClInclude Include="hle\svc.h">
+      <Filter>hle</Filter>
+    </ClInclude>
+    <ClInclude Include="hle\kernel\mutex.h">
+      <Filter>hle\kernel</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <Text Include="CMakeLists.txt" />
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index d934eafb4..801865d49 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -719,17 +719,27 @@ template<int func(void*, u32)> void WrapI_VU(){
     RETURN(retval);
 }
 
+template<int func(void*, void*, u32)> void WrapI_VVU(){
+    u32 retval = func(Memory::GetPointer(PARAM(0)), Memory::GetPointer(PARAM(1)), PARAM(2));
+    RETURN(retval);
+}
+
 template<int func(void*, u32, void*, int)> void WrapI_VUVI(){
     u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), Memory::GetPointer(PARAM(2)), PARAM(3));
     RETURN(retval);
 }
 
 template<int func(void*, u32, u32, u32, u32, u32)> void WrapI_VUUUUU(){
-    u32 retval = func(Memory::GetPointer(PARAM(0)), PARAM(1), PARAM(2), PARAM(3), PARAM(4), PARAM(5));
+    u32 retval = func(NULL, PARAM(0), PARAM(1), PARAM(2), PARAM(3), PARAM(4));
     RETURN(retval);
 }
 
 template<int func(u32, s64)> void WrapI_US64() {
-    int retval = func(PARAM(0), PARAM64(2));
+    int retval = func(PARAM(0), PARAM64(1));
+    RETURN(retval);
+}
+
+template<int func(void*, void*, u32, u32, s64)> void WrapI_VVUUS64() {
+    int retval = func(Memory::GetPointer(PARAM(0)), Memory::GetPointer(PARAM(1)), PARAM(2), PARAM(3), PARAM(4)); // NOTE(review): the s64 argument is read with PARAM(4) while WrapI_US64 uses PARAM64 - confirm the upper half is not needed
+    RETURN(retval);
+}
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index be151665b..080c36abf 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -6,7 +6,7 @@
 
 #include "core/mem_map.h"
 #include "core/hle/hle.h"
-#include "core/hle/syscall.h"
+#include "core/hle/svc.h"
 #include "core/hle/service/service.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -15,17 +15,17 @@ namespace HLE {
 
 static std::vector<ModuleDef> g_module_db;
 
-const FunctionDef* GetSyscallInfo(u32 opcode) {
+const FunctionDef* GetSVCInfo(u32 opcode) {
     u32 func_num = opcode & 0xFFFFFF; // 8 bits
     if (func_num > 0xFF) {
-        ERROR_LOG(HLE,"Unknown syscall: 0x%02X", func_num); 
+        ERROR_LOG(HLE,"Unknown SVC: 0x%02X", func_num); 
         return NULL;
     }
     return &g_module_db[0].func_table[func_num];
 }
 
-void CallSyscall(u32 opcode) {
-    const FunctionDef *info = GetSyscallInfo(opcode);
+void CallSVC(u32 opcode) {
+    const FunctionDef *info = GetSVCInfo(opcode);
 
     if (!info) {
         return;
@@ -33,17 +33,28 @@ void CallSyscall(u32 opcode) {
     if (info->func) {
         info->func();
     } else {
-        ERROR_LOG(HLE, "Unimplemented SysCall function %s(..)", info->name.c_str());
+        ERROR_LOG(HLE, "Unimplemented SVC function %s(..)", info->name.c_str());
     }
 }
 
+void EatCycles(u32 cycles) {
+    // TODO: ImplementMe
+}
+
+void ReSchedule(const char *reason) {
+#ifdef _DEBUG
+    _dbg_assert_msg_(HLE, reason != 0 && strlen(reason) < 256, "ReSchedule: Invalid or too long reason.");
+#endif
+    // TODO: ImplementMe
+}
+
 void RegisterModule(std::string name, int num_functions, const FunctionDef* func_table) {
     ModuleDef module = {name, num_functions, func_table};
     g_module_db.push_back(module);
 }
 
 void RegisterAllModules() {
-    Syscall::Register();
+    SVC::Register();
 }
 
 void Init() {
diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h
index 42f37e29c..c075147c3 100644
--- a/src/core/hle/hle.h
+++ b/src/core/hle/hle.h
@@ -34,7 +34,11 @@ struct ModuleDef {
 
 void RegisterModule(std::string name, int num_functions, const FunctionDef *func_table);
 
-void CallSyscall(u32 opcode);
+void CallSVC(u32 opcode);
+
+void EatCycles(u32 cycles);
+
+void ReSchedule(const char *reason);
 
 void Init();
 
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
new file mode 100644
index 000000000..de80de893
--- /dev/null
+++ b/src/core/hle/kernel/kernel.cpp
@@ -0,0 +1,158 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include <string.h>
+
+#include "common/common.h"
+
+#include "core/core.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+ObjectPool g_object_pool;
+
+ObjectPool::ObjectPool() {
+    memset(occupied, 0, sizeof(bool) * MAX_COUNT); // All slots start free; pool[] is not cleared
+    next_id = INITIAL_NEXT_ID;
+}
+
+Handle ObjectPool::Create(Object* obj, int range_bottom, int range_top) {
+    if (range_top > MAX_COUNT) { // Clamp the upper bound of the search to the pool size
+        range_top = MAX_COUNT;
+    }
+    if (next_id >= range_bottom && next_id < range_top) { // Start at the next_id hint if in range
+        range_bottom = next_id++;
+    }
+    for (int i = range_bottom; i < range_top; i++) { // Take the first free slot in the range
+        if (!occupied[i]) {
+            occupied[i] = true;
+            pool[i] = obj;
+            pool[i]->handle = i + HANDLE_OFFSET; // A handle is the slot index plus HANDLE_OFFSET
+            return i + HANDLE_OFFSET;
+        }
+    }
+    ERROR_LOG(HLE, "Unable to allocate kernel object, too many objects slots in use.");
+    return 0; // 0 is below HANDLE_OFFSET, so it can never be a valid handle
+}
+
+bool ObjectPool::IsValid(Handle handle) {
+    // Convert the handle back into a slot index; anything that does not land inside the pool is
+    // rejected before the occupancy flag is consulted.
+    const int slot = handle - HANDLE_OFFSET;
+    if (slot < 0 || slot >= MAX_COUNT) {
+        return false;
+    }
+    return occupied[slot];
+}
+
+void ObjectPool::Clear() {
+    for (int i = 0; i < MAX_COUNT; i++) {
+        // Brutally clear everything, no validation: every object still in the pool is deleted
+        if (occupied[i])
+            delete pool[i];
+        occupied[i] = false;
+    }
+    memset(pool, 0, sizeof(Object*)*MAX_COUNT); // Drop the now-dangling pointers
+    next_id = INITIAL_NEXT_ID; // Restart handle allocation from the initial hint
+}
+
+Object* &ObjectPool::operator [](Handle handle)
+{
+    _dbg_assert_msg_(KERNEL, IsValid(handle), "GRABBING UNALLOCED KERNEL OBJ");
+    return pool[handle - HANDLE_OFFSET]; // Reference to the slot itself, so callers can reassign it
+}
+
+void ObjectPool::List() {
+    // Log the handle, type name and name of every allocated, non-NULL object
+    for (int slot = 0; slot < MAX_COUNT; slot++) {
+        if (!occupied[slot] || !pool[slot]) {
+            continue;
+        }
+        INFO_LOG(KERNEL, "KO %i: %s \"%s\"", slot + HANDLE_OFFSET, pool[slot]->GetTypeName(), 
+            pool[slot]->GetName());
+    }
+}
+
+int ObjectPool::GetCount() {
+    // Tally the slots currently flagged as occupied
+    int used = 0;
+    for (int slot = 0; slot < MAX_COUNT; ++slot) {
+        used += occupied[slot] ? 1 : 0;
+    }
+    return used;
+}
+
+Object* ObjectPool::CreateByIDType(int type) {
+    // Used for save states.  This is ugly, but what other way is there? (All cases are disabled.)
+    switch (type) {
+    //case SCE_KERNEL_TMID_Alarm:
+    //    return __KernelAlarmObject();
+    //case SCE_KERNEL_TMID_EventFlag:
+    //    return __KernelEventFlagObject();
+    //case SCE_KERNEL_TMID_Mbox:
+    //    return __KernelMbxObject();
+    //case SCE_KERNEL_TMID_Fpl:
+    //    return __KernelMemoryFPLObject();
+    //case SCE_KERNEL_TMID_Vpl:
+    //    return __KernelMemoryVPLObject();
+    //case PPSSPP_KERNEL_TMID_PMB:
+    //    return __KernelMemoryPMBObject();
+    //case PPSSPP_KERNEL_TMID_Module:
+    //    return __KernelModuleObject();
+    //case SCE_KERNEL_TMID_Mpipe:
+    //    return __KernelMsgPipeObject();
+    //case SCE_KERNEL_TMID_Mutex:
+    //    return __KernelMutexObject();
+    //case SCE_KERNEL_TMID_LwMutex:
+    //    return __KernelLwMutexObject();
+    //case SCE_KERNEL_TMID_Semaphore:
+    //    return __KernelSemaphoreObject();
+    //case SCE_KERNEL_TMID_Callback:
+    //    return __KernelCallbackObject();
+    //case SCE_KERNEL_TMID_Thread:
+    //    return __KernelThreadObject();
+    //case SCE_KERNEL_TMID_VTimer:
+    //    return __KernelVTimerObject();
+    //case SCE_KERNEL_TMID_Tlspl:
+    //    return __KernelTlsplObject();
+    //case PPSSPP_KERNEL_TMID_File:
+    //    return __KernelFileNodeObject();
+    //case PPSSPP_KERNEL_TMID_DirList:
+    //    return __KernelDirListingObject();
+
+    default: // With every case above commented out, all type values currently end up here
+        ERROR_LOG(COMMON, "Unable to load state: could not find object type %d.", type);
+        return NULL;
+    }
+}
+
+void Init() { // Kernel start-up: at present this only initializes the threading code
+    Kernel::ThreadingInit();
+}
+
+void Shutdown() { // Kernel tear-down: at present this only shuts down the threading code
+    Kernel::ThreadingShutdown();
+}
+
+/**
+ * Loads executable stored at specified address
+ * @param entry_point Entry point in memory of loaded executable
+ * @return Always true at present; no failure is detected here
+ */
+bool LoadExec(u32 entry_point) {
+    Init();
+
+    // SetupMainThread() takes the main thread's entry point from the core's PC, so set it first
+    Core::g_app_core->SetPC(entry_point);
+
+    // 0x30 is the typical main thread priority I've seen used so far
+    Kernel::SetupMainThread(0x30);
+    return true;
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
new file mode 100644
index 000000000..7cd79c2c4
--- /dev/null
+++ b/src/core/hle/kernel/kernel.h
@@ -0,0 +1,154 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common.h"
+
+typedef u32 Handle;
+typedef s32 Result;
+
+namespace Kernel {
+
+enum class HandleType : u32 { // Kind of a kernel object, as reported by Object::GetHandleType()
+    Unknown         = 0,
+    Port            = 1,
+    Service         = 2,
+    Event           = 3,
+    Mutex           = 4,
+    SharedMemory    = 5,
+    Redirection     = 6,
+    Thread          = 7,
+    Process         = 8,
+    Arbiter         = 9,
+    File            = 10,
+    Semaphore       = 11,
+};
+    
+enum {
+    MAX_NAME_LENGTH     = 0x100,    ///< Maximum kernel object name length, excluding terminator
+    DEFAULT_STACK_SIZE  = 0x4000,   ///< Stack size used when a thread is created without one
+};
+
+class ObjectPool;
+
+class Object : NonCopyable {
+    friend class ObjectPool; // ObjectPool::Create() is what assigns the handle below
+    u32 handle; // Not initialized by this class; only meaningful once added to an ObjectPool
+public:
+    virtual ~Object() {}
+    Handle GetHandle() const { return handle; }
+    virtual const char *GetTypeName() { return "[BAD KERNEL OBJECT TYPE]"; } // Override per type
+    virtual const char *GetName() { return "[UNKNOWN KERNEL OBJECT]"; } // Override to name objects
+    virtual Kernel::HandleType GetHandleType() const = 0; // Checked by ObjectPool::Get<T>()
+};
+
+class ObjectPool : NonCopyable {
+public:
+    ObjectPool();
+    ~ObjectPool() {}
+
+    // Allocates a handle within the range and inserts the object into the map.
+    Handle Create(Object* obj, int range_bottom=INITIAL_NEXT_ID, int range_top=0x7FFFFFFF);
+
+    static Object* CreateByIDType(int type);
+    // Frees the slot and deletes the object if handle refers to a live object of type T.
+    template <class T>
+    u32 Destroy(Handle handle) {
+        u32 error; // Always written by Get() below
+        if (Get<T>(handle, error)) {
+            occupied[handle - HANDLE_OFFSET] = false;
+            delete pool[handle - HANDLE_OFFSET];
+        }
+        return error;
+    }
+
+    bool IsValid(Handle handle);
+    // Looks up a handle with validation; returns 0 for a bad handle or a handle of another type.
+    template <class T>
+    T* Get(Handle handle, u32& outError) {
+        if (handle < HANDLE_OFFSET || handle >= HANDLE_OFFSET + MAX_COUNT || !occupied[handle - HANDLE_OFFSET]) {
+            // Tekken 6 spams 0x80020001 gets wrong with no ill effects, also on the real PSP
+            if (handle != 0 && (u32)handle != 0x80020001) {
+                WARN_LOG(KERNEL, "Kernel: Bad object handle %i (%08x)", handle, handle);
+            }
+            outError = 0;//T::GetMissingErrorCode();
+            return 0;
+        } else {
+            // Previously we had a dynamic_cast here, but since RTTI was disabled traditionally,
+            // it just acted as a static cast. The cast is still unchecked, so a wrong type is
+            // detected by comparing the object's handle type with the one T advertises.
+            T* t = static_cast<T*>(pool[handle - HANDLE_OFFSET]);
+            if (t == 0 || t->GetHandleType() != T::GetStaticHandleType()) {
+                WARN_LOG(KERNEL, "Kernel: Wrong object type for %i (%08x)", handle, handle);
+                outError = 0;//T::GetMissingErrorCode();
+                return 0;
+            }
+            outError = 0;//SCE_KERNEL_ERROR_OK;
+            return t;
+        }
+    }
+
+    // ONLY use this when you know the handle is valid.
+    template <class T>
+    T *GetFast(Handle handle) {
+        const Handle realHandle = handle - HANDLE_OFFSET;
+        _dbg_assert_(KERNEL, realHandle >= 0 && realHandle < MAX_COUNT && occupied[realHandle]);
+        return static_cast<T*>(pool[realHandle]);
+    }
+    // Calls func on every live object whose handle type matches T, until func returns false.
+    template <class T, typename ArgT>
+    void Iterate(bool func(T*, ArgT), ArgT arg) {
+        const HandleType type = T::GetStaticHandleType();
+        for (int i = 0; i < MAX_COUNT; i++)
+        {
+            if (!occupied[i])
+                continue;
+            Object* obj = pool[i]; // Check the type through the base class before casting to T
+            if (obj && obj->GetHandleType() == type) {
+                if (!func(static_cast<T*>(obj), arg))
+                    break;
+            }
+        }
+    }
+    // Writes the handle type of the object behind handle to *type; false if the handle is bad.
+    bool GetIDType(Handle handle, HandleType* type) const {
+        if ((handle < HANDLE_OFFSET) || (handle >= HANDLE_OFFSET + MAX_COUNT) || 
+            !occupied[handle - HANDLE_OFFSET]) {
+            ERROR_LOG(KERNEL, "Kernel: Bad object handle %i (%08x)", handle, handle);
+            return false;
+        }
+        Object* t = pool[handle - HANDLE_OFFSET];
+        *type = t->GetHandleType();
+        return true;
+    }
+
+    Object* &operator [](Handle handle);
+    void List();
+    void Clear();
+    int GetCount();
+
+private:
+    
+    enum {
+        MAX_COUNT       = 0x1000,   // Number of slots in the pool
+        HANDLE_OFFSET   = 0x100,    // Added to a slot index to form the handle
+        INITIAL_NEXT_ID = 0x10,     // First slot tried by Create() on a fresh or cleared pool
+    };
+
+    Object* pool[MAX_COUNT];        // Object stored in each slot
+    bool    occupied[MAX_COUNT];    // Whether the matching pool[] entry is in use
+    int     next_id;                // Slot index hint for the next Create()
+};
+
+extern ObjectPool g_object_pool;
+
+/**
+ * Loads executable stored at specified address
+ * @param entry_point Entry point in memory of loaded executable
+ * @return True on success, otherwise false
+ */
+bool LoadExec(u32 entry_point);
+
+} // namespace
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
new file mode 100644
index 000000000..019efbc78
--- /dev/null
+++ b/src/core/hle/kernel/mutex.cpp
@@ -0,0 +1,132 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#include <map>
+#include <vector>
+
+#include "common/common.h"
+
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+class Mutex : public Object {
+public:
+    const char* GetTypeName() { return "Mutex"; }
+
+    static Kernel::HandleType GetStaticHandleType() {  return Kernel::HandleType::Mutex; }
+    Kernel::HandleType GetHandleType() const { return Kernel::HandleType::Mutex; }
+
+    bool initial_locked;                        ///< Initial lock state when mutex was created
+    bool locked;                                ///< Current locked state
+    Handle lock_thread;                         ///< Handle to thread that has mutex (-1 if none)
+    std::vector<Handle> waiting_threads;        ///< Threads that are waiting for the mutex
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef std::multimap<Handle, Handle> MutexMap;
+static MutexMap g_mutex_held_locks;
+
+void MutexAcquireLock(Mutex* mutex, Handle thread) {
+    g_mutex_held_locks.insert(std::make_pair(thread, mutex->GetHandle())); // thread -> held mutex
+    mutex->lock_thread = thread; // Record the owner; mutex->locked is not modified here
+}
+
+void MutexAcquireLock(Mutex* mutex) {
+    // Convenience overload: the lock is taken on behalf of the current thread
+    MutexAcquireLock(mutex, GetCurrentThreadHandle());
+}
+
+void MutexEraseLock(Mutex* mutex) {
+    auto held = g_mutex_held_locks.equal_range(mutex->lock_thread); // Locks held by the owner
+    auto entry = held.first;
+    while (entry != held.second && entry->second != mutex->GetHandle()) {
+        ++entry;
+    }
+    if (entry != held.second) {
+        g_mutex_held_locks.erase(entry); // Only the first matching entry is removed
+    }
+    mutex->lock_thread = -1;
+}
+
+bool LockMutex(Mutex* mutex) {
+    if (mutex->locked) { // Mutex already locked: report failure and leave it untouched
+        return false;
+    }
+    mutex->locked = true; // Without this, later calls would "lock" the same mutex again
+    MutexAcquireLock(mutex); // Registers the current thread as the owner
+    return true;
+}
+
+bool ReleaseMutexForThread(Mutex* mutex, Handle thread) {
+    MutexAcquireLock(mutex, thread); // Ownership passes straight to the given thread
+    Kernel::ResumeThreadFromWait(thread);
+    return true; // Always reports success
+}
+
+bool ReleaseMutex(Mutex* mutex) {
+    MutexEraseLock(mutex);
+    bool woke_threads = false;
+
+    // Hand the mutex to the next waiting thread (begin() is re-read, as erase() invalidates it)
+    while (!woke_threads && !mutex->waiting_threads.empty()) {
+        auto iter = mutex->waiting_threads.begin();
+        woke_threads |= ReleaseMutexForThread(mutex, *iter);
+        mutex->waiting_threads.erase(iter);
+    }
+    // Reset mutex lock thread handle, nothing is waiting
+    if (!woke_threads) {
+        mutex->locked = false;
+        mutex->lock_thread = -1;
+    }
+    return woke_threads;
+}
+
+/**
+ * Releases a mutex
+ * @param handle Handle to mutex to release
+ * @return 0 if a waiting thread was woken and given the mutex, otherwise -1
+ */
+Result ReleaseMutex(Handle handle) {
+    // GetFast() does no validation beyond a debug assert, so handle must be a valid mutex handle
+    Mutex* mutex = Kernel::g_object_pool.GetFast<Mutex>(handle);
+    const bool woke_thread = ReleaseMutex(mutex);
+    return woke_thread ? 0 : -1;
+}
+
+/**
+ * Creates a mutex
+ * @param handle Reference to handle for the newly created mutex
+ * @param initial_locked Specifies if the mutex should be locked initially
+ * @return Pointer to the newly created mutex object
+ */
+Mutex* CreateMutex(Handle& handle, bool initial_locked) {
+    Mutex* mutex = new Mutex;
+    handle = Kernel::g_object_pool.Create(mutex);
+
+    mutex->initial_locked = initial_locked;
+    mutex->locked = initial_locked;
+    if (!initial_locked) {
+        // Nobody owns the mutex yet, so reset the lock thread handle
+        mutex->lock_thread = -1;
+    } else {
+        // Created in the locked state: the current thread becomes the owner
+        MutexAcquireLock(mutex);
+    }
+    return mutex;
+}
+
+/**
+ * Creates a mutex and returns the handle allocated for it
+ * @param initial_locked Specifies if the mutex should be locked initially
+ */
+Handle CreateMutex(bool initial_locked) {
+    Handle handle;
+    CreateMutex(handle, initial_locked); // The Mutex* result is not needed, only the handle
+    return handle;
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h
new file mode 100644
index 000000000..871e2e562
--- /dev/null
+++ b/src/core/hle/kernel/mutex.h
@@ -0,0 +1,26 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common_types.h"
+
+#include "core/hle/kernel/kernel.h"
+
+namespace Kernel {
+
+/**
+ * Releases a mutex
+ * @param handle Handle to mutex to release
+ */
+Result ReleaseMutex(Handle handle);
+
+/**
+ * Creates a mutex
+ * @param initial_locked Specifies if the mutex should be locked initially
+ * @return Handle to the newly created mutex
+ */
+Handle CreateMutex(bool initial_locked);
+
+} // namespace
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
new file mode 100644
index 000000000..bf4c8353c
--- /dev/null
+++ b/src/core/hle/kernel/thread.cpp
@@ -0,0 +1,323 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#include <stdio.h>
+
+#include <list>
+#include <vector>
+#include <map>
+#include <string>
+
+#include "common/common.h"
+#include "common/thread_queue_list.h"
+
+#include "core/core.h"
+#include "core/mem_map.h"
+#include "core/hle/hle.h"
+#include "core/hle/svc.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+
+namespace Kernel {
+
+class Thread : public Kernel::Object {
+public:
+
+    const char* GetName() { return name; }
+    const char* GetTypeName() { return "Thread"; }
+
+    static Kernel::HandleType GetStaticHandleType() {  return Kernel::HandleType::Thread; }
+    Kernel::HandleType GetHandleType() const { return Kernel::HandleType::Thread; }
+
+    inline bool IsRunning() const { return (status & THREADSTATUS_RUNNING) != 0; }
+    inline bool IsStopped() const { return (status & THREADSTATUS_DORMANT) != 0; }
+    inline bool IsReady() const { return (status & THREADSTATUS_READY) != 0; }
+    inline bool IsWaiting() const { return (status & THREADSTATUS_WAIT) != 0; }
+    inline bool IsSuspended() const { return (status & THREADSTATUS_SUSPEND) != 0; }
+
+    ThreadContext context;      ///< Saved CPU context, stored/loaded on context switches
+
+    u32 status;                 ///< Combination of ThreadStatus flags
+    u32 entry_point;            ///< Copied into the context's pc by ResetThread()
+    u32 stack_top;              ///< Copied into the context's sp by ResetThread()
+    u32 stack_size;             ///< Stack size given at creation
+
+    s32 initial_priority;       ///< Priority given at creation
+    s32 current_priority;       ///< Priority used when queueing the thread as ready
+
+    s32 processor_id;           ///< Processor id given at creation
+
+    WaitType wait_type;         ///< What the thread is waiting on (WAITTYPE_NONE if nothing)
+
+    char name[Kernel::MAX_NAME_LENGTH + 1];     ///< NUL-terminated thread name
+};
+
+// Lists all thread ids that aren't deleted/etc.
+std::vector<Handle> g_thread_queue;
+
+// Lists only ready thread ids.
+Common::ThreadQueueList<Handle> g_thread_ready_queue;
+
+Handle g_current_thread_handle;
+Thread* g_current_thread;
+
+
+/// Gets the current thread, i.e. the one most recently passed to SetCurrentThread()
+inline Thread* GetCurrentThread() {
+    return g_current_thread;
+}
+
+/// Gets the current thread handle, or 0 (never a valid handle) if there is no current thread
+Handle GetCurrentThreadHandle() {
+    return GetCurrentThread() ? GetCurrentThread()->GetHandle() : 0;
+}
+
+/// Sets the current thread; t may be NULL (no thread running), which caches a handle of 0
+inline void SetCurrentThread(Thread* t) {
+    g_current_thread = t;
+    g_current_thread_handle = t ? t->GetHandle() : 0; // SwitchContext() passes NULL here
+}
+
+/// Saves the current CPU context of the application core (Core::g_app_core) into ctx
+void SaveContext(ThreadContext& ctx) {
+    Core::g_app_core->SaveContext(ctx);
+}
+
+/// Loads the CPU context ctx into the application core (Core::g_app_core)
+void LoadContext(ThreadContext& ctx) {
+    Core::g_app_core->LoadContext(ctx);
+}
+
+/// Resets a thread: zeroes its saved context, then sets up r0 (arg), pc, sp, cpsr and wait type
+void ResetThread(Thread* t, u32 arg, s32 lowest_priority) {
+    memset(&t->context, 0, sizeof(ThreadContext));
+
+    t->context.cpu_registers[0] = arg;
+    t->context.pc = t->entry_point;
+    t->context.sp = t->stack_top;
+    t->context.cpsr = 0x1F; // Usermode - NOTE(review): 0x1F is ARM's System mode, User is 0x10
+    
+    if (t->current_priority < lowest_priority) { // Below the given bound: restore the initial one
+        t->current_priority = t->initial_priority;
+    }
+        
+    t->wait_type = WAITTYPE_NONE;
+}
+
+/// Adds a thread to, or removes it from, the ready queue (status is only updated when adding)
+void ChangeReadyState(Thread* t, bool ready) {
+    Handle handle = t->GetHandle();
+    if (t->IsReady()) {
+        if (!ready) {
+            g_thread_ready_queue.remove(t->current_priority, handle); // Status is left to caller
+        }
+    }  else if (ready) {
+        if (t->IsRunning()) {
+            g_thread_ready_queue.push_front(t->current_priority, handle); // Running: queue first
+        } else {
+            g_thread_ready_queue.push_back(t->current_priority, handle);
+        }
+        t->status = THREADSTATUS_READY; // Replaces every other status flag
+    }
+}
+
+/// Changes a thread's state, keeping the ready queue in sync; no-op for NULL or unchanged status
+void ChangeThreadState(Thread* t, ThreadStatus new_status) {
+    if (!t || t->status == new_status) {
+        return;
+    }
+    ChangeReadyState(t, (new_status & THREADSTATUS_READY) != 0);
+    t->status = new_status;
+    
+    if (new_status == THREADSTATUS_WAIT) {
+        if (t->wait_type == WAITTYPE_NONE) { // A waiting thread must have a reason to wait
+            ERROR_LOG(KERNEL, "Waittype none not allowed here");
+        }
+    }
+}
+
+/// Calls a thread by marking it as "ready" (note: will not actually execute until current thread yields)
+void CallThread(Thread* t) {
+    // Stop waiting: whatever the thread was waiting on no longer applies, so the wait type is
+    // cleared unconditionally before the state change.
+    t->wait_type = WAITTYPE_NONE;
+
+    ChangeThreadState(t, THREADSTATUS_READY);
+}
+
+/// Switches CPU context to that of the specified thread
+void SwitchContext(Thread* t) {
+    Thread* cur = GetCurrentThread();
+    
+    // Save context for current thread
+    if (cur) {
+        SaveContext(cur->context);
+        
+        if (cur->IsRunning()) {
+            ChangeReadyState(cur, true); // Outgoing thread goes back onto the ready queue
+        }
+    }
+    // Load context of new thread
+    if (t) {
+        SetCurrentThread(t);
+        ChangeReadyState(t, false); // Take the incoming thread off the ready queue if it is on it
+        t->status = (t->status | THREADSTATUS_RUNNING) & ~THREADSTATUS_READY; // Running, not ready
+        t->wait_type = WAITTYPE_NONE;
+        LoadContext(t->context);
+    } else {
+        SetCurrentThread(NULL);
+    }
+}
+
+/// Gets the next thread that is ready to be run by priority
+Thread* NextThread() {
+    Handle next;
+    Thread* cur = GetCurrentThread();
+    
+    if (cur && cur->IsRunning()) {
+        // NOTE(review): presumably only yields a thread that beats cur's priority - confirm
+        next = g_thread_ready_queue.pop_first_better(cur->current_priority);
+    } else  {
+        next = g_thread_ready_queue.pop_first();
+    }
+    if (next == 0) { // A handle of 0 is treated as "nothing to run"
+        return NULL;
+    }
+    return Kernel::g_object_pool.GetFast<Thread>(next);
+}
+
+/// Puts the current thread in the wait state for the given type
+void WaitCurrentThread(WaitType wait_type) {
+    Thread* t = GetCurrentThread(); // Not checked for NULL: a current thread must exist
+    t->wait_type = wait_type;
+    ChangeThreadState(t, ThreadStatus(THREADSTATUS_WAIT | (t->status & THREADSTATUS_SUSPEND))); // Keeps the suspend flag
+}
+
+/// Resumes a thread from waiting by marking it as "ready"
+void ResumeThreadFromWait(Handle handle) {
+    u32 error;
+    Thread* thread = Kernel::g_object_pool.Get<Thread>(handle, error);
+    if (!thread)
+        return;
+    thread->status &= ~THREADSTATUS_WAIT;
+    const u32 blocking = THREADSTATUS_WAITSUSPEND | THREADSTATUS_DORMANT | THREADSTATUS_DEAD;
+    if ((thread->status & blocking) == 0)
+        ChangeReadyState(thread, true);
+}
+
+/// Creates a new dormant thread object, registers it in the object pool and returns it
+Thread* CreateThread(Handle& handle, const char* name, u32 entry_point, s32 priority,
+    s32 processor_id, u32 stack_top, int stack_size) {
+
+    _assert_msg_(KERNEL, (priority >= THREADPRIO_HIGHEST && priority <= THREADPRIO_LOWEST), 
+        "CreateThread priority=%d, outside of allowable range!", priority)
+
+    Thread* t = new Thread;
+    
+    handle = Kernel::g_object_pool.Create(t); // Output parameter: handle of the new thread
+    
+    g_thread_queue.push_back(handle);
+    g_thread_ready_queue.prepare(priority);
+    
+    t->status = THREADSTATUS_DORMANT; // Not ready or running until a caller changes its state
+    t->entry_point = entry_point;
+    t->stack_top = stack_top;
+    t->stack_size = stack_size;
+    t->initial_priority = t->current_priority = priority;
+    t->processor_id = processor_id;
+    t->wait_type = WAITTYPE_NONE;
+    
+    strncpy(t->name, name, Kernel::MAX_NAME_LENGTH);
+    t->name[Kernel::MAX_NAME_LENGTH] = '\0'; // strncpy() does not terminate a truncated name
+    
+    return t;
+}
+
+/// Creates a new thread - wrapper for external user (returns the new handle, or -1 on bad input)
+Handle CreateThread(const char* name, u32 entry_point, s32 priority, u32 arg, s32 processor_id,
+    u32 stack_top, int stack_size) {
+    if (name == NULL) {
+        ERROR_LOG(KERNEL, "CreateThread(): NULL name");
+        return -1;
+    }
+    if ((u32)stack_size < 0x200) { // The unsigned cast makes negative sizes pass this check
+        ERROR_LOG(KERNEL, "CreateThread(name=%s): invalid stack_size=0x%08X", name, 
+            stack_size);
+        return -1;
+    }
+    if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) {
+        s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST);
+        WARN_LOG(KERNEL, "CreateThread(name=%s): invalid priority=0x%08X, clamping to %08X",
+            name, priority, new_priority);
+        // TODO(bunnei): Clamping to a valid priority is not necessarily correct behavior... Confirm
+        // validity of this
+        priority = new_priority;
+    }
+    if (!Memory::GetPointer(entry_point)) {
+        ERROR_LOG(KERNEL, "CreateThread(name=%s): invalid entry %08x", name, entry_point);
+        return -1;
+    }
+    Handle handle;
+    Thread* t = CreateThread(handle, name, entry_point, priority, processor_id, stack_top, 
+        stack_size);
+
+    ResetThread(t, arg, 0); // Builds the initial context; arg ends up in register 0
+
+    HLE::EatCycles(32000);
+
+    // This won't schedule to the new thread, but it may to one woken from eating cycles.
+    // Technically, this should not eat all at once, and reschedule in the middle, but that's hard.
+    HLE::ReSchedule("thread created");
+
+    CallThread(t); // Marks the new thread as ready; it does not start running here
+    
+    return handle;
+}
+
+/// Sets up the primary application thread and makes it the running thread; returns its handle
+Handle SetupMainThread(s32 priority, int stack_size) {
+    Handle handle;
+    
+    // Initialize new "main" thread (its entry point is the core's current PC)
+    Thread* t = CreateThread(handle, "main", Core::g_app_core->GetPC(), priority, 
+        THREADPROCESSORID_0, Memory::SCRATCHPAD_VADDR_END, stack_size);
+    
+    ResetThread(t, 0, 0);
+    
+    // If running another thread already, set it to "ready" state
+    Thread* cur = GetCurrentThread();
+    if (cur && cur->IsRunning()) {
+        ChangeReadyState(cur, true);
+    }
+    
+    // Run new "main" thread
+    SetCurrentThread(t);
+    t->status = THREADSTATUS_RUNNING;
+    LoadContext(t->context); // The context of the previous thread, if any, is not saved here
+
+    return handle;
+}
+
+/// Reschedules to the next available thread (call after current thread is suspended)
+void Reschedule() {
+    Thread* prev = GetCurrentThread();
+    Thread* next = NextThread();
+    if (next != NULL) {
+        SwitchContext(next);
+        // Hack - automatically change previous thread (which would have been in "wait" state) to
+        // "ready" state, so that we can immediately resume to it when new thread yields. FixMe to
+        // actually wait for whatever event it is supposed to be waiting on.
+        if (prev != NULL)
+            ChangeReadyState(prev, true);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+void ThreadingInit() { // Intentionally empty for now
+}
+
+void ThreadingShutdown() { // Intentionally empty for now
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
new file mode 100644
index 000000000..9628f165d
--- /dev/null
+++ b/src/core/hle/kernel/thread.h
@@ -0,0 +1,74 @@
+// Copyright 2014 Citra Emulator Project / PPSSPP Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/hle/kernel/kernel.h"
+
+enum ThreadPriority {
+    THREADPRIO_HIGHEST      = 0,    ///< Highest thread priority
+    THREADPRIO_DEFAULT      = 16,   ///< Default thread priority for userland apps
+    THREADPRIO_LOW          = 31,   ///< Low range of thread priority for userland apps
+    THREADPRIO_LOWEST       = 63,   ///< Thread priority max checked by svcCreateThread
+};
+
+enum ThreadProcessorId {
+    THREADPROCESSORID_0     = 0xFFFFFFFE,   ///< Enables core appcode
+    THREADPROCESSORID_1     = 0xFFFFFFFD,   ///< Enables core syscore
+    THREADPROCESSORID_ALL   = 0xFFFFFFFC,   ///< Enables both cores
+};
+
+enum ThreadStatus { // Bit flags: a thread's status can combine several (see WAITSUSPEND)
+    THREADSTATUS_RUNNING        = 1,
+    THREADSTATUS_READY          = 2,
+    THREADSTATUS_WAIT           = 4,
+    THREADSTATUS_SUSPEND        = 8,
+    THREADSTATUS_DORMANT        = 16,
+    THREADSTATUS_DEAD           = 32,
+    THREADSTATUS_WAITSUSPEND    = THREADSTATUS_WAIT | THREADSTATUS_SUSPEND
+};
+
+enum WaitType { // What a thread is waiting on; WAITTYPE_NONE means it is not waiting
+    WAITTYPE_NONE,
+    WAITTYPE_SLEEP,
+    WAITTYPE_SEMA,
+    WAITTYPE_EVENTFLAG,
+    WAITTYPE_THREADEND,
+    WAITTYPE_VBLANK,
+    WAITTYPE_MUTEX,
+    WAITTYPE_SYNCH,
+};
+
+namespace Kernel {
+
+/// Creates a new thread - wrapper for external user
+Handle CreateThread(const char* name, u32 entry_point, s32 priority, u32 arg, s32 processor_id,
+    u32 stack_top, int stack_size=Kernel::DEFAULT_STACK_SIZE);
+
+/// Sets up the primary application thread
+Handle SetupMainThread(s32 priority, int stack_size=Kernel::DEFAULT_STACK_SIZE);
+
+/// Reschedules to the next available thread (call after current thread is suspended)
+void Reschedule();
+
+/// Puts the current thread in the wait state for the given type
+void WaitCurrentThread(WaitType wait_type);
+
+/// Resumes a thread from waiting by marking it as "ready"
+void ResumeThreadFromWait(Handle handle);
+
+/// Gets the current thread handle
+Handle GetCurrentThreadHandle();
+
+/// Put current thread in a wait state - on WaitSynchronization (NOTE(review): thread.cpp in this change has no definition - confirm)
+void WaitThread_Synchronization();
+
+/// Initialize threading
+void ThreadingInit();
+
+/// Shutdown threading
+void ThreadingShutdown();
+
+} // namespace
diff --git a/src/core/hle/service/apt.cpp b/src/core/hle/service/apt.cpp
index 709ac5493..32759a087 100644
--- a/src/core/hle/service/apt.cpp
+++ b/src/core/hle/service/apt.cpp
@@ -3,9 +3,10 @@
 // Refer to the license.txt file included.
 
 
-#include "common/log.h"
+#include "common/common.h"
 
 #include "core/hle/hle.h"
+#include "core/hle/kernel/mutex.h"
 #include "core/hle/service/apt.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -19,7 +20,10 @@ void Initialize(Service::Interface* self) {
 
 void GetLockHandle(Service::Interface* self) {
     u32* cmd_buff = Service::GetCommandBuffer();
-    cmd_buff[5] = 0x00000000; // TODO: This should be an actual mutex handle
+    u32 flags = cmd_buff[1]; // TODO(bunnei): Figure out the purpose of the flag field
+    cmd_buff[1] = 0; // No error
+    cmd_buff[5] = Kernel::CreateMutex(false);
+    DEBUG_LOG(KERNEL, "APT_U::GetLockHandle called : created handle 0x%08X", cmd_buff[5]);
 }
 
 const Interface::FunctionInfo FunctionTable[] = {
diff --git a/src/core/hle/service/apt.h b/src/core/hle/service/apt.h
index 4c7dd07e7..dca3097ed 100644
--- a/src/core/hle/service/apt.h
+++ b/src/core/hle/service/apt.h
@@ -29,7 +29,7 @@ public:
      * Gets the string port name used by CTROS for the service
      * @return Port name of service
      */
-    std::string GetPortName() const {
+    const char *GetPortName() const {
         return "APT:U";
     }
 };
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp
index 12c7dabcd..50cee2c41 100644
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@@ -27,7 +27,7 @@ union GX_CmdBufferHeader {
     // <=15 when writing a command to shared memory. This is incremented by the application when 
     // writing a command to shared memory, after increasing this value TriggerCmdReqQueue is only 
     // used if this field is value 1.
-    BitField<8,8,u32>  number_commands;
+    BitField<8,8,u32>   number_commands;
 
 };
 
@@ -101,9 +101,7 @@ void RegisterInterruptRelayQueue(Service::Interface* self) {
     u32* cmd_buff = Service::GetCommandBuffer();
     u32 flags = cmd_buff[1];
     u32 event_handle = cmd_buff[3]; // TODO(bunnei): Implement event handling
-    
     cmd_buff[2] = g_thread_id;          // ThreadID
-    cmd_buff[4] = self->NewHandle();
 }
 
 /// This triggers handling of the GX command written to the command buffer in shared memory.
diff --git a/src/core/hle/service/gsp.h b/src/core/hle/service/gsp.h
index 5ba09ab70..eb5786cd1 100644
--- a/src/core/hle/service/gsp.h
+++ b/src/core/hle/service/gsp.h
@@ -23,7 +23,7 @@ public:
      * Gets the string port name used by CTROS for the service
      * @return Port name of service
      */
-    std::string GetPortName() const {
+    const char *GetPortName() const {
         return "gsp::Gpu";
     }
 
diff --git a/src/core/hle/service/hid.h b/src/core/hle/service/hid.h
index b17fcfa86..81c29eb2e 100644
--- a/src/core/hle/service/hid.h
+++ b/src/core/hle/service/hid.h
@@ -25,7 +25,7 @@ public:
      * Gets the string port name used by CTROS for the service
      * @return Port name of service
      */
-    std::string GetPortName() const {
+    const char *GetPortName() const {
         return "hid:USER";
     }
 
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index e6605a398..08d0c43ff 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -7,12 +7,15 @@
 #include "common/string_util.h"
 
 #include "core/hle/hle.h"
+
 #include "core/hle/service/service.h"
 #include "core/hle/service/apt.h"
 #include "core/hle/service/gsp.h"
 #include "core/hle/service/hid.h"
 #include "core/hle/service/srv.h"
 
+#include "core/hle/kernel/kernel.h"
+
 namespace Service {
 
 Manager* g_manager = NULL;  ///< Service manager
@@ -31,32 +34,21 @@ Manager::~Manager() {
 
 /// Add a service to the manager (does not create it though)
 void Manager::AddService(Interface* service) {
-    int index = m_services.size();
-    u32 new_uid = GetUIDFromIndex(index);
-
+    m_port_map[service->GetPortName()] = Kernel::g_object_pool.Create(service);
     m_services.push_back(service);
-
-    m_port_map[service->GetPortName()] = new_uid;
-    service->m_uid = new_uid;
 }
 
 /// Removes a service from the manager, also frees memory
 void Manager::DeleteService(std::string port_name) {
-    auto service = FetchFromPortName(port_name);
-
-    m_services.erase(m_services.begin() + GetIndexFromUID(service->m_uid));
+    Interface* service = FetchFromPortName(port_name);
+    m_services.erase(std::remove(m_services.begin(), m_services.end(), service), m_services.end());
     m_port_map.erase(port_name);
-
     delete service;
 }
 
-/// Get a Service Interface from its UID
-Interface* Manager::FetchFromUID(u32 uid) {
-    int index = GetIndexFromUID(uid);
-    if (index < (int)m_services.size()) {
-        return m_services[index];
-    }
-    return NULL;
+/// Get a Service Interface from its Handle
+Interface* Manager::FetchFromHandle(Handle handle) {
+    return Kernel::g_object_pool.GetFast<Interface>(handle);
 }
 
 /// Get a Service Interface from its port
@@ -65,7 +57,7 @@ Interface* Manager::FetchFromPortName(std::string port_name) {
     if (itr == m_port_map.end()) {
         return NULL;
     }
-    return FetchFromUID(itr->second);
+    return FetchFromHandle(itr->second);
 }
 
 
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index b260a290a..fab51753f 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -4,22 +4,22 @@
 
 #pragma once
 
+#include <algorithm>
 #include <vector>
 #include <map>
 #include <string>
 
 #include "common/common.h"
-#include "common/common_types.h"
 #include "core/mem_map.h"
-#include "core/hle/syscall.h"
+
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/svc.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 // Namespace Service
 
 namespace Service {
 
-typedef s32 NativeUID;                          ///< Native handle for a service
-
 static const int kMaxPortSize           = 0x08; ///< Maximum size of a port name (8 characters)
 static const int kCommandHeaderOffset   = 0x80; ///< Offset into command buffer of header
 
@@ -35,15 +35,15 @@ inline static u32* GetCommandBuffer(const int offset=0) {
 class Manager;
 
 /// Interface to a CTROS service
-class Interface : NonCopyable {
+class Interface  : public Kernel::Object {
     friend class Manager;
 public:
+    
+    const char *GetName() { return GetPortName(); }
+    const char *GetTypeName() { return GetPortName(); }
 
-    Interface() {
-    }
-
-    virtual ~Interface() {
-    }
+    static Kernel::HandleType GetStaticHandleType() { return Kernel::HandleType::Service; }
+    Kernel::HandleType GetHandleType() const { return Kernel::HandleType::Service; }
 
     typedef void (*Function)(Interface*);
 
@@ -53,55 +53,44 @@ public:
         std::string name;
     };
 
-    /**
-     * Gets the UID for the serice
-     * @return UID of service in native format
-     */
-    NativeUID GetUID() const {
-        return (NativeUID)m_uid;
-    }
-
     /**
      * Gets the string name used by CTROS for a service
      * @return Port name of service
      */
-    virtual std::string GetPortName() const {
+    virtual const char *GetPortName() const {
         return "[UNKNOWN SERVICE PORT]";
     }
 
     /// Allocates a new handle for the service
-    Syscall::Handle NewHandle() {
-        Syscall::Handle handle = (m_handles.size() << 16) | m_uid;
+    Handle CreateHandle(Kernel::Object *obj) {
+        Handle handle = Kernel::g_object_pool.Create(obj);
         m_handles.push_back(handle);
         return handle;
     }
 
     /// Frees a handle from the service
-    void DeleteHandle(Syscall::Handle handle) {
-        for(auto iter = m_handles.begin(); iter != m_handles.end(); ++iter) {
-            if(*iter == handle) {
-                m_handles.erase(iter);
-                break;
-            }
-        }
+    template <class T>
+    void DeleteHandle(const Handle handle) {
+        Kernel::g_object_pool.Destroy<T>(handle);
+        m_handles.erase(std::remove(m_handles.begin(), m_handles.end(), handle), m_handles.end());
     }
 
     /**
      * Called when svcSendSyncRequest is called, loads command buffer and executes comand
      * @return Return result of svcSendSyncRequest passed back to user app
      */
-    Syscall::Result Sync() {
+    Result Sync() {
         u32* cmd_buff = GetCommandBuffer();
         auto itr = m_functions.find(cmd_buff[0]);
 
         if (itr == m_functions.end()) {
             ERROR_LOG(OSHLE, "Unknown/unimplemented function: port = %s, command = 0x%08X!", 
-                GetPortName().c_str(), cmd_buff[0]);
+                GetPortName(), cmd_buff[0]);
             return -1;
         }
         if (itr->second.func == NULL) {
             ERROR_LOG(OSHLE, "Unimplemented function: port = %s, name = %s!", 
-                GetPortName().c_str(), itr->second.name.c_str());
+                GetPortName(), itr->second.name.c_str());
             return -1;
         } 
 
@@ -122,10 +111,10 @@ protected:
     }
 
 private:
-    u32 m_uid;
-    
-    std::vector<Syscall::Handle>    m_handles;
-    std::map<u32, FunctionInfo>     m_functions;
+
+    std::vector<Handle>         m_handles;
+    std::map<u32, FunctionInfo> m_functions;
+
 };
 
 /// Simple class to manage accessing services from ports and UID handles
@@ -143,25 +132,16 @@ public:
     void DeleteService(std::string port_name);
 
     /// Get a Service Interface from its UID
-    Interface* FetchFromUID(u32 uid);
+    Interface* FetchFromHandle(u32 uid);
 
     /// Get a Service Interface from its port
     Interface* FetchFromPortName(std::string port_name);
 
 private:
 
-    /// Convert an index into m_services vector into a UID
-    static u32 GetUIDFromIndex(const int index) {
-        return index | 0x10000000;
-    }
-
-    /// Convert a UID into an index into m_services
-    static int GetIndexFromUID(const u32 uid) {
-        return uid & 0x0FFFFFFF;
-    }
-
     std::vector<Interface*>     m_services;
     std::map<std::string, u32>  m_port_map;
+
 };
 
 /// Initialize ServiceManager
diff --git a/src/core/hle/service/srv.cpp b/src/core/hle/service/srv.cpp
index 071741444..ff6da8f1c 100644
--- a/src/core/hle/service/srv.cpp
+++ b/src/core/hle/service/srv.cpp
@@ -16,18 +16,24 @@ void Initialize(Service::Interface* self) {
     NOTICE_LOG(OSHLE, "SRV::Sync - Initialize");
 }
 
+void GetProcSemaphore(Service::Interface* self) {
+    // Get process semaphore? (stub: nothing is created here, only a placeholder value is written back)
+    u32* cmd_buff = Service::GetCommandBuffer();
+    cmd_buff[3] = 0xDEADBEEF; // Return something... 0 == NULL, raises an exception
+}
+
 void GetServiceHandle(Service::Interface* self) {
-    Syscall::Result res = 0;
+    Result res = 0;
     u32* cmd_buff = Service::GetCommandBuffer();
 
     std::string port_name = std::string((const char*)&cmd_buff[1], 0, Service::kMaxPortSize);
     Service::Interface* service = Service::g_manager->FetchFromPortName(port_name);
 
     NOTICE_LOG(OSHLE, "SRV::Sync - GetHandle - port: %s, handle: 0x%08X", port_name.c_str(), 
-        service->GetUID());
+        (NULL != service) ? service->GetHandle() : 0); // service is NULL for an unknown port (checked below)
 
     if (NULL != service) {
-        cmd_buff[3] = service->GetUID();
+        cmd_buff[3] = service->GetHandle();
     } else {
         ERROR_LOG(OSHLE, "Service %s does not exist", port_name.c_str());
         res = -1;
@@ -39,7 +45,7 @@ void GetServiceHandle(Service::Interface* self) {
 
 const Interface::FunctionInfo FunctionTable[] = {
     {0x00010002, Initialize,        "Initialize"},
-    {0x00020000, NULL,              "GetProcSemaphore"},
+    {0x00020000, GetProcSemaphore,  "GetProcSemaphore"},
     {0x00030100, NULL,              "RegisterService"},
     {0x000400C0, NULL,              "UnregisterService"},
     {0x00050100, GetServiceHandle,  "GetServiceHandle"},
diff --git a/src/core/hle/service/srv.h b/src/core/hle/service/srv.h
index 760c976b4..1e35032ba 100644
--- a/src/core/hle/service/srv.h
+++ b/src/core/hle/service/srv.h
@@ -22,7 +22,7 @@ public:
      * Gets the string name used by CTROS for the service
      * @return Port name of service
      */
-    std::string GetPortName() const {
+    const char *GetPortName() const {
         return "srv:";
     }
 
@@ -30,7 +30,7 @@ public:
      * Called when svcSendSyncRequest is called, loads command buffer and executes comand
      * @return Return result of svcSendSyncRequest passed back to user app
      */
-    Syscall::Result Sync();
+    Result Sync();
 
 };
 
diff --git a/src/core/hle/syscall.cpp b/src/core/hle/svc.cpp
similarity index 74%
rename from src/core/hle/syscall.cpp
rename to src/core/hle/svc.cpp
index d47df6038..90c05cb74 100644
--- a/src/core/hle/syscall.cpp
+++ b/src/core/hle/svc.cpp
@@ -3,17 +3,25 @@
 // Refer to the license.txt file included.  
 
 #include <map>
+#include <string>
+
+#include "common/symbols.h"
 
 #include "core/mem_map.h"
 
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/thread.h"
+
 #include "core/hle/function_wrappers.h"
-#include "core/hle/syscall.h"
+#include "core/hle/svc.h"
 #include "core/hle/service/service.h"
+#include "core/hle/kernel/thread.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-// Namespace Syscall
+// Namespace SVC
 
-namespace Syscall {
+namespace SVC {
 
 enum ControlMemoryOperation {
     MEMORY_OPERATION_HEAP       = 0x00000003,
@@ -26,7 +34,8 @@ enum MapMemoryPermission {
 };
 
 /// Map application or GSP heap memory
-Result ControlMemory(u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
+Result ControlMemory(void* _outaddr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
+    u32* outaddr = (u32*)_outaddr;
     u32 virtual_address = 0x00000000;
 
     DEBUG_LOG(SVC, "ControlMemory called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", 
@@ -48,7 +57,9 @@ Result ControlMemory(u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissi
     default:
         ERROR_LOG(SVC, "ControlMemory unknown operation=0x%08X", operation);
     }
-
+    if (NULL != outaddr) {
+        *outaddr = virtual_address;
+    }
     Core::g_app_core->SetReg(1, virtual_address);
 
     return 0;
@@ -72,17 +83,20 @@ Result MapMemoryBlock(Handle memblock, u32 addr, u32 mypermissions, u32 otherper
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
 Result ConnectToPort(void* out, const char* port_name) {
-    
     Service::Interface* service = Service::g_manager->FetchFromPortName(port_name);
-    Core::g_app_core->SetReg(1, service->GetUID());
+    if (service) {
+        Core::g_app_core->SetReg(1, service->GetHandle());
+    } else {
+        PanicYesNo("ConnectToPort called port_name=%s, but it is not implemented!", port_name);
+    }
     DEBUG_LOG(SVC, "ConnectToPort called port_name=%s", port_name);
     return 0;
 }
 
 /// Synchronize to an OS service
-Result SendSyncRequest(Handle session) {
-    DEBUG_LOG(SVC, "SendSyncRequest called session=0x%08X");
-    Service::Interface* service = Service::g_manager->FetchFromUID(session);
+Result SendSyncRequest(Handle handle) {
+    DEBUG_LOG(SVC, "SendSyncRequest called handle=0x%08X", handle);
+    Service::Interface* service = Service::g_manager->FetchFromHandle(handle); // NOTE(review): not NULL-checked before Sync() - confirm a bad handle cannot reach here
     service->Sync();
     return 0;
 }
@@ -95,10 +109,25 @@ Result CloseHandle(Handle handle) {
 }
 
 /// Wait for a handle to synchronize, timeout after the specified nanoseconds
-Result WaitSynchronization1(Handle handle, s64 nanoseconds) {
-    // ImplementMe
+Result WaitSynchronization1(Handle handle, s64 nano_seconds) {
     DEBUG_LOG(SVC, "(UNIMPLEMENTED) WaitSynchronization1 called handle=0x%08X, nanoseconds=%d", 
-        handle, nanoseconds);
+        handle, nano_seconds);
+    Kernel::WaitCurrentThread(WAITTYPE_SYNCH); // TODO(bunnei): Is this correct?
+    return 0;
+}
+
+/// Wait for the given handles to synchronize, timeout after the specified nanoseconds (currently only logs them and waits)
+Result WaitSynchronizationN(void* _out, void* _handles, u32 handle_count, u32 wait_all, s64 nano_seconds) {
+    s32* out = (s32*)_out; // Not written yet (unimplemented)
+    Handle* handles = (Handle*)_handles;
+
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) WaitSynchronizationN called handle_count=%d, wait_all=%s, nanoseconds=%lld",
+        handle_count, (wait_all ? "true" : "false"), (long long)nano_seconds); // 64-bit value needs %lld
+
+    for (u32 i = 0; i < handle_count; i++) {
+        DEBUG_LOG(SVC, "\thandle[%d]=0x%08X", i, handles[i]);
+    }
+    Kernel::WaitCurrentThread(WAITTYPE_SYNCH); // TODO(bunnei): Is this correct?
     return 0;
 }
 
@@ -106,7 +135,7 @@ Result WaitSynchronization1(Handle handle, s64 nanoseconds) {
 Result CreateAddressArbiter(void* arbiter) {
     // ImplementMe
     DEBUG_LOG(SVC, "(UNIMPLEMENTED) CreateAddressArbiter called");
-    Core::g_app_core->SetReg(1, 0xDEADBEEF);
+    Core::g_app_core->SetReg(1, 0xFABBDADD);
     return 0;
 }
 
@@ -134,16 +163,79 @@ Result GetResourceLimitCurrentValues(void* _values, Handle resource_limit, void*
     return 0;
 }
 
-const HLE::FunctionDef Syscall_Table[] = {
+/// Creates a new thread named after the entry point's symbol (or "unknown-<address>"); the handle is returned in r1
+Result CreateThread(u32 priority, u32 entry_point, u32 arg, u32 stack_top, u32 processor_id) {
+    std::string name;
+    if (Symbols::HasSymbol(entry_point)) {
+        TSymbol symbol = Symbols::GetSymbol(entry_point);
+        name = symbol.name;
+    } else {
+        char buff[100];
+        sprintf(buff, "unknown-%08X", entry_point); // Format the address into the fallback name
+        name = buff;
+    }
+
+    Handle thread = Kernel::CreateThread(name.c_str(), entry_point, priority, arg, processor_id,
+        stack_top);
+
+    Core::g_app_core->SetReg(1, thread);
+
+    DEBUG_LOG(SVC, "CreateThread called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, "
+        "threadpriority=0x%08X, processorid=0x%08X : created handle 0x%08X", entry_point,
+        name.c_str(), arg, stack_top, priority, processor_id, thread);
+
+    return 0;
+}
+
+/// Create a mutex; the new handle is returned in r1 and, when _mutex is non-NULL, written through it
+Result CreateMutex(void* _mutex, u32 initial_locked) {
+    Handle mutex = Kernel::CreateMutex((initial_locked != 0));
+    if (NULL != _mutex) *(Handle*)_mutex = mutex; // Same NULL guard ControlMemory applies to its out pointer
+    Core::g_app_core->SetReg(1, mutex);
+    DEBUG_LOG(SVC, "CreateMutex called initial_locked=%s : created handle 0x%08X",
+        initial_locked ? "true" : "false", mutex);
+    return 0;
+}
+
+/// Release a mutex: logs the handle, forwards it to Kernel::ReleaseMutex, and always returns 0
+Result ReleaseMutex(Handle handle) {
+    DEBUG_LOG(SVC, "ReleaseMutex called handle=0x%08X", handle);
+    Kernel::ReleaseMutex(handle);
+    return 0;
+}
+
+/// Get current thread ID (UNIMPLEMENTED stub: logs the call, never writes thread_id, returns 0)
+Result GetThreadId(void* thread_id, u32 thread) {
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) GetThreadId called thread=0x%08X", thread);
+    return 0;
+}
+
+/// Query memory (UNIMPLEMENTED stub: the out pointers are cast but never written; logs addr and returns 0)
+Result QueryMemory(void *_info, void *_out, u32 addr) {
+    MemoryInfo* info = (MemoryInfo*) _info;
+    PageInfo* out = (PageInfo*) _out;
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) QueryMemory called addr=0x%08X", addr);
+    return 0;
+}
+
+/// Create an event (UNIMPLEMENTED stub: no event object is created; r1 is set to a placeholder value)
+Result CreateEvent(void* _event, u32 reset_type) {
+    Handle* event = (Handle*)_event; // Currently unused: nothing is written through it
+    DEBUG_LOG(SVC, "(UNIMPLEMENTED) CreateEvent called reset_type=0x%08X", reset_type);
+    Core::g_app_core->SetReg(1, 0xBADC0DE0); // Placeholder, not a real handle
+    return 0;
+}
+
+const HLE::FunctionDef SVC_Table[] = {
     {0x00,  NULL,                                       "Unknown"},
-    {0x01,  WrapI_UUUUU<ControlMemory>,                 "ControlMemory"},
-    {0x02,  NULL,                                       "QueryMemory"},
+    {0x01,  WrapI_VUUUUU<ControlMemory>,                "ControlMemory"},
+    {0x02,  WrapI_VVU<QueryMemory>,                     "QueryMemory"},
     {0x03,  NULL,                                       "ExitProcess"},
     {0x04,  NULL,                                       "GetProcessAffinityMask"},
     {0x05,  NULL,                                       "SetProcessAffinityMask"},
     {0x06,  NULL,                                       "GetProcessIdealProcessor"},
     {0x07,  NULL,                                       "SetProcessIdealProcessor"},
-    {0x08,  NULL,                                       "CreateThread"},
+    {0x08,  WrapI_UUUUU<CreateThread>,                  "CreateThread"},
     {0x09,  NULL,                                       "ExitThread"},
     {0x0A,  NULL,                                       "SleepThread"},
     {0x0B,  NULL,                                       "GetThreadPriority"},
@@ -154,11 +246,11 @@ const HLE::FunctionDef Syscall_Table[] = {
     {0x10,  NULL,                                       "SetThreadIdealProcessor"},
     {0x11,  NULL,                                       "GetCurrentProcessorNumber"},
     {0x12,  NULL,                                       "Run"},
-    {0x13,  NULL,                                       "CreateMutex"},
-    {0x14,  NULL,                                       "ReleaseMutex"},
+    {0x13,  WrapI_VU<CreateMutex>,                      "CreateMutex"},
+    {0x14,  WrapI_U<ReleaseMutex>,                      "ReleaseMutex"},
     {0x15,  NULL,                                       "CreateSemaphore"},
     {0x16,  NULL,                                       "ReleaseSemaphore"},
-    {0x17,  NULL,                                       "CreateEvent"},
+    {0x17,  WrapI_VU<CreateEvent>,                      "CreateEvent"},
     {0x18,  NULL,                                       "SignalEvent"},
     {0x19,  NULL,                                       "ClearEvent"},
     {0x1A,  NULL,                                       "CreateTimer"},
@@ -172,7 +264,7 @@ const HLE::FunctionDef Syscall_Table[] = {
     {0x22,  NULL,                                       "ArbitrateAddress"},
     {0x23,  WrapI_U<CloseHandle>,                       "CloseHandle"},
     {0x24,  WrapI_US64<WaitSynchronization1>,           "WaitSynchronization1"},
-    {0x25,  NULL,                                       "WaitSynchronizationN"},
+    {0x25,  WrapI_VVUUS64<WaitSynchronizationN>,        "WaitSynchronizationN"},
     {0x26,  NULL,                                       "SignalAndWait"},
     {0x27,  NULL,                                       "DuplicateHandle"},
     {0x28,  NULL,                                       "GetSystemTick"},
@@ -190,7 +282,7 @@ const HLE::FunctionDef Syscall_Table[] = {
     {0x34,  NULL,                                       "OpenThread"},
     {0x35,  NULL,                                       "GetProcessId"},
     {0x36,  NULL,                                       "GetProcessIdOfThread"},
-    {0x37,  NULL,                                       "GetThreadId"},
+    {0x37,  WrapI_VU<GetThreadId>,                      "GetThreadId"},
     {0x38,  WrapI_VU<GetResourceLimit>,                 "GetResourceLimit"},
     {0x39,  NULL,                                       "GetResourceLimitLimitValues"},
     {0x3A,  WrapI_VUVI<GetResourceLimitCurrentValues>,  "GetResourceLimitCurrentValues"},
@@ -264,7 +356,7 @@ const HLE::FunctionDef Syscall_Table[] = {
 };
 
 void Register() {
-    HLE::RegisterModule("SyscallTable", ARRAY_SIZE(Syscall_Table), Syscall_Table);
+    HLE::RegisterModule("SVC_Table", ARRAY_SIZE(SVC_Table), SVC_Table);
 }
 
 } // namespace
diff --git a/src/core/hle/svc.h b/src/core/hle/svc.h
new file mode 100644
index 000000000..5c35977d1
--- /dev/null
+++ b/src/core/hle/svc.h
@@ -0,0 +1,48 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.  
+
+#pragma once
+
+#include "common/common_types.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// SVC types
+
+struct MemoryInfo {     // Type that SVC::QueryMemory (svc.cpp) casts its first out pointer to
+    u32 base_address;   // presumably the start address of the queried region - TODO confirm
+    u32 size;           // presumably the region size in bytes - TODO confirm
+    u32 permission;     // NOTE(review): encoding not shown in this change
+    u32 state;          // NOTE(review): encoding not shown in this change
+};
+
+struct PageInfo {       // Type that SVC::QueryMemory (svc.cpp) casts its second out pointer to
+    u32 flags;          // NOTE(review): flag bits are not defined in this change
+};
+
+struct ThreadContext {      // Snapshot of CPU and FPU register state for one thread
+    u32 cpu_registers[13];  // presumably general-purpose registers r0-r12 - TODO confirm
+    u32 sp;
+    u32 lr;
+    u32 pc;
+    u32 cpsr;
+    u32 fpu_registers[32];
+    u32 fpscr;
+    u32 fpexc;
+};
+
+enum ResetType {            // presumably the reset_type values accepted by SVC::CreateEvent - TODO confirm
+    RESETTYPE_ONESHOT,
+    RESETTYPE_STICKY,
+    RESETTYPE_PULSE,
+    RESETTYPE_MAX_BIT = (1u << 31),  // Not a reset mode; presumably forces a 32-bit enum range - TODO confirm
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Namespace SVC
+
+namespace SVC {
+
+void Register();
+
+} // namespace
diff --git a/src/core/hle/syscall.h b/src/core/hle/syscall.h
deleted file mode 100644
index 7a94e0136..000000000
--- a/src/core/hle/syscall.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.  
-
-#pragma once
-
-#include "common/common_types.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-// Namespace Syscall
-
-namespace Syscall {
-
-typedef u32 Handle;
-typedef s32 Result;
-
-void Register();
-
-} // namespace
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 6468053f2..b57563a73 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -11,6 +11,8 @@
 
 #include "video_core/video_core.h"
 
+#include "core/hle/kernel/thread.h"
+
 namespace LCD {
 
 Registers g_regs;
@@ -130,9 +132,11 @@ template void Write<u8>(u32 addr, const u8 data);
 void Update() {
     u64 current_ticks = Core::g_app_core->GetTicks();
 
+    // Fake a vertical blank
     if ((current_ticks - g_last_ticks) >= kFrameTicks) {
         g_last_ticks = current_ticks;
         VideoCore::g_renderer->SwapBuffers();
+        Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
     }
 }
 
diff --git a/src/core/loader.cpp b/src/core/loader.cpp
index 8756588ae..ff1c873bb 100644
--- a/src/core/loader.cpp
+++ b/src/core/loader.cpp
@@ -10,7 +10,7 @@
 #include "core/core.h"
 #include "core/file_sys/directory_file_system.h"
 #include "core/elf/elf_reader.h"
-
+#include "core/hle/kernel/kernel.h"
 #include "core/mem_map.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -56,7 +56,7 @@ bool Load_ELF(std::string &filename) {
         elf_reader = new ElfReader(buffer);
         elf_reader->LoadInto(0x00100000);
 
-        Core::g_app_core->SetPC(elf_reader->GetEntryPoint());
+        Kernel::LoadExec(elf_reader->GetEntryPoint());
 
         delete[] buffer;
         delete elf_reader;
@@ -89,11 +89,11 @@ bool Load_DAT(std::string &filename) {
         * but for the sake of making it easier... we'll temporarily/hackishly
         * allow it. No sense in making a proper reader for this.
         */
-        u32 entrypoint = 0x00100000; // write to same entrypoint as elf
+        u32 entry_point = 0x00100000; // write to same entrypoint as elf
         u32 payload_offset = 0xA150;
         
         const u8 *src = &buffer[payload_offset];
-        u8 *dst = Memory::GetPointer(entrypoint);
+        u8 *dst = Memory::GetPointer(entry_point);
         u32 srcSize = size - payload_offset; //just load everything...
         u32 *s = (u32*)src;
         u32 *d = (u32*)dst;
@@ -102,7 +102,8 @@ bool Load_DAT(std::string &filename) {
             *d++ = (*s++);
         }
         
-        Core::g_app_core->SetPC(entrypoint);
+        Kernel::LoadExec(entry_point);
+
 
         delete[] buffer;
     }
@@ -131,10 +132,10 @@ bool Load_BIN(std::string &filename) {
 
         f.ReadBytes(buffer, size);
 
-        u32 entrypoint = 0x00100000; // Hardcoded, read from exheader
+        u32 entry_point = 0x00100000; // Hardcoded, read from exheader
         
         const u8 *src = buffer;
-        u8 *dst = Memory::GetPointer(entrypoint);
+        u8 *dst = Memory::GetPointer(entry_point);
         u32 srcSize = size;
         u32 *s = (u32*)src;
         u32 *d = (u32*)dst;
@@ -143,7 +144,7 @@ bool Load_BIN(std::string &filename) {
             *d++ = (*s++);
         }
         
-        Core::g_app_core->SetPC(entrypoint);
+        Kernel::LoadExec(entry_point);
 
         delete[] buffer;
     }
@@ -186,6 +187,9 @@ FileType IdentifyFile(std::string &filename) {
     else if (!strcasecmp(extension.c_str(), ".elf")) {
         return FILETYPE_CTR_ELF; // TODO(bunnei): Do some filetype checking :p
     }
+    else if (!strcasecmp(extension.c_str(), ".axf")) {
+        return FILETYPE_CTR_ELF; // TODO(bunnei): Do some filetype checking :p
+    }
     else if (!strcasecmp(extension.c_str(), ".bin")) {
         return FILETYPE_CTR_BIN;
     }
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
index 59560b87d..c45746be9 100644
--- a/src/core/mem_map.cpp
+++ b/src/core/mem_map.cpp
@@ -17,6 +17,7 @@ u8*    g_base                   = NULL;         ///< The base pointer to the aut
 MemArena g_arena;                               ///< The MemArena class
 
 u8* g_exefs_code                = NULL;         ///< ExeFS:/.code is loaded here
+u8* g_system_mem                = NULL;         ///< System memory
 u8* g_heap                      = NULL;         ///< Application heap (main memory)
 u8* g_heap_gsp                  = NULL;         ///< GSP heap (main memory)
 u8* g_vram                      = NULL;         ///< Video memory (VRAM) pointer
@@ -27,6 +28,7 @@ u8* g_physical_bootrom          = NULL;         ///< Bootrom physical memory
 u8* g_uncached_bootrom          = NULL;
 
 u8* g_physical_exefs_code       = NULL;         ///< Phsical ExeFS:/.code is loaded here
+u8* g_physical_system_mem       = NULL;         ///< System physical memory
 u8* g_physical_fcram            = NULL;         ///< Main physical memory (FCRAM)
 u8* g_physical_heap_gsp         = NULL;         ///< GSP heap physical memory
 u8* g_physical_vram             = NULL;         ///< Video physical memory (VRAM)
@@ -39,6 +41,7 @@ static MemoryView g_views[] = {
     {&g_vram,       &g_physical_vram,       VRAM_VADDR,             VRAM_SIZE,          0},
     {&g_heap,       &g_physical_fcram,      HEAP_VADDR,             HEAP_SIZE,          MV_IS_PRIMARY_RAM},
     {&g_shared_mem, &g_physical_shared_mem, SHARED_MEMORY_VADDR,    SHARED_MEMORY_SIZE, 0},
+    {&g_system_mem, &g_physical_system_mem, SYSTEM_MEMORY_VADDR,    SYSTEM_MEMORY_SIZE,    0},
     {&g_kernel_mem, &g_physical_kernel_mem, KERNEL_MEMORY_VADDR,    KERNEL_MEMORY_SIZE, 0},
     {&g_heap_gsp,   &g_physical_heap_gsp,   HEAP_GSP_VADDR,         HEAP_GSP_SIZE,      0},
 };
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
index af2212a5f..12d497ef3 100644
--- a/src/core/mem_map.h
+++ b/src/core/mem_map.h
@@ -47,6 +47,12 @@ enum {
     EXEFS_CODE_VADDR_END    = (EXEFS_CODE_VADDR + EXEFS_CODE_SIZE),
     EXEFS_CODE_MASK         = 0x03FFFFFF,
 
+    // Region of FCRAM used by system
+    SYSTEM_MEMORY_SIZE      = 0x02C00000,   ///< 44MB
+    SYSTEM_MEMORY_VADDR     = 0x04000000,
+    SYSTEM_MEMORY_VADDR_END = (SYSTEM_MEMORY_VADDR + SYSTEM_MEMORY_SIZE),
+    SYSTEM_MEMORY_MASK      = 0x03FFFFFF,
+
     HEAP_SIZE               = FCRAM_SIZE,   ///< Application heap size
     //HEAP_PADDR              = HEAP_GSP_SIZE,
     //HEAP_PADDR_END          = (HEAP_PADDR + HEAP_SIZE),
@@ -116,6 +122,7 @@ extern u8* g_heap;          ///< Application heap (main memory)
 extern u8* g_vram;          ///< Video memory (VRAM)
 extern u8* g_shared_mem;    ///< Shared memory
 extern u8* g_kernel_mem;    ///< Kernel memory
+extern u8* g_system_mem;    ///< System memory
 extern u8* g_exefs_code;    ///< ExeFS:/.code is loaded here
 
 void Init();
diff --git a/src/core/mem_map_funcs.cpp b/src/core/mem_map_funcs.cpp
index 8ab647714..86e9eaa20 100644
--- a/src/core/mem_map_funcs.cpp
+++ b/src/core/mem_map_funcs.cpp
@@ -73,6 +73,10 @@ inline void _Read(T &var, const u32 addr) {
     } else if ((vaddr >= SHARED_MEMORY_VADDR)  && (vaddr < SHARED_MEMORY_VADDR_END)) {
         var = *((const T*)&g_shared_mem[vaddr & SHARED_MEMORY_MASK]);
 
+    // System memory
+    } else if ((vaddr >= SYSTEM_MEMORY_VADDR)  && (vaddr < SYSTEM_MEMORY_VADDR_END)) {
+        var = *((const T*)&g_system_mem[vaddr & SYSTEM_MEMORY_MASK]);
+
     // Config memory
     } else if ((vaddr >= CONFIG_MEMORY_VADDR)  && (vaddr < CONFIG_MEMORY_VADDR_END)) {
         ConfigMem::Read<T>(var, vaddr);
@@ -115,6 +119,10 @@ inline void _Write(u32 addr, const T data) {
     } else if ((vaddr >= SHARED_MEMORY_VADDR)  && (vaddr < SHARED_MEMORY_VADDR_END)) {
         *(T*)&g_shared_mem[vaddr & SHARED_MEMORY_MASK] = data;
 
+    // System memory
+    } else if ((vaddr >= SYSTEM_MEMORY_VADDR)  && (vaddr < SYSTEM_MEMORY_VADDR_END)) {
+         *(T*)&g_system_mem[vaddr & SYSTEM_MEMORY_MASK] = data;
+
     // VRAM
     } else if ((vaddr >= VRAM_VADDR)  && (vaddr < VRAM_VADDR_END)) {
         *(T*)&g_vram[vaddr & VRAM_MASK] = data;
@@ -153,9 +161,13 @@ u8 *GetPointer(const u32 addr) {
         return g_heap + (vaddr & HEAP_MASK);
 
     // Shared memory
-    } else if ((vaddr > SHARED_MEMORY_VADDR)  && (vaddr < SHARED_MEMORY_VADDR_END)) {
+    } else if ((vaddr >= SHARED_MEMORY_VADDR)  && (vaddr < SHARED_MEMORY_VADDR_END)) {
         return g_shared_mem + (vaddr & SHARED_MEMORY_MASK);
 
+    // System memory
+    } else if ((vaddr >= SYSTEM_MEMORY_VADDR)  && (vaddr < SYSTEM_MEMORY_VADDR_END)) {
+         return g_system_mem + (vaddr & SYSTEM_MEMORY_MASK);
+
     // VRAM
     } else if ((vaddr > VRAM_VADDR)  && (vaddr < VRAM_VADDR_END)) {
         return g_vram + (vaddr & VRAM_MASK);