userland: replace legacy cc with native TCC and vendor submodule

2026-05-30 10:26:59 +00:00 · 2026-05-09 01:54:09 +02:00
parent e2ecef39e6
commit 93722faa91
546 changed files with 145840 additions and 564 deletions
--- a/src/userland/cli/third_party/tcc/lib/Makefile
+++ b/src/userland/cli/third_party/tcc/lib/Makefile
@@ -0,0 +1,106 @@
+#
+# Tiny C Compiler Makefile for libtcc1.a
+#
+
+# Top-level tcc directory; its Makefile is included for the shared settings.
+TOP = ..
+include $(TOP)/Makefile
+# Directories searched for the sources behind the object lists below.
+VPATH = $(TOPSRC)/lib $(TOPSRC)/win32/lib
+# T: target name -- CROSS_TARGET if set, else NATIVE_TARGET, else "unknown".
+T = $(or $(CROSS_TARGET),$(NATIVE_TARGET),unknown)
+# X: "<CROSS_TARGET>-" file name prefix for cross builds, empty otherwise.
+X = $(if $(CROSS_TARGET),$(CROSS_TARGET)-)
+# XCFG: "-win" when the target name contains "-win", otherwise "-unx".
+XCFG = $(or $(findstring -win,$T),-unx)
+# S: recipe prefix. When SILENT contains "yes" it expands to "@" (command is
+# not echoed) and reports "* <target>" through the info function instead.
+S = $(if $(findstring yes,$(SILENT)),@$(info * $@))
+
+# TCC is the compiler binary every rule below lists as a prerequisite.
+# XTCC is the compiler actually run; it can be overridden from outside (?=).
+TCC = $(TOP)/$(X)tcc$(EXESUF)
+XTCC ?= $(TOP)/$(X)tcc$(EXESUF)
+# By default both compiling and archiving go through tcc ("tcc -ar").
+XCC = $(XTCC)
+XAR = $(XTCC) -ar
+# Compiler flags are picked by the XCFG suffix computed above.
+XFLAGS-unx = -B$(TOPSRC)
+XFLAGS-win = -B$(TOPSRC)/win32 -I$(TOPSRC)/include
+XFLAGS = $(XFLAGS$(XCFG))
+# BFLAGS: extra flags that are appended to XFLAGS for bcheck.o only.
+BFLAGS = -bt
+
+# in order to use gcc, type: make <target>-libtcc1-usegcc=yes
+# (the arm default below can be overridden on the command line or earlier)
+arm-libtcc1-usegcc ?= no
+
+# This makes bounds checking 40%..60% faster.
+#x86_64-libtcc1-usegcc=yes
+#i386-libtcc1-usegcc=yes
+
+# When <T>-libtcc1-usegcc is "yes", replace the tcc based tools and flags set
+# up above with $(CC)/$(AR)/$(CFLAGS); BFLAGS then selects the debug format
+# from CONFIG_dwarf.
+ifeq "$($(T)-libtcc1-usegcc)" "yes"
+ XCC = $(CC)
+ XAR = $(AR)
+ XFLAGS = $(CFLAGS) -fPIC -fno-omit-frame-pointer -Wno-unused-function -Wno-unused-variable
+ BFLAGS = $(if $(CONFIG_dwarf),-gdwarf,-gstabs)
+endif
+
+# Added for both tool choices: headers in the top-level directory.
+XFLAGS += -I$(TOP)
+
+# Per-architecture object lists. They refer to COMMON_O before it is defined;
+# this works because "=" variables are expanded when used, not when assigned.
+I386_O = libtcc1.o $(COMMON_O)
+X86_64_O = libtcc1.o $(COMMON_O)
+ARM_O = libtcc1.o armeabi.o armflush.o $(COMMON_O)
+ARM64_O = lib-arm64.o $(COMMON_O)
+# NOTE(review): riscv64 lists lib-arm64.o as well; presumably that source is
+# shared between the two 64-bit targets -- confirm before changing.
+RISCV64_O = lib-arm64.o $(COMMON_O)
+COMMON_O = stdatomic.o atomic.o builtin.o alloca.o alloca-bt.o
+# Per-OS object lists (the OSX list is empty).
+WIN_O = crt1.o crt1w.o wincrt1.o wincrt1w.o dllcrt1.o dllmain.o winex.o
+LIN_O = dsohandle.o
+OSX_O =
+
+# backtrace/bcheck/run only for native compiler
+# Nat is "no" for cross builds (X non-empty) and empty for native builds.
+# Cbt/Cbc extend it with CONFIG_backtrace/CONFIG_bcheck after removing "yes",
+# so they are empty only for a native build whose option is empty or "yes".
+Nat = $(if $X,no,)
+Cbt = $(Nat)$(subst yes,,$(CONFIG_backtrace))
+Cbc = $(Cbt)$(subst yes,,$(CONFIG_bcheck))
+
+# The switch is used as a variable name prefix: an empty prefix appends to
+# the real list, a non-empty one (e.g. "noCOMMON_O") appends to a variable
+# that nothing in this file reads.
+$(Nat)COMMON_O += runmain.o tcov.o
+$(Cbt)COMMON_O += bt-exe.o bt-log.o
+$(Cbt)WIN_O += bt-dll.o
+$(Cbc)COMMON_O += bcheck.o
+
+# not in libtcc1.a
+EXTRA_O = runmain.o bt-exe.o bt-dll.o bt-log.o bcheck.o
+
+# OBJ-<target>: complete object list for each supported target name; the one
+# matching $T is selected below through $(OBJ-$T).
+OBJ-i386 = $(I386_O) pic86.o $(LIN_O)
+OBJ-x86_64 = $(X86_64_O) va_list.o $(LIN_O)
+OBJ-x86_64-osx = $(X86_64_O) va_list.o $(OSX_O)
+OBJ-i386-win32 = $(I386_O) chkstk.o $(WIN_O)
+OBJ-x86_64-win32 = $(X86_64_O) chkstk.o $(WIN_O)
+OBJ-arm64 = $(ARM64_O) armflush.o $(LIN_O)
+OBJ-arm64-osx = $(ARM64_O) $(OSX_O)
+OBJ-arm64-win32 = $(ARM64_O) chkstk.o $(WIN_O)
+OBJ-arm = $(ARM_O) $(LIN_O)
+# All of the following arm variants reuse the plain arm list.
+OBJ-arm-fpa = $(OBJ-arm)
+OBJ-arm-fpa-ld = $(OBJ-arm)
+OBJ-arm-vfp = $(OBJ-arm)
+OBJ-arm-eabi = $(OBJ-arm)
+OBJ-arm-eabihf = $(OBJ-arm)
+OBJ-arm-wince = $(ARM_O) $(WIN_O)
+OBJ-riscv64 = $(RISCV64_O) $(LIN_O)
+
+# OBJ-extra: members of the selected list that are also in EXTRA_O; these are
+# kept as separate object files instead of going into the archive.
+# OBJ-libtcc1: the remaining members, each with the X prefix.
+OBJ-extra = $(filter $(EXTRA_O),$(OBJ-$T))
+OBJ-libtcc1 = $(addprefix $(X),$(filter-out $(OBJ-extra),$(OBJ-$T)))
+
+# Everything "all" builds: the archive plus the extra objects, all in $(TOP).
+ALL = $(addprefix $(TOP)/,$(X)libtcc1.a $(OBJ-extra))
+
+# Default goal: build everything listed in ALL.
+all: $(ALL)
+
+# Archive the library objects; also depends on the compiler binary.
+$(TOP)/$(X)libtcc1.a : $(OBJ-libtcc1) $(TCC)
+	$S$(XAR) rcs $@ $(OBJ-libtcc1)
+
+# Library objects from C or assembly sources (located through VPATH).
+$(X)%.o : %.c $(TCC)
+	$S$(XCC) -c $< -o $@ $(XFLAGS)
+
+$(X)%.o : %.S $(TCC)
+	$S$(XCC) -c $< -o $@ $(XFLAGS)
+
+# Objects that are written to $(TOP) directly (the OBJ-extra members in ALL).
+$(TOP)/%.o : %.c $(TCC)
+	$S$(XCC) -c $< -o $@ $(XFLAGS)
+
+# Target-specific addition: bcheck.o alone is compiled with BFLAGS as well.
+$(TOP)/bcheck.o : XFLAGS += $(BFLAGS)
+
+# Extra prerequisites only (no recipe): rebuild these when the listed source
+# changes.
+$(X)crt1w.o : crt1.c
+$(X)wincrt1w.o : wincrt1.c
+
+# don't try to make it
+# (the empty recipe ";" stops make from searching for a way to build tcc)
+$(TCC) : ;
+
+# Remove local objects plus the archives and extra objects placed in $(TOP).
+clean :
+	rm -f *.o $(addprefix $(TOP)/,*libtcc1.a $(EXTRA_O))
--- a/src/userland/cli/third_party/tcc/lib/alloca-bt.S
+++ b/src/userland/cli/third_party/tcc/lib/alloca-bt.S
@@ -0,0 +1,200 @@
+/* ---------------------------------------------- */
+/* alloca-bt.S */
+
+/* _(s): symbol name s with a leading underscore when the compiler defines
+   __leading_underscore, and s unchanged otherwise. */
+#ifdef __leading_underscore
+# define _(s) _##s
+#else
+# define _(s) s
+#endif
+
+/* ---------------------------------------------- */
+#if defined __i386__
+
+/*
+ * __bound_alloca (i386): alloca variant that also reports the new block
+ * through __bound_new_region(pointer, size).
+ *
+ * The size is read from the stack slot just above the return address.
+ * A size of 0 allocates nothing and leaves 0 in %eax.  Otherwise size + 1
+ * bytes, rounded up to a multiple of 4, are carved out below %esp and the
+ * new %esp is returned in %eax.  %eax, %ecx and %edx are used as scratch.
+ */
+.globl _(__bound_alloca)
+_(__bound_alloca):
+    pop     %edx                /* edx = return address */
+    pop     %eax                /* eax = requested size (stack argument) */
+    mov     %eax, %ecx          /* ecx = unrounded size for the region call */
+    test    %eax,%eax
+    jz      p6                  /* size 0: nothing to do, eax stays 0 */
+    add     $3 + 1,%eax         /* +3 to round up, +1 extra byte */
+    and     $-4,%eax            /* multiple of 4 */
+
+#ifdef _WIN32
+/* Move %esp down one 4096-byte page at a time, reading from each new page
+   before stepping onto it, while at least a full page remains. */
+p4:
+    cmp     $4096,%eax
+    jb      p5
+    test    %eax,-4096(%esp)    /* memory read only; result is discarded */
+    sub     $4096,%esp
+    sub     $4096,%eax
+    jmp p4
+
+p5:
+#endif
+
+    sub     %eax,%esp           /* allocate the (remaining) bytes */
+    mov     %esp,%eax           /* eax = start of the new block */
+
+    push    %edx                /* keep return address across the call */
+    push    %eax                /* keep block pointer across the call */
+    push    %ecx                /* argument 2: size */
+    push    %eax                /* argument 1: pointer */
+    call    _(__bound_new_region)
+    add     $8, %esp            /* drop the two arguments */
+    pop     %eax                /* block pointer = return value */
+    pop     %edx                /* return address */
+
+p6:
+    /* First push refills the argument slot popped on entry (presumably
+       because the caller removes its argument after the return); the
+       second push is the return address consumed by ret. */
+    push    %edx
+    push    %edx
+    ret
+
+/* ---------------------------------------------- */
+#elif defined __x86_64__
+
+/*
+ * __bound_alloca (x86-64): alloca variant for bounds checking; the new
+ * block is reported through __bound_new_region(pointer, size).
+ *
+ * Windows: the size arrives in %rcx.  __bound_alloca adds the separator
+ * byte and tail-jumps to alloca.  __bound_alloca_nr is a second entry
+ * point that expects the block pointer in %rax and size + 1 in %rcx; it
+ * registers the region (reserving 32 bytes of shadow space for the
+ * callee) and returns with %rax restored.
+ * NOTE(review): nothing in this file calls __bound_alloca_nr; presumably
+ * compiler-generated code does so after __bound_alloca -- confirm.
+ *
+ * Elsewhere: the size arrives in %rdi.  A size of 0 allocates nothing and
+ * returns 0 in %rax.  Otherwise size + 1 bytes, rounded up to a multiple
+ * of 16, are carved out below %rsp and the new %rsp is returned in %rax.
+ * The return address is held in %rdx while %rsp moves.
+ */
+.globl _(__bound_alloca)
+_(__bound_alloca):
+#ifdef _WIN32
+    inc %rcx            # add one extra to separate regions
+    jmp _(alloca)
+.globl _(__bound_alloca_nr)
+_(__bound_alloca_nr):
+    dec     %rcx                /* back to the size that was requested */
+    push    %rax                /* keep block pointer across the call */
+    mov     %rcx,%rdx           /* argument 2: size */
+    mov     %rax,%rcx           /* argument 1: pointer */
+    sub     $32,%rsp            /* shadow space for the callee */
+    call    _(__bound_new_region)
+    add     $32,%rsp
+    pop     %rax
+    ret
+#else
+    pop     %rdx                /* rdx = return address */
+    mov     %rdi,%rax           /* rax = requested size */
+    /* Zero test on the full 64-bit size.  This must not write the
+       register: a 32-bit "and %eax,%eax" would clear bits 63:32 of %rax
+       and silently truncate the size used below. */
+    test    %rax,%rax
+    jz      p3                  /* size 0: nothing to do, rax stays 0 */
+    mov     %rax,%rsi	# size, a second parm to the __bound_new_region
+    add     $15 + 1,%rax  # add one extra to separate regions
+    and     $-16,%rax           /* multiple of 16 */
+
+    sub     %rax,%rsp           /* allocate */
+    mov     %rsp,%rdi	# pointer, a first parm to the __bound_new_region
+    mov     %rsp,%rax           /* rax = start of the new block */
+
+    /* Two pushes: both values survive the call and %rsp moves by 16, so
+       its alignment at the call equals the alignment after the sub. */
+    push    %rdx
+    push    %rax
+    call    _(__bound_new_region)
+    pop     %rax                /* block pointer = return value */
+    pop     %rdx                /* return address */
+
+p3:
+    push    %rdx                /* put the return address back for ret */
+    ret
+#endif
+
+/* ---------------------------------------------- */
+#elif defined __arm__
+
+/*
+ * __bound_alloca (ARM): alloca variant for bounds checking.
+ *
+ * In:  r0 = requested size.
+ * Out: r0 = new sp = start of the block.
+ * size + 1 bytes are taken from the stack and sp is rounded down to a
+ * multiple of 8; the block is then reported through
+ * __bound_new_region(pointer, size).
+ */
+.globl _(__bound_alloca)
+_(__bound_alloca):
+    mov r1, r0                  /* r1 = size, argument 2 of the call */
+    add r0, r0, #1              /* one extra byte */
+    rsb sp, r0, sp              /* sp = sp - (size + 1) */
+    bic sp, sp, #7              /* round sp down to a multiple of 8 */
+    mov r0, sp                  /* r0 = block pointer, argument 1 */
+    /* Save lr together with r4: pushing an even number of registers keeps
+       sp a multiple of 8 at the call, as the AAPCS requires at public
+       interfaces.  Pushing lr alone would leave sp at 4 mod 8.  r4 is
+       only a filler and is restored unchanged. */
+    push { r4, lr }
+    bl _(__bound_new_region)
+    pop { r4, lr }
+    mov r0, sp                  /* return the block (sp is back at it) */
+    mov pc, lr
+
+/* ---------------------------------------------- */
+#elif defined __aarch64__ || defined __arm64__
+
+/*
+ * __bound_alloca (AArch64): alloca variant for bounds checking.
+ *
+ * In:  x0 = requested size.
+ * Out: x0 = new sp = start of the block.
+ * size + 16 is rounded down to a multiple of 16 and taken from the stack;
+ * the block is then reported through __bound_new_region(pointer, size)
+ * with x29/x30 saved around the call.
+ *
+ * Under __TINYC__ the routine is emitted as raw instruction words; the
+ * mnemonic next to each word is its counterpart in the readable #else
+ * branch, which performs the same sequence.
+ */
+.globl _(__bound_alloca)
+_(__bound_alloca):
+#ifdef __TINYC__
+    .int 0xaa0003e1             /* mov  x1, x0 */
+    .int 0x91004000             /* add  x0, x0, #16 */
+    .int 0x927cec00             /* and  x0, x0, #-16 */
+#ifdef _WIN32
+    .int 0xb4000160             /* cbz  x0, <mov x0, sp> */
+    .int 0xd2820002             /* mov  x2, #4096 */
+    .int 0xeb02001f             /* cmp  x0, x2 */
+    .int 0x540000c3             /* b.lo <second cbz> */
+    .int 0xcb2263e3             /* sub  x3, sp, x2 */
+    .int 0xf940007f             /* ldr  xzr, [x3] */
+    .int 0xcb2263ff             /* sub  sp, sp, x2 */
+    .int 0xcb020000             /* sub  x0, x0, x2 */
+    .int 0x17fffffa             /* b    <cmp> */
+    .int 0xb4000040             /* cbz  x0, <mov x0, sp> */
+#endif
+    .int 0xcb2063ff             /* sub  sp, sp, x0 */
+    .int 0x910003e0             /* mov  x0, sp */
+    .int 0xa9bf7bfd             /* stp  x29, x30, [sp, #-16]! */
+    .reloc ., R_AARCH64_CALL26,  _(__bound_new_region)
+    .int 0x94000000             /* bl   (target supplied by the .reloc) */
+    .int 0xa8c17bfd             /* ldp  x29, x30, [sp], #16 */
+    .int 0x910003e0             /* mov  x0, sp */
+    .int 0xd65f03c0             /* ret */
+#else
+    mov x1, x0          // x1 = size, second argument of the call below
+    add x0, x0, #16     // size + 16: 15 for rounding up plus 1 extra byte
+    and x0, x0, #-16    // Multiple of 16
+#ifdef _WIN32
+    cbz x0, p100        // If the rounded size is 0, skip the allocation
+    // Windows requires page-wise allocation with stack probing
+    mov x2, #4096       // Page size = 4096 bytes
+
+p101:
+    cmp x0, x2          // Compare remaining size with page size
+    b.lo    p102        // If less than page, jump to remainder
+
+    // Probe first, then allocate
+    sub x3, sp, x2      // Calculate guard page address (sp - 4096)
+    ldr xzr, [x3]       // Touch guard page FIRST
+    sub sp, sp, x2      // THEN allocate the page
+
+    sub x0, x0, x2      // Decrement remaining size
+    b   p101            // Continue loop
+
+p102:
+    // Allocate remaining bytes (less than one page)
+    cbz x0, p100        // If no remaining bytes, skip
+    sub sp, sp, x0      // Allocate remaining space
+#else
+    // Non-Windows: simple one-time allocation
+    sub sp, sp, x0      // Allocate space on stack
+#endif
+
+p100:
+    mov x0, sp          // x0 = block pointer, first argument of the call
+    stp x29, x30, [sp, #-16]!   // Save frame pointer and link register
+    bl _(__bound_new_region)
+    ldp x29, x30, [sp], #16     // Restore them; sp is back at the block
+    mov x0, sp          // Return allocated address
+    ret                 // Return to caller
+#endif
+
+/* ---------------------------------------------- */
+#elif defined __riscv
+
+/*
+ * __bound_alloca (RISC-V): alloca variant for bounds checking.
+ *
+ * In:  a0 = requested size.
+ * Out: a0 = new sp = start of the block.
+ * sp is lowered by size + 16 and rounded down to a multiple of 16; the
+ * block is then reported through __bound_new_region(pointer, size) with
+ * s0 and ra saved in a temporary 16-byte frame.
+ */
+.globl _(__bound_alloca)
+_(__bound_alloca):
+    mv     a1, a0               /* a1 = size, argument 2 of the call */
+    sub    sp, sp, a0           /* sp -= size */
+    addi   sp, sp, -16          /* a further 16 bytes */
+    andi   sp, sp, -16          /* round down to a multiple of 16 */
+    add    a0, sp, zero         /* a0 = block pointer, argument 1 */
+    addi   sp,sp,-16            /* temporary frame for s0 and ra */
+    sd     s0,0(sp)
+    sd     ra,8(sp)
+    jal    _(__bound_new_region)
+    ld     s0,0(sp)
+    ld     ra,8(sp)
+    addi   sp,sp,16             /* drop the frame; sp is back at the block */
+    add    a0, sp, zero         /* return the block pointer */
+    ret
+
+/* ---------------------------------------------- */
+#endif
--- a/src/userland/cli/third_party/tcc/lib/alloca.S
+++ b/src/userland/cli/third_party/tcc/lib/alloca.S
@@ -0,0 +1,148 @@
+/* ---------------------------------------------- */
+/* alloca.S */
+
+/* _(s): symbol name s with a leading underscore when the compiler defines
+   __leading_underscore, and s unchanged otherwise. */
+#ifdef __leading_underscore
+# define _(s) _##s
+#else
+# define _(s) s
+#endif
+
+/* ---------------------------------------------- */
+#if defined __i386__
+
+/*
+ * alloca / __alloca (i386): take stack space in the caller's frame.
+ *
+ * The size is read from the stack slot just above the return address and
+ * rounded up to a multiple of 4.  If the result is 0 nothing is allocated
+ * and 0 is returned; otherwise %esp is lowered by that amount and the new
+ * %esp is returned in %eax.  %eax and %edx are used as scratch.
+ */
+.globl _(alloca), _(__alloca)
+_(alloca):
+_(__alloca):
+    pop     %edx                /* edx = return address */
+    pop     %eax                /* eax = requested size (stack argument) */
+    add     $3,%eax
+    and     $-4,%eax            /* round up to a multiple of 4 */
+    jz      p3                  /* nothing to allocate: eax is 0 */
+
+#ifdef _WIN32
+/* Move %esp down one 4096-byte page at a time, reading from each new page
+   before stepping onto it, while at least a full page remains. */
+p1:
+    cmp     $4096,%eax
+    jb      p2
+    test    %eax,-4096(%esp)    /* memory read only; result is discarded */
+    sub     $4096,%esp
+    sub     $4096,%eax
+    jmp     p1
+p2:
+#endif
+    sub     %eax,%esp           /* allocate the (remaining) bytes */
+    mov     %esp,%eax           /* return value: start of the block */
+p3:
+    /* First push refills the argument slot popped on entry (presumably
+       because the caller removes its argument after the return); the
+       second push is the return address consumed by ret. */
+    push    %edx
+    push    %edx
+    ret
+
+/* ---------------------------------------------- */
+#elif defined __x86_64__
+
+/*
+ * alloca (x86-64): take stack space in the caller's frame.
+ *
+ * The size is read from %rcx on Windows and from %rdi elsewhere and is
+ * rounded up to a multiple of 16.  If the result is 0 nothing is
+ * allocated and 0 is returned; otherwise %rsp is lowered by that amount
+ * and the new %rsp is returned in %rax.  The return address is held in
+ * %rdx while %rsp moves.
+ */
+.globl _(alloca)
+_(alloca):
+    pop     %rdx                /* rdx = return address */
+#ifdef _WIN32
+    mov     %rcx,%rax
+#else
+    mov     %rdi,%rax
+#endif
+    add     $15,%rax
+    and     $-16,%rax           /* round up to a multiple of 16 */
+    jz      p3                  /* nothing to allocate: rax is 0 */
+
+#ifdef _WIN32
+/* Move %rsp down one 4096-byte page at a time, reading from each new page
+   before stepping onto it, while at least a full page remains. */
+p1:
+    cmp     $4096,%rax
+    jb      p2
+    test    %rax,-4096(%rsp)    /* memory read only; result is discarded */
+    sub     $4096,%rsp
+    sub     $4096,%rax
+    jmp p1
+p2:
+#endif
+    sub     %rax,%rsp           /* allocate the (remaining) bytes */
+    mov     %rsp,%rax           /* return value: start of the block */
+p3:
+    push    %rdx                /* put the return address back for ret */
+    ret
+
+/* ---------------------------------------------- */
+#elif defined __arm__
+
+/*
+ * alloca (ARM): take stack space in the caller's frame.
+ * In:  r0 = requested size.
+ * Out: r0 = new sp, lowered by the size and rounded down to a multiple
+ *      of 8.
+ */
+.globl _(alloca)
+_(alloca):
+    rsb sp, r0, sp              /* sp = sp - size */
+    bic sp, sp, #7              /* round sp down to a multiple of 8 */
+    mov r0, sp                  /* return the new sp */
+    mov pc, lr
+
+/* ---------------------------------------------- */
+#elif defined __aarch64__ || defined __arm64__
+
+/*
+ * alloca (AArch64): take stack space in the caller's frame.
+ *
+ * In:  x0 = requested size.
+ * Out: x0 = new sp, lowered by the size rounded up to a multiple of 16.
+ *
+ * Under __TINYC__ the routine is emitted as raw instruction words; the
+ * mnemonic next to each word is its counterpart in the readable #else
+ * branch, which performs the same sequence.
+ */
+.globl _(alloca)
+_(alloca):
+#ifdef __TINYC__
+    .int 0x91003c00             /* add  x0, x0, #15 */
+    .int 0x927cec00             /* and  x0, x0, #-16 */
+#ifdef _WIN32
+    .int 0xb4000160             /* cbz  x0, <mov x0, sp> */
+    .int 0xd2820001             /* mov  x1, #4096 */
+    .int 0xeb01001f             /* cmp  x0, x1 */
+    .int 0x540000c3             /* b.lo <second cbz> */
+    .int 0xcb2163e2             /* sub  x2, sp, x1 */
+    .int 0xf940005f             /* ldr  xzr, [x2] */
+    .int 0xcb2163ff             /* sub  sp, sp, x1 */
+    .int 0xcb010000             /* sub  x0, x0, x1 */
+    .int 0x17fffffa             /* b    <cmp> */
+    .int 0xb4000040             /* cbz  x0, <mov x0, sp> */
+#endif
+    .int 0xcb2063ff             /* sub  sp, sp, x0 */
+    .int 0x910003e0             /* mov  x0, sp */
+    .int 0xd65f03c0             /* ret */
+#else
+    add x0, x0, #15     // Round up to 16-byte boundary
+    and x0, x0, #-16    // Ensure 16-byte alignment
+#ifdef _WIN32
+    cbz x0, p100        // If size is 0, skip to return
+    // Windows requires page-wise allocation with stack probing
+    mov x1, #4096       // Page size = 4096 bytes
+
+p101:
+    cmp x0, x1          // Compare remaining size with page size
+    b.lo    p102        // If less than page, jump to remainder
+
+    // Probe first, then allocate
+    sub x2, sp, x1      // Calculate guard page address (sp - 4096)
+    ldr xzr, [x2]       // Touch guard page FIRST
+    sub sp, sp, x1      // THEN allocate the page
+
+    sub x0, x0, x1      // Decrement remaining size
+    b   p101            // Continue loop
+
+p102:
+    // Allocate remaining bytes (less than one page)
+    cbz x0, p100        // If no remaining bytes, skip
+    sub sp, sp, x0      // Allocate remaining space
+#else
+    // Non-Windows: simple one-time allocation
+    sub sp, sp, x0      // Allocate space on stack
+#endif
+
+p100:
+    mov x0, sp          // Return allocated address
+    ret                 // Return to caller
+#endif
+
+/* ---------------------------------------------- */
+#elif defined __riscv
+
+/*
+ * alloca (RISC-V): take stack space in the caller's frame.
+ * In:  a0 = requested size.
+ * Out: a0 = new sp = (old sp - size - 15) rounded down to a multiple of
+ *      16, which lies at least size bytes below the old sp.
+ */
+.globl _(alloca)
+_(alloca):
+    sub    sp, sp, a0           /* sp -= size */
+    addi   sp, sp, -15          /* a further 15 bytes ... */
+    andi   sp, sp, -16          /* ... then round down to a multiple of 16 */
+    add    a0, sp, zero         /* return the new sp */
+    ret
+
+#endif
--- a/src/userland/cli/third_party/tcc/lib/armeabi.c
+++ b/src/userland/cli/third_party/tcc/lib/armeabi.c
@@ -0,0 +1,642 @@
+/* TCC ARM runtime EABI
+   Copyright (C) 2013 Thomas Preud'homme
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.*/
+
+/* Integer limits: spelled out here when compiling with tcc itself, taken
+   from <limits.h> with any other compiler.  The values written below are
+   those of a 32-bit int, a 32-bit long and a 64-bit long long. */
+#ifdef __TINYC__
+#define INT_MIN (-2147483647 - 1)
+#define INT_MAX 2147483647
+#define UINT_MAX 0xffffffff
+#define LONG_MIN (-2147483647L - 1)
+#define LONG_MAX 2147483647L
+#define ULONG_MAX 0xffffffffUL
+#define LLONG_MAX 9223372036854775807LL
+#define LLONG_MIN (-9223372036854775807LL - 1)
+#define ULLONG_MAX 0xffffffffffffffffULL
+#else
+#include <limits.h>
+#endif
+
+/* We rely on the little endianness and EABI calling convention for this to
+   work */
+
+/* A 64-bit quantity as two 32-bit words, low word first.  The conversion
+   helpers below use it for long long results and for the raw bits of a
+   double. */
+typedef struct double_unsigned_struct {
+    unsigned low;
+    unsigned high;
+} double_unsigned_struct;
+
+/* Same layout, with a signed high word. */
+typedef struct unsigned_int_struct {
+    unsigned low;
+    int high;
+} unsigned_int_struct;
+
+/* REGS_RETURN(name, type) defines an empty static function
+   name_return(type ret).  The void helpers below call it as their last
+   statement; presumably passing the struct by value leaves it in the
+   registers in which the caller expects the result, which is what the
+   calling-convention remark above refers to -- TODO confirm. */
+#define REGS_RETURN(name, type) \
+    static void name ## _return(type ret) {}
+
+
+/* Float helper functions */
+
+/* Field widths of the two floating point formats handled below:
+   1 sign bit, then the exponent bits, then the fraction bits. */
+#define FLOAT_EXP_BITS 8
+#define FLOAT_FRAC_BITS 23
+
+#define DOUBLE_EXP_BITS 11
+#define DOUBLE_FRAC_BITS 52
+
+/* Stored exponent of the value 1.0, i.e. the exponent bias:
+   127 for FLOAT, 1023 for DOUBLE. */
+#define ONE_EXP(type) ((1 << (type ## _EXP_BITS - 1)) - 1)
+
+/* Instantiate unsigned_int_struct_return() and
+   double_unsigned_struct_return(). */
+REGS_RETURN(unsigned_int_struct, unsigned_int_struct)
+REGS_RETURN(double_unsigned_struct, double_unsigned_struct)
+
+/* float -> integer: (sign) 1.fraction x 2^(exponent - exp_for_one) */
+
+
+/* float to [unsigned] long long conversion
+ *
+ * DEFINE__AEABI_F2XLZ(name, with_sign) defines
+ *     void __aeabi_<name>(unsigned val)
+ * where val is the raw bit pattern of a single precision value.  The value
+ * is truncated toward zero; with_sign selects a signed (1) or unsigned (0)
+ * result.  The 64-bit result is passed to double_unsigned_struct_return()
+ * as the last statement, which is how the helpers in this file hand it back
+ * (see the calling-convention remark near the top of the file).
+ *
+ * Magnitudes below 1 (zero and denormals included) give 0.  Values whose
+ * truncated result does not fit return early without producing a result.
+ */
+#define DEFINE__AEABI_F2XLZ(name, with_sign)                                 \
+void __aeabi_ ## name(unsigned val)                                          \
+{                                                                            \
+    int exp, high_shift, sign;                                               \
+    double_unsigned_struct ret;                                              \
+                                                                             \
+    /* compute sign */                                                       \
+    sign = val >> 31;                                                        \
+                                                                             \
+    /* compute real exponent */                                              \
+    exp = val >> FLOAT_FRAC_BITS;                                            \
+    exp &= (1 << FLOAT_EXP_BITS) - 1;                                        \
+    exp -= ONE_EXP(FLOAT);                                                   \
+                                                                             \
+    /* |val| < 1 (zero and denormals included) truncates to 0; return it */  \
+    /* here so that no shift below is ever given a negative count */         \
+    if (exp < 0) {                                                           \
+        ret.low = ret.high = 0;                                              \
+        goto _ret_;                                                          \
+    }                                                                        \
+                                                                             \
+    /* undefined behavior if truncated value cannot be represented */        \
+    if (with_sign) {                                                         \
+        if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \
+            return;                                                          \
+    } else {                                                                 \
+        if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */  \
+            return;                                                          \
+    }                                                                        \
+                                                                             \
+    val &= (1 << FLOAT_FRAC_BITS) - 1;                                       \
+    if (exp >= 32) {                                                         \
+        ret.high = 1 << (exp - 32);                                          \
+        if (exp - 32 >= FLOAT_FRAC_BITS) {                                   \
+            ret.high |= val << (exp - 32 - FLOAT_FRAC_BITS);                 \
+            ret.low = 0;                                                     \
+        } else {                                                             \
+            high_shift = FLOAT_FRAC_BITS - (exp - 32);                       \
+            ret.high |= val >> high_shift;                                   \
+            ret.low = val << (32 - high_shift);                              \
+        }                                                                    \
+    } else {                                                                 \
+        ret.high = 0;                                                        \
+        ret.low = 1 << exp;                                                  \
+        if (exp > FLOAT_FRAC_BITS)                                           \
+            ret.low |= val << (exp - FLOAT_FRAC_BITS);                       \
+        else                                                                 \
+            ret.low |= val >> (FLOAT_FRAC_BITS - exp);                       \
+    }                                                                        \
+                                                                             \
+    /* encode negative integer using 2's complement */                       \
+    if (with_sign && sign) {                                                 \
+        ret.low = ~ret.low;                                                  \
+        ret.high = ~ret.high;                                                \
+        if (ret.low == UINT_MAX) {                                           \
+            ret.low = 0;                                                     \
+            ret.high++;                                                      \
+        } else                                                               \
+            ret.low++;                                                       \
+    }                                                                        \
+                                                                             \
+_ret_:                                                                       \
+    double_unsigned_struct_return(ret);                                      \
+}
+
+/* float to unsigned long long conversion */
+DEFINE__AEABI_F2XLZ(f2ulz, 0)
+
+/* float to long long conversion */
+DEFINE__AEABI_F2XLZ(f2lz, 1)
+
+/* double to [unsigned] long long conversion
+ *
+ * DEFINE__AEABI_D2XLZ(name, with_sign) defines
+ *     void __aeabi_<name>(double_unsigned_struct val)
+ * where val holds the raw bits of a double (low word first).  The value is
+ * truncated toward zero; with_sign selects a signed (1) or unsigned (0)
+ * result.  The 64-bit result is passed to double_unsigned_struct_return()
+ * as the last statement, which is how the helpers in this file hand it back
+ * (see the calling-convention remark near the top of the file).
+ *
+ * Magnitudes below 1 (zero and denormals included) give 0.  Values whose
+ * truncated result does not fit return early without producing a result.
+ */
+#define DEFINE__AEABI_D2XLZ(name, with_sign)                                 \
+void __aeabi_ ## name(double_unsigned_struct val)                            \
+{                                                                            \
+    int exp, high_shift, sign;                                               \
+    double_unsigned_struct ret;                                              \
+                                                                             \
+    if ((val.high & ~0x80000000) == 0 && val.low == 0) {                     \
+        ret.low = ret.high = 0;                                              \
+        goto _ret_;                                                          \
+    }                                                                        \
+                                                                             \
+    /* compute sign */                                                       \
+    sign = val.high >> 31;                                                   \
+                                                                             \
+    /* compute real exponent */                                              \
+    exp = (val.high >> (DOUBLE_FRAC_BITS - 32));                             \
+    exp &= (1 << DOUBLE_EXP_BITS) - 1;                                       \
+    exp -= ONE_EXP(DOUBLE);                                                  \
+                                                                             \
+    /* |val| < 1 (zero and denormals included) truncates to 0; return it */  \
+    /* here so that no shift below is ever given a negative count */         \
+    if (exp < 0) {                                                           \
+        ret.low = ret.high = 0;                                              \
+        goto _ret_;                                                          \
+    }                                                                        \
+                                                                             \
+    /* undefined behavior if truncated value cannot be represented */        \
+    if (with_sign) {                                                         \
+        if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \
+            return;                                                          \
+    } else {                                                                 \
+        if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */  \
+            return;                                                          \
+    }                                                                        \
+                                                                             \
+    val.high &= (1 << (DOUBLE_FRAC_BITS - 32)) - 1;                          \
+    if (exp >= 32) {                                                         \
+        ret.high = 1 << (exp - 32);                                          \
+        if (exp >= DOUBLE_FRAC_BITS) {                                       \
+            high_shift = exp - DOUBLE_FRAC_BITS;                             \
+            ret.high |= val.high << high_shift;                              \
+            if (high_shift) /* a shift by 32 would be undefined */           \
+                ret.high |= val.low >> (32 - high_shift);                    \
+            ret.low = val.low << high_shift;                                 \
+        } else {                                                             \
+            high_shift = DOUBLE_FRAC_BITS - exp;                             \
+            ret.high |= val.high >> high_shift;                              \
+            ret.low = val.high << (32 - high_shift);                         \
+            ret.low |= val.low >> high_shift;                                \
+        }                                                                    \
+    } else {                                                                 \
+        ret.high = 0;                                                        \
+        ret.low = 1 << exp;                                                  \
+        if (exp > DOUBLE_FRAC_BITS - 32) {                                   \
+            high_shift = exp - (DOUBLE_FRAC_BITS - 32);                      \
+            ret.low |= val.high << high_shift;                               \
+            ret.low |= val.low >> (32 - high_shift);                         \
+        } else                                                               \
+            ret.low |= val.high >> (DOUBLE_FRAC_BITS - 32 - exp);            \
+    }                                                                        \
+                                                                             \
+    /* encode negative integer using 2's complement */                       \
+    if (with_sign && sign) {                                                 \
+        ret.low = ~ret.low;                                                  \
+        ret.high = ~ret.high;                                                \
+        if (ret.low == UINT_MAX) {                                           \
+            ret.low = 0;                                                     \
+            ret.high++;                                                      \
+        } else                                                               \
+            ret.low++;                                                       \
+    }                                                                        \
+                                                                             \
+_ret_:                                                                       \
+    double_unsigned_struct_return(ret);                                      \
+}
+
+/* double to unsigned long long conversion */
+DEFINE__AEABI_D2XLZ(d2ulz, 0)
+
+/* double to long long conversion */
+DEFINE__AEABI_D2XLZ(d2lz, 1)
+
+/* [unsigned] long long to float conversion
+ *
+ * DEFINE__AEABI_XL2F(name, with_sign) defines
+ *     unsigned __aeabi_<name>(unsigned long long v)
+ * which returns the raw single precision bit pattern of v.  With with_sign
+ * set, v is interpreted as a two's complement signed value.
+ *
+ * The magnitude is normalized so that its leading one sits in bit 31 of the
+ * high word, using 32-bit operations only.  The 23 bits below the leading
+ * one form the fraction and the remaining bits decide the rounding (round
+ * to nearest, ties to even).  The exponent is added rather than or-ed in so
+ * that a carry out of the fraction (e.g. for 0x1FFFFFFFF, which rounds to
+ * 2^33) increments the exponent instead of being lost.
+ */
+#define DEFINE__AEABI_XL2F(name, with_sign)                             \
+unsigned __aeabi_ ## name(unsigned long long v)                         \
+{                                                                       \
+    int e = 63; /* weight (power of two) of the leading one */          \
+    unsigned hi, lo, ret, sign = 0;                                     \
+    unsigned guard = 1u << (30 - FLOAT_FRAC_BITS);                      \
+                                                                        \
+    /* floats are sign-magnitude: convert |v| and set the sign last */  \
+    if (with_sign && (v & (1ULL << 63))) {                              \
+        sign = 1;                                                       \
+        v = ~v + 1;                                                     \
+    }                                                                   \
+    hi = v >> 32;                                                       \
+    lo = v;                                                             \
+    if (!hi) {                                                          \
+        if (!lo)                                                        \
+            return 0;                                                   \
+        hi = lo;                                                        \
+        lo = 0;                                                         \
+        e = 31;                                                         \
+    }                                                                   \
+                                                                        \
+    /* normalize: shift the leading one up to bit 31 of hi */           \
+    while (!(hi & 0x80000000u)) {                                       \
+        hi = (hi << 1) | (lo >> 31);                                    \
+        lo <<= 1;                                                       \
+        e--;                                                            \
+    }                                                                   \
+                                                                        \
+    /* fraction = bits just below the leading one; guard = next bit */  \
+    ret = hi >> (31 - FLOAT_FRAC_BITS);                                 \
+    ret &= (1u << FLOAT_FRAC_BITS) - 1; /* drop the implicit one */     \
+                                                                        \
+    /* round to nearest, ties to even; lower bits act as sticky bits */ \
+    if ((hi & guard) && ((hi & (guard - 1)) || lo || (ret & 1)))       \
+        ret++;                                                          \
+                                                                        \
+    /* add, not or: a rounding carry must increment the exponent */     \
+    ret += (e + ONE_EXP(FLOAT)) << FLOAT_FRAC_BITS;                     \
+                                                                        \
+    /* fill sign bit */                                                 \
+    ret |= sign << 31;                                                  \
+                                                                        \
+    return ret;                                                         \
+}
+
+/* unsigned long long to float conversion */
+DEFINE__AEABI_XL2F(ul2f, 0)
+
+/* long long to float conversion */
+DEFINE__AEABI_XL2F(l2f, 1)
+
+/* [unsigned] long long to double conversion
+ *
+ * __AEABI_XL2D(name, with_sign) defines
+ *     void __aeabi_<name>(unsigned long long v)
+ * With with_sign set, v is interpreted as a two's complement signed value.
+ * The raw bits of the resulting double are passed to
+ * double_unsigned_struct_return() as the last statement, which is how the
+ * helpers in this file hand a 64-bit result back (see the calling-convention
+ * remark near the top of the file).
+ *
+ * The magnitude is normalized so that its leading one sits in bit 31 of the
+ * high word, using 32-bit operations only.  The 52 bits below the leading
+ * one form the fraction and the remaining bits decide the rounding (round
+ * to nearest, ties to even).  The exponent is added rather than or-ed in so
+ * that a carry out of the fraction (e.g. for LLONG_MAX, which rounds to
+ * 2^63) increments the exponent instead of being lost.
+ */
+#define __AEABI_XL2D(name, with_sign)                                   \
+void __aeabi_ ## name(unsigned long long v)                             \
+{                                                                       \
+    int e = 63; /* weight (power of two) of the leading one */          \
+    unsigned hi, lo, sign = 0;                                          \
+    unsigned guard = 1u << (62 - DOUBLE_FRAC_BITS);                     \
+    double_unsigned_struct ret;                                         \
+                                                                        \
+    /* floats are sign-magnitude: convert |v| and set the sign last */  \
+    if (with_sign && (v & (1ULL << 63))) {                              \
+        sign = 1;                                                       \
+        v = ~v + 1;                                                     \
+    }                                                                   \
+    hi = v >> 32;                                                       \
+    lo = v;                                                             \
+    if (!hi) {                                                          \
+        if (!lo) {                                                      \
+            ret.high = ret.low = 0;                                     \
+            goto _ret_;                                                 \
+        }                                                               \
+        hi = lo;                                                        \
+        lo = 0;                                                         \
+        e = 31;                                                         \
+    }                                                                   \
+                                                                        \
+    /* normalize: shift the leading one up to bit 31 of hi */           \
+    while (!(hi & 0x80000000u)) {                                       \
+        hi = (hi << 1) | (lo >> 31);                                    \
+        lo <<= 1;                                                       \
+        e--;                                                            \
+    }                                                                   \
+                                                                        \
+    /* fraction = 52 bits below the leading one; guard = next bit */    \
+    ret.high = hi >> (63 - DOUBLE_FRAC_BITS);                           \
+    ret.high &= (1u << (DOUBLE_FRAC_BITS - 32)) - 1;                    \
+    ret.low = hi << (DOUBLE_FRAC_BITS - 31);                            \
+    ret.low |= lo >> (63 - DOUBLE_FRAC_BITS);                           \
+                                                                        \
+    /* round to nearest, ties to even; lower bits act as sticky bits */ \
+    if ((lo & guard) && ((lo & (guard - 1)) || (ret.low & 1))) {        \
+        if (++ret.low == 0) /* carry into the high word */              \
+            ret.high++;                                                 \
+    }                                                                   \
+                                                                        \
+    /* add, not or: a rounding carry must increment the exponent */     \
+    ret.high += (e + ONE_EXP(DOUBLE)) << (DOUBLE_FRAC_BITS - 32);       \
+                                                                        \
+    /* fill sign bit */                                                 \
+    ret.high |= sign << 31;                                             \
+                                                                        \
+_ret_:                                                                  \
+    double_unsigned_struct_return(ret);                                 \
+}
+
+/* unsigned long long to double conversion */
+__AEABI_XL2D(ul2d, 0)
+
+/* long long to double conversion */
+__AEABI_XL2D(l2d, 1)
+
+#if 1
+/* Long long helper functions */
+
+/* TODO: add error in case of den == 0 (see §4.3.1 and §4.3.2) */
+
+#define define_aeabi_xdivmod_signed_type(basetype, type) /* declares struct/typedef `type`: the {quot, rem} result of a signed divmod on basetype */ \
+typedef struct type {                                    \
+    basetype quot;                                       /* quotient, in the signed base type */ \
+    unsigned basetype rem;                               /* remainder, held in the unsigned counterpart of basetype */ \
+} type
+
+#define define_aeabi_xdivmod_unsigned_type(basetype, type) /* declares struct/typedef `type`: the {quot, rem} result of an unsigned divmod; basetype is used as given for both fields */ \
+typedef struct type {                                      \
+    basetype quot;                                         /* quotient */ \
+    basetype rem;                                          /* remainder */ \
+} type
+
+/* Defines `static inline rettype aeabi_<name>(type num, type den)`:
+ * unsigned division returning both quotient and remainder in a rettype
+ * struct (fields quot and rem).
+ *
+ *   name      - suffix of the generated function (aeabi_<name>)
+ *   type      - unsigned operand type
+ *   rettype   - result struct type with quot/rem members
+ *   typemacro - prefix of the <limits.h> maximum for type (e.g. ULLONG)
+ *
+ * The quotient is accumulated by repeatedly subtracting den scaled by the
+ * largest power of two that still fits into the remaining numerator.
+ *
+ * Division by zero: the subtraction loop can never make progress when
+ * den == 0 (num >= 0 always holds for an unsigned type), so the generated
+ * function used to hang.  It now returns quot = 0 and rem = num, which
+ * keeps num == quot * den + rem true.  Raising a proper division-by-zero
+ * error (see the TODO above the divmod helpers) is still open.
+ */
+#define AEABI_UXDIVMOD(name,type, rettype, typemacro)                     \
+static inline rettype aeabi_ ## name (type num, type den)                 \
+{                                                                         \
+    rettype ret;                                                          \
+    type quot = 0;                                                        \
+                                                                          \
+    /* A zero divisor would spin forever in the loop below */             \
+    if (den == 0) {                                                       \
+        ret.quot = 0;                                                     \
+        ret.rem = num;                                                    \
+        return ret;                                                       \
+    }                                                                     \
+                                                                          \
+    /* Increase quotient while it is less than numerator */               \
+    while (num >= den) {                                                  \
+        type q = 1;                                                       \
+                                                                          \
+        /* Find closest power of two; the second test stops before */     \
+        /* (q << 1) * den could overflow the operand type */              \
+        while ((q << 1) * den <= num && q * den <= typemacro ## _MAX / 2) \
+            q <<= 1;                                                      \
+                                                                          \
+        /* Compute difference between current quotient and numerator */   \
+        num -= q * den;                                                   \
+        quot += q;                                                        \
+    }                                                                     \
+    ret.quot = quot;                                                      \
+    ret.rem = num;                                                        \
+    return ret;                                                           \
+}
+
+#define __AEABI_XDIVMOD(name, type, uiname, rettype, urettype, typemacro)     /* defines void __aeabi_<name>(): signed divmod layered on aeabi_<uiname>() */ \
+void __aeabi_ ## name(type numerator, type denominator)                       \
+{                                                                             \
+    unsigned type num, den;                                                   /* magnitudes of the two operands */ \
+    urettype uxdiv_ret;                                                       /* unsigned quot/rem of the magnitudes */ \
+    rettype ret;                                                              /* signed result passed to <rettype>_return() */ \
+                                                                              \
+    if (numerator >= 0)                                                       /* num = |numerator| */ \
+      num = numerator;                                                        \
+    else                                                                      \
+      num = 0 - numerator;                                                    \
+    if (denominator >= 0)                                                     /* den = |denominator| */ \
+      den = denominator;                                                      \
+    else                                                                      \
+      den = 0 - denominator;                                                  \
+    uxdiv_ret = aeabi_ ## uiname(num, den);                                   \
+    /* signs differ (sign bits compared via <typemacro>_MIN): negate quot */  \
+    if ((numerator & typemacro ## _MIN) != (denominator & typemacro ## _MIN)) \
+        ret.quot = 0 - uxdiv_ret.quot;                                        \
+    else                                                                      \
+        ret.quot = uxdiv_ret.quot;                                            \
+    if (numerator < 0)                                                        /* the remainder takes the sign of the numerator */ \
+        ret.rem = 0 - uxdiv_ret.rem;                                          \
+    else                                                                      \
+        ret.rem = uxdiv_ret.rem;                                              \
+                                                                              \
+    rettype ## _return(ret);                                                  /* NOTE(review): helper generated by REGS_RETURN (defined outside this view); presumably this is how the result reaches the caller of this void function -- confirm */ \
+}
+
+define_aeabi_xdivmod_signed_type(long long, lldiv_t);
+define_aeabi_xdivmod_unsigned_type(unsigned long long, ulldiv_t);
+define_aeabi_xdivmod_signed_type(int, idiv_t);
+define_aeabi_xdivmod_unsigned_type(unsigned, uidiv_t);
+
+REGS_RETURN(lldiv_t, lldiv_t)
+REGS_RETURN(ulldiv_t, ulldiv_t)
+REGS_RETURN(idiv_t, idiv_t)
+REGS_RETURN(uidiv_t, uidiv_t)
+
+AEABI_UXDIVMOD(uldivmod, unsigned long long, ulldiv_t, ULLONG)
+
+__AEABI_XDIVMOD(ldivmod, long long, uldivmod, lldiv_t, ulldiv_t, LLONG)
+
+void __aeabi_uldivmod(unsigned long long num, unsigned long long den)
+{   /* Exported unsigned 64-bit divide-with-remainder entry point: computes
+     * the quotient/remainder pair with aeabi_uldivmod() and hands it to
+     * ulldiv_t_return().
+     * NOTE(review): the function is declared void; the result presumably
+     * reaches the caller through ulldiv_t_return(), which is generated by
+     * REGS_RETURN (defined outside this view) -- confirm. */
+    ulldiv_t_return(aeabi_uldivmod(num, den));
+}
+
+void __aeabi_llsl(double_unsigned_struct val, int shift)
+{   /* Logical shift left of the two-word value val by shift bits; the
+     * result is handed to double_unsigned_struct_return().  A shift of 32
+     * or more first moves the low word into the high word.  When no
+     * positive shift count remains after that, val is passed on as it
+     * stands.
+     * NOTE(review): the "32 - shift" arithmetic assumes low/high are 32-bit
+     * fields (the struct is declared outside this view), and a shift of 64
+     * or more leaves a residual count >= 32 for the word shifts -- confirm
+     * callers never pass that. */
+    double_unsigned_struct ret;
+
+    if (shift >= 32) {
+        val.high = val.low;     /* whole-word part of the shift */
+        val.low = 0;
+        shift -= 32;
+    }
+    if (shift > 0) {
+        ret.low = val.low << shift;
+        ret.high = (val.high << shift) | (val.low >> (32 - shift)); /* bits leaving low enter high */
+        double_unsigned_struct_return(ret);
+	return;
+    }
+    double_unsigned_struct_return(val);
+}
+
+#define aeabi_lsr(val, shift, fill, type)                          /* function body shared by __aeabi_llsr and __aeabi_lasr: shift the two-word val right by shift bits, putting `fill` into the high word when a whole word is shifted out */ \
+    type ## _struct ret;                                           \
+                                                                   \
+    if (shift >= 32) {                                             \
+        val.low = val.high;                                        /* whole-word part of the shift */ \
+        val.high = fill;                                           \
+        shift -= 32;                                               \
+    }                                                              \
+    if (shift > 0) {                                               \
+        ret.high = val.high >> shift;                              \
+        ret.low = (val.high << (32 - shift)) | (val.low >> shift); /* bits leaving high enter the top of low */ \
+        type ## _struct_return(ret);                               \
+	return;                                                    \
+    }                                                              \
+    type ## _struct_return(val);
+
+void __aeabi_llsr(double_unsigned_struct val, int shift)
+{   /* Right shift of the two-word value val with zero fill: expands the
+     * shared aeabi_lsr body with fill = 0 and the double_unsigned
+     * struct/return helpers. */
+    aeabi_lsr(val, shift, 0, double_unsigned);
+}
+
+void __aeabi_lasr(unsigned_int_struct val, int shift)
+{   /* Right shift of the two-word value val where the fill word is
+     * val.high >> 31: expands the shared aeabi_lsr body with the
+     * unsigned_int struct/return helpers.
+     * NOTE(review): for this to behave as an arithmetic shift, val.high
+     * must be a signed field so that ">> 31" replicates the sign bit;
+     * unsigned_int_struct is declared outside this view -- confirm. */
+    aeabi_lsr(val, shift, val.high >> 31, unsigned_int);
+}
+
+
+/* Integer division functions */
+
+#if 0 /* very slow */
+AEABI_UXDIVMOD(uidivmod, unsigned, uidiv_t, UINT)
+
+/* Signed 32-bit division built on the unsigned helper aeabi_uidivmod():
+   divide the magnitudes, then negate the quotient when the operand signs
+   differ.  Only compiled when the surrounding "#if 0" is enabled. */
+int __aeabi_idiv(int numerator, int denominator)
+{
+    unsigned abs_num = numerator >= 0 ? numerator : 0 - numerator;
+    unsigned abs_den = denominator >= 0 ? denominator : 0 - denominator;
+    uidiv_t res = aeabi_uidivmod(abs_num, abs_den);
+
+    /* the sign bits differ exactly when the XOR has its top bit set */
+    if ((numerator ^ denominator) & INT_MIN)
+        res.quot = 0 - res.quot;
+    return res.quot;
+}
+
+unsigned __aeabi_uidiv(unsigned num, unsigned den)
+{   /* Unsigned 32-bit division: returns only the quotient computed by
+     * aeabi_uidivmod().  Only compiled when the surrounding "#if 0" is
+     * enabled. */
+    return aeabi_uidivmod(num, den).quot;
+}
+
+__AEABI_XDIVMOD(idivmod, int, uidivmod, idiv_t, uidiv_t, INT)
+
+void __aeabi_uidivmod(unsigned num, unsigned den)
+{   /* Unsigned 32-bit divide-with-remainder: passes the quot/rem pair from
+     * aeabi_uidivmod() to uidiv_t_return().  Only compiled when the
+     * surrounding "#if 0" is enabled.
+     * NOTE(review): declared void; the result presumably travels through
+     * the REGS_RETURN-generated helper (defined outside this view). */
+    uidiv_t_return(aeabi_uidivmod(num, den));
+}
+#else
+# define UIDIVMOD_ASM 1
+#endif
+
+/* Some targets do not have all eabi calls (OpenBSD) */
+typedef __SIZE_TYPE__ size_t;
+extern void *memcpy(void *dest, const void *src, size_t n);
+extern void *memmove(void *dest, const void *src, size_t n);
+extern void *memset(void *s, int c, size_t n);
+
+void *
+__aeabi_memcpy (void *dest, const void *src, size_t n)
+{   /* EABI-named entry point that forwards unchanged to memcpy() and
+     * returns its result. */
+    return memcpy (dest, src, n);
+}
+
+void *
+__aeabi_memmove (void *dest, const void *src, size_t n)
+{   /* EABI-named entry point that forwards unchanged to memmove() and
+     * returns its result. */
+    return memmove (dest, src, n);
+}
+
+void *
+__aeabi_memmove4 (void *dest, const void *src, size_t n)
+{   /* Forwards unchanged to memmove(); nothing in this implementation
+     * depends on the alignment suggested by the "4" suffix. */
+    return memmove (dest, src, n);
+}
+
+void *
+__aeabi_memmove8 (void *dest, const void *src, size_t n)
+{   /* Forwards unchanged to memmove(); nothing in this implementation
+     * depends on the alignment suggested by the "8" suffix. */
+    return memmove (dest, src, n);
+}
+
+void *
+__aeabi_memset (void *s, size_t n, int c)
+{   /* Forwards to memset().  Note the parameter order: this function
+     * takes (s, n, c) while memset() takes (s, c, n), so the last two
+     * arguments are swapped in the call. */
+    return memset (s, c, n);
+}
+
+/* ***************************************************************** */
+#if UIDIVMOD_ASM
+#include <config.h>
+__asm__( /* 32-bit integer division helpers, written as file-scope assembly.
+          * Exports __aeabi_idiv/__aeabi_idivmod (signed) and
+          * __aeabi_uidiv/__aeabi_uidivmod (unsigned).  Each pair shares one
+          * body: numerator in r0 and denominator in r1 on entry, quotient
+          * in r0 and remainder in r1 on return.
+          *
+          * With __ARM_FEATURE_IDIV the sdiv/udiv instructions are used and
+          * the remainder is rebuilt with mls.  Otherwise the signed entry
+          * makes both operands non-negative, calls __aeabi_uidivmod and
+          * fixes up the signs; the unsigned entry is a shift-and-subtract
+          * loop using r2 (bit mask) and r3 (quotient).
+          *
+          * NOTE(review): despite its name, .Lboth_neg is the path for a
+          * negative numerator with a non-negative denominator; the
+          * both-negative case is the fall-through just before it.
+          * NOTE(review): .Luidiv0 is only reached for a zero denominator
+          * with a non-zero numerator and returns quotient 0; 0/0 takes the
+          * early "bls" branch instead and returns quotient 1. */
+   "\n  .global __aeabi_idiv, __aeabi_idivmod"
+   "\n  .global __aeabi_uidiv, __aeabi_uidivmod"
+#if __ARM_FEATURE_IDIV
+   "\n__aeabi_idiv:"
+   "\n__aeabi_idivmod:"
+   "\n  mov     r2, r0"             // keep the numerator for the remainder
+   "\n  sdiv    r0, r0, r1"         // quot = num / den (signed)
+   "\n  mls     r1, r1, r0, r2"     // rem = num - den * quot
+   "\n  bx      lr"
+
+   "\n__aeabi_uidiv:"
+   "\n__aeabi_uidivmod:"
+   "\n  mov     r2, r0"             // keep the numerator for the remainder
+   "\n  udiv    r0, r0, r1"         // quot = num / den (unsigned)
+   "\n  mls     r1, r1, r0, r2"     // rem = num - den * quot
+   "\n  bx      lr"
+#else
+/* Runtime ABI for the ARM Cortex-M0
+ * idivmod.S: signed 32 bit division (quotient and remainder)
+ *
+ * Copyright (c) 2012 Jörg Mische <bobbl@gmx.de>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ */
+   "\n__aeabi_idiv:"
+   "\n__aeabi_idivmod:"
+   "\n  cmp     r0, #0"
+   "\n  bge     .Lnumerator_pos"
+   "\n  rsb     r0, r0, #0" // num = -num
+   "\n  cmp     r1, #0"
+   "\n  bge     .Lboth_neg" // num < 0, den >= 0 (label name is misleading)
+   "\n  rsb     r1, r1, #0" // den = -den
+   "\n  push    {lr}"
+   "\n  bl      __aeabi_uidivmod"
+   "\n  rsb     r1, r1, #0" // rem = -rem
+   "\n  pop     {pc}"
+   "\n.Lboth_neg:"
+   "\n  push    {lr}"
+   "\n  bl      __aeabi_uidivmod"
+   "\n  rsb     r0, r0, #0" // quot = -quot
+   "\n  rsb     r1, r1, #0" // rem = -rem
+   "\n  pop     {pc}"
+   "\n.Ldenom_neg:"
+   "\n  rsb     r1, r1, #0" // den = -den
+   "\n  push    {lr}"
+   "\n  bl      __aeabi_uidivmod"
+   "\n  rsb     r0, r0, #0" // quot = -quot
+   "\n  pop     {pc}"
+   "\n.Lnumerator_pos:"
+   "\n  cmp     r1, #0"
+   "\n  blt     .Ldenom_neg" // otherwise both operands are non-negative: fall through
+
+   // Divide r0 by r1 and return the quotient in r0 and the remainder in r1
+   "\n__aeabi_uidiv:"
+   "\n__aeabi_uidivmod:"
+   // Shift left the denominator until it is greater than the numerator
+   "\n  mov     r2, #1"	    // counter
+   "\n  mov     r3, #0"	    // result
+   "\n  cmp     r0, r1"
+   "\n  bls     .Lsub_loop" // num <= den: no scaling of the denominator needed
+   "\n  adds    r1, #0"	    // dont shift if denominator would overflow
+   "\n  bmi     .Lsub_loop"
+   "\n  beq     .Luidiv0"   // den == 0
+   "\n.Ldenom_shift_loop:"
+   "\n  lsl     r2, #1"     // bitmask(r2) <<= 1
+   "\n  lsls    r1, #1"     // denom(r1) <<= 1
+   "\n  bmi     .Lsub_loop" // stop once the top bit of denom is set
+   "\n  cmp     r0, r1"
+   "\n  bhi     .Ldenom_shift_loop" // keep scaling while num > denom
+   "\n.Lsub_loop:"
+   "\n  cmp     r0, r1"     // if (num >= den)...
+   "\n  subcs   r0, r1"     // numerator -= denom
+   "\n  orrcs   r3, r2"     // result(r3) |= bitmask(r2)
+   "\n  lsr     r1, #1"	    // denom(r1) >>= 1
+   "\n  lsrs    r2, #1"	    // bitmask(r2) >>= 1
+   "\n  bne     .Lsub_loop"
+   "\n  mov     r1, r0"	    // remainder(r1) = numerator(r0)
+   "\n  mov     r0, r3"	    // quotient(r0) = result(r3)
+   "\n  bx      lr"
+   "\n.Luidiv0:"            // XXX: division by zero
+   "\n  mov     r0, #0"     // quotient = 0; r1 is left as it was (0)
+   "\n  bx      lr"
+#endif
+);
+#endif /* UIDIVMOD_ASM */
+/* ***************************************************************** */
+#endif
--- a/src/userland/cli/third_party/tcc/lib/armflush.c
+++ b/src/userland/cli/third_party/tcc/lib/armflush.c
@@ -0,0 +1,64 @@
+/* armflush.c - flush the instruction cache
+
+   __clear_cache is used in tccrun.c.  It is a built-in
+   intrinsic with gcc.  However, tcc needs this function
+   in order to compile itself. */
+
+/* ------------------------------------------------------------- */
+#if defined __arm__
+
+#ifdef __TINYC__
+
+/* syscall wrapper: the first argument is the syscall number (moved into r7),
+   the next three arguments are shifted down into r0-r2 before "svc #0".
+   r7 and the return address are saved on entry and restored on exit. */
+unsigned _tccsyscall(unsigned syscall_nr, ...);
+
+/* arm-tcc supports only fake asm currently */
+__asm__(
+    ".global _tccsyscall\n"
+    "_tccsyscall:\n"
+    "push    {r7, lr}\n\t"      /* save r7; lr is popped straight into pc below */
+    "mov     r7, r0\n\t"        /* r7 = syscall number */
+    "mov     r0, r1\n\t"        /* shift the remaining arguments down by one register */
+    "mov     r1, r2\n\t"
+    "mov     r2, r3\n\t"
+    "svc     #0\n\t"
+    "pop     {r7, pc}"          /* restore r7 and return */
+    );
+
+/* from unistd.h: */
+#if defined(__thumb__) || defined(__ARM_EABI__)
+# define __NR_SYSCALL_BASE      0x0
+#else
+# define __NR_SYSCALL_BASE      0x900000
+#endif
+#define __ARM_NR_BASE           (__NR_SYSCALL_BASE+0x0f0000)
+#define __ARM_NR_cacheflush     (__ARM_NR_BASE+2)
+
+#define syscall _tccsyscall
+
+#else
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdio.h>
+
+#endif
+
+/* Flushing for tccrun: issues the __ARM_NR_cacheflush syscall with
+   (beginning, end, 0) as arguments.  The syscall result is ignored. */
+void __clear_cache(void *beginning, void *end)
+{
+/* __ARM_NR_cacheflush is kernel private and should not be used in user space.
+ * However, there is no ARM asm parser in tcc so we use it for now.
+ * When built with tcc, "syscall" is #defined to the local _tccsyscall
+ * wrapper above. */
+    syscall(__ARM_NR_cacheflush, beginning, end, 0);
+}
+
+/* ------------------------------------------------------------- */
+#elif defined __aarch64__
+void __clear_cache(void *beg, void *end)
+{   /* AArch64 variant: forwards the range to __arm64_clear_cache().
+     * NOTE(review): __arm64_clear_cache is not declared anywhere in this
+     * file; presumably it is provided by the compiler -- confirm. */
+    __arm64_clear_cache(beg, end);
+}
+
+/* ------------------------------------------------------------- */
+#endif
--- a/src/userland/cli/third_party/tcc/lib/atomic.S
+++ b/src/userland/cli/third_party/tcc/lib/atomic.S
--- a/src/userland/cli/third_party/tcc/lib/bcheck.c
+++ b/src/userland/cli/third_party/tcc/lib/bcheck.c
--- a/src/userland/cli/third_party/tcc/lib/bt-dll.c
+++ b/src/userland/cli/third_party/tcc/lib/bt-dll.c
@@ -0,0 +1,95 @@
+/* ------------------------------------------------------------- */
+/* stubs for calling bcheck functions from a dll. */
+
+#include <windows.h>
+#include <stdio.h>
+
+#define REDIR_ALL \
+  REDIR(__bt_init) \
+  REDIR(__bt_exit) \
+  REDIR(tcc_backtrace) \
+  \
+  REDIR(__bound_ptr_add) \
+  REDIR(__bound_ptr_indir1) \
+  REDIR(__bound_ptr_indir2) \
+  REDIR(__bound_ptr_indir4) \
+  REDIR(__bound_ptr_indir8) \
+  REDIR(__bound_ptr_indir12) \
+  REDIR(__bound_ptr_indir16) \
+  REDIR(__bound_local_new) \
+  REDIR(__bound_local_delete) \
+  REDIR(__bound_new_region) \
+  \
+  REDIR(__bound_free) \
+  REDIR(__bound_malloc) \
+  REDIR(__bound_realloc) \
+  REDIR(__bound_memcpy) \
+  REDIR(__bound_memcmp) \
+  REDIR(__bound_memmove) \
+  REDIR(__bound_memset) \
+  REDIR(__bound_strlen) \
+  REDIR(__bound_strcpy) \
+  REDIR(__bound_strncpy) \
+  REDIR(__bound_strcmp) \
+  REDIR(__bound_strncmp) \
+  REDIR(__bound_strcat) \
+  REDIR(__bound_strchr) \
+  REDIR(__bound_strdup) \
+  REDIR(__bound_strncat) \
+  REDIR(__bound_strrchr) \
+  REDIR(__bound_setjmp) \
+  REDIR(__bound_longjmp)
+
+#ifdef __leading_underscore
+#define _(s) "_"#s
+#else
+#define _(s) #s
+#endif
+
+#define REDIR(s) void *s;
+static struct { REDIR_ALL } all_ptrs;
+#undef REDIR
+
+#define REDIR(s) #s"\0"
+static const char all_names[] = REDIR_ALL;
+#undef REDIR
+
+#if __aarch64__
+# define REDIR(s) /* one 24-byte stub per name: four instruction words followed by an 8-byte literal */ \
+    __asm__(".global "_(s)";"_(s)":"); \
+    __asm__(".int 0x58000090"); /* ldr x16, [pc, #16] */ \
+    __asm__(".int 0xf9400210"); /* ldr x16, [x16] */ \
+    __asm__(".int 0xd61f0200"); /* br x16 */ \
+    __asm__(".int 0xd503201f"); /* nop for alignment */ \
+    __asm__(".quad all_ptrs + (. - all_jmps - 16) / 24 * 8"); /* literal = address of this stub's slot in all_ptrs (stub index * 8) */ \
+    __asm__(".type "_(s)",function\n.size "_(s)",.-"_(s));
+
+    __asm__(".text\n.align 8\nall_jmps:");
+    REDIR_ALL
+#else
+# define REDIR(s) /* emits a global label named after the symbol, then a computed goto through its slot in all_ptrs */ \
+    __asm__(".global "_(s)";"_(s)":"); goto *all_ptrs.s;
+    static void all_jmps() { REDIR_ALL }
+#endif
+#undef REDIR
+
+/* Fill all_ptrs with the addresses of the functions listed in REDIR_ALL,
+   looked up by name in the main executable (GetModuleHandle(NULL)).
+
+   all_names holds the names back to back, each NUL-terminated, in the
+   same order as the pointer slots in all_ptrs.  When bcheck is zero only
+   the slots in front of __bound_ptr_add are resolved.
+
+   If a name cannot be resolved, an error is reported (stderr when a
+   standard error handle exists, otherwise a message box) and the
+   process exits with status 1. */
+void __bt_init_dll(int bcheck)
+{
+    const char *s = all_names;
+    void **p = (void**)&all_ptrs;
+    HMODULE exe = GetModuleHandle(NULL);
+    do {
+        *p = (void*)GetProcAddress(exe, (char*)s);
+        if (NULL == *p) {
+            /* The fixed part of the message is 83 characters and the
+               longest name in REDIR_ALL is 20, which overflowed the
+               former 100-byte buffer.  Use a larger buffer and bound
+               the name with a precision so the write can never exceed
+               83 + 64 + 1 bytes. */
+            char buf[200];
+            sprintf(buf,
+                "Error: function '%.64s()' not found in executable. "
+                "(Need -bt or -b for linking the exe.)", s);
+            if (GetStdHandle(STD_ERROR_HANDLE))
+                fprintf(stderr, "TCC/BCHECK: %s\n", buf), fflush(stderr);
+            else
+                MessageBox(NULL, buf, "TCC/BCHECK", MB_ICONERROR);
+            ExitProcess(1);
+        }
+        /* advance to the next name and the next pointer slot */
+        s = strchr(s,'\0') + 1, ++p;
+    } while (*s && (bcheck || p < &all_ptrs.__bound_ptr_add));
+}
--- a/src/userland/cli/third_party/tcc/lib/bt-exe.c
+++ b/src/userland/cli/third_party/tcc/lib/bt-exe.c
@@ -0,0 +1,97 @@
+/* ------------------------------------------------------------- */
+/* for linking rt_printline and the signal/exception handler
+   from tccrun.c into executables. */
+
+#define CONFIG_TCC_BACKTRACE_ONLY
+#define ONE_SOURCE 1
+#define pstrcpy tcc_pstrcpy
+#include "../tccrun.c"
+
+#ifndef _WIN32
+# define __declspec(n)
+#endif
+
+#ifdef _WIN64
+/* Recompute p->prog_base from the allocation base of the memory region
+   that contains *p (as reported by VirtualQuery): the difference between
+   that base and the current prog_base is reduced to its low 32 bits and
+   subtracted from the allocation base again.  Does nothing when
+   prog_base is zero, the query fails, or no allocation base is reported. */
+static void bt_init_pe_prog_base(rt_context *p)
+{
+    MEMORY_BASIC_INFORMATION region;
+    addr_t alloc_base, delta;
+
+    if (p->prog_base
+        && VirtualQuery(p, &region, sizeof(region))
+        && region.AllocationBase) {
+        alloc_base = (addr_t)region.AllocationBase;
+        delta = alloc_base - p->prog_base;
+        p->prog_base = alloc_base - (delta & 0xffffffffu);
+    }
+}
+#endif
+
+__declspec(dllexport)
+void __bt_init(rt_context *p, int is_exe)
+{   /* Registers the runtime context p at the head of the global g_rc
+     * chain.  If p->bounds_start is set, __bound_init() is called first;
+     * when is_exe is non-zero, main is additionally recorded as
+     * p->top_func and set_exception_handler() is invoked.
+     * NOTE(review): __bound_init is declared weak and called without a
+     * null check -- presumably it is always linked whenever bounds_start
+     * is set; confirm. */
+    __attribute__((weak)) int main();
+    __attribute__((weak)) void __bound_init(void*, int);
+
+    //fprintf(stderr, "__bt_init %d %p %p %p\n", is_exe, p, p->stab_sym, p->bounds_start), fflush(stderr);
+
+    /* call __bound_init here due to redirection of sigaction */
+    /* needed to add global symbols */
+    if (p->bounds_start)
+	__bound_init(p->bounds_start, -1);
+
+#ifdef _WIN64
+    bt_init_pe_prog_base(p);
+#endif
+
+    /* add to chain */
+    rt_wait_sem();      /* the chain update is bracketed by rt_wait_sem/rt_post_sem */
+    p->next = g_rc, g_rc = p;
+    rt_post_sem();
+    if (is_exe) {
+        /* we are the executable (not a dll) */
+        p->top_func = main;
+        set_exception_handler();
+    }
+}
+
+/* Unregister the runtime context p: call __bound_exit_dll() first when
+   p->bounds_start is set, then unlink p from the g_rc chain between
+   rt_wait_sem() and rt_post_sem().  A context that is not on the chain
+   is left alone. */
+__declspec(dllexport)
+void __bt_exit(rt_context *p)
+{
+    __attribute__((weak)) void __bound_exit_dll(void*);
+    struct rt_context **link, *cur;
+
+    //fprintf(stderr, "__bt_exit %d %p\n", !!p->top_func, p);
+
+    if (p->bounds_start)
+        __bound_exit_dll(p->bounds_start);
+
+    /* remove from chain */
+    rt_wait_sem();
+    link = &g_rc;
+    while ((cur = *link)) {
+        if (cur == p) {
+            *link = cur->next;
+            break;
+        }
+        link = &cur->next;
+    }
+    rt_post_sem();
+}
+
+/* Copy s into buf, truncating it to at most buf_size - 1 characters, and
+   NUL-terminate the result.  Returns buf.
+   A buf_size of 0 leaves buf untouched: the previous code computed a
+   length of -1 in that case and passed it on to memcpy.  The length is
+   kept in a size_t so it is compared with buf_size without a
+   signed/unsigned mix. */
+ST_FUNC char *pstrcpy(char *buf, size_t buf_size, const char *s)
+{
+    size_t len;
+
+    if (buf_size == 0)
+        return buf;
+    len = strlen(s);
+    if (len >= buf_size)
+        len = buf_size - 1;
+    memcpy(buf, s, len);
+    buf[len] = 0;
+    return buf;
+}
+
+#if defined(_WIN64) && defined(__aarch64__)
+/* The bt-only Windows ARM64 build should not rely on importing this helper.
+   Stores Value into *Target and returns the value that was there before.
+   NOTE(review): this local replacement is a plain load followed by a plain
+   store, so nothing here makes the exchange atomic -- confirm that this
+   is acceptable for the code that calls it. */
+LONG InterlockedExchange(LONG volatile *Target, LONG Value)
+{
+    LONG Old = *Target;
+    *Target = Value;
+    return Old;
+}
+#endif
--- a/src/userland/cli/third_party/tcc/lib/bt-log.c
+++ b/src/userland/cli/third_party/tcc/lib/bt-log.c
@@ -0,0 +1,56 @@
+/* ------------------------------------------------------------- */
+/* function to get a stack backtrace on demand with a message */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#undef __attribute__
+
+#ifdef _WIN32
+# define DLL_EXPORT __declspec(dllexport)
+#else
+# define DLL_EXPORT
+#endif
+
+/* Needed when using ...libtcc1-usegcc=yes in lib/Makefile */
+#if (defined(__GNUC__) && (__GNUC__ >= 6)) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-address"
+#endif
+
+typedef struct rt_frame {
+    void *ip, *fp, *sp;
+} rt_frame;
+
+__attribute__((weak))
+int _tcc_backtrace(rt_frame *f, const char *fmt, va_list ap);
+
+/* Print a printf-style message together with a stack backtrace.
+
+   When the weak _tcc_backtrace() hook is linked in, it receives the
+   frame described by __builtin_frame_address(1) and
+   __builtin_return_address(0) plus fmt and the variadic arguments, and
+   its result is returned.  Otherwise only the message goes to stderr:
+   a leading "^...^" group in fmt is skipped, a leading '\001'
+   suppresses the trailing newline, and the vfprintf() result is
+   returned. */
+DLL_EXPORT int tcc_backtrace(const char *fmt, ...)
+{
+    va_list args;
+    int result;
+
+    if (!_tcc_backtrace) {
+        const char *sep, *eol = "\n";
+
+        if ('^' == fmt[0]) {
+            sep = strchr(fmt + 1, '^');
+            if (sep)
+                fmt = sep + 1;
+        }
+        if ('\001' == fmt[0]) {
+            eol = "";
+            ++fmt;
+        }
+        va_start(args, fmt);
+        result = vfprintf(stderr, fmt, args);
+        va_end(args);
+        fprintf(stderr, "%s", eol);
+        fflush(stderr);
+    } else {
+        rt_frame frame;
+
+        frame.fp = __builtin_frame_address(1);
+        frame.ip = __builtin_return_address(0);
+        va_start(args, fmt);
+        result = _tcc_backtrace(&frame, fmt, args);
+        va_end(args);
+    }
+    return result;
+}
+
+#if (defined(__GNUC__) && (__GNUC__ >= 6)) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
--- a/src/userland/cli/third_party/tcc/lib/builtin.c
+++ b/src/userland/cli/third_party/tcc/lib/builtin.c
@@ -0,0 +1,164 @@
+/* uses alias to allow building with gcc/clang */
+#ifdef __TINYC__
+#define	BUILTIN(x)	__builtin_##x
+#define	BUILTINN(x)	"__builtin_" # x
+#else
+#define	BUILTIN(x)	__tcc_builtin_##x
+#define	BUILTINN(x)	"__tcc_builtin_" # x
+#endif
+
+/* ---------------------------------------------- */
+/* This file implements:
+ * __builtin_ffs
+ * __builtin_clz
+ * __builtin_ctz
+ * __builtin_clrsb
+ * __builtin_popcount
+ * __builtin_parity
+ * for int, long and long long
+ */
+
+static const unsigned char table_1_32[] = {
+     0,  1, 28,  2, 29, 14, 24,  3, 30, 22, 20, 15, 25, 17,  4,  8, 
+    31, 27, 13, 23, 21, 19, 16,  7, 26, 12, 18,  6, 11,  5, 10,  9
+};
+static const unsigned char table_2_32[32] = {
+    31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13,  9,  6, 28,  1,
+    23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25,  5, 26, 27,  0
+};
+static const unsigned char table_1_64[] = {
+     0,  1,  2, 53,  3,  7, 54, 27,  4, 38, 41,  8, 34, 55, 48, 28,
+    62,  5, 39, 46, 44, 42, 22,  9, 24, 35, 59, 56, 49, 18, 29, 11,
+    63, 52,  6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+    51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
+};
+static const unsigned char table_2_64[] = {
+    63, 16, 62,  7, 15, 36, 61,  3,  6, 14, 22, 26, 35, 47, 60,  2,
+     9,  5, 28, 11, 13, 21, 42, 19, 25, 31, 34, 40, 46, 52, 59,  1,
+    17,  8, 37,  4, 23, 27, 48, 10, 29, 12, 43, 20, 32, 41, 53, 18,
+    38, 24, 49, 30, 44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58,  0
+};
+
+#define FFSI(x) /* body of ffs for 32-bit x: 1-based index of the lowest set bit; x == 0 gives table_1_32[0] + 0 == 0 */ \
+    return table_1_32[((x & -x) * 0x077cb531u) >> 27] + (x != 0);
+#define FFSL(x) /* 64-bit counterpart of FFSI, using table_1_64 */ \
+    return table_1_64[((x & -x) * 0x022fdd63cc95386dull) >> 58] + (x != 0);
+#define CTZI(x) /* body of ctz for 32-bit x: x & -x keeps only the lowest set bit; the top 5 bits of the product index table_1_32 */ \
+    return table_1_32[((x & -x) * 0x077cb531u) >> 27];
+#define CTZL(x) /* 64-bit counterpart of CTZI: the top 6 bits of the product index table_1_64 */ \
+    return table_1_64[((x & -x) * 0x022fdd63cc95386dull) >> 58];
+#define CLZI(x)   /* body of clz for 32-bit x: the ORs copy the highest set bit into every lower position, then the top 5 bits of the product index table_2_32 */ \
+    x |= x >> 1;  \
+    x |= x >> 2;  \
+    x |= x >> 4;  \
+    x |= x >> 8;  \
+    x |= x >> 16; \
+    return table_2_32[(x * 0x07c4acddu) >> 27];
+#define CLZL(x)   /* 64-bit counterpart of CLZI, with one more OR step and table_2_64 */ \
+    x |= x >> 1;  \
+    x |= x >> 2;  \
+    x |= x >> 4;  \
+    x |= x >> 8;  \
+    x |= x >> 16; \
+    x |= x >> 32; \
+    return table_2_64[x * 0x03f79d71b4cb0a89ull >> 58];
+#define POPCOUNTI(x, m)                                                   /* bit count of 32-bit x, masked with m: sums adjacent pairs, then nibbles, then bytes; the multiply adds the byte counts into the top byte */ \
+    x = x - ((x >> 1) & 0x55555555);                                      \
+    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);                       \
+    x = (x + (x >> 4)) & 0xf0f0f0f;                                       \
+    return ((x * 0x01010101) >> 24) & m; 
+#define POPCOUNTL(x, m)                                                   /* 64-bit counterpart of POPCOUNTI */ \
+    x = x - ((x >> 1) & 0x5555555555555555ull);                           \
+    x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull); \
+    x = (x + (x >> 4)) & 0xf0f0f0f0f0f0f0full;                            \
+    return ((x * 0x0101010101010101ull) >> 56) & m;
+
+/* Returns one plus the index of the least significant 1-bit of x,
+   or if x is zero, returns zero.
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(ffs) (int x) { FFSI(x) }
+int BUILTIN(ffsll) (long long x) { FFSL(x) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(ffsl) (long x) __attribute__((alias(BUILTINN(ffs))));
+#else
+int BUILTIN(ffsl) (long x) __attribute__((alias(BUILTINN(ffsll))));
+#endif
+
+/* Returns the number of leading 0-bits in x, starting at the most significant
+   bit position. If x is 0, the result is undefined.
+   (With this implementation x == 0 produces table_2_32[0] == 31 or
+   table_2_64[0] == 63.)
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(clz) (unsigned int x) { CLZI(x) }
+int BUILTIN(clzll) (unsigned long long x) { CLZL(x) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(clzl) (unsigned long x) __attribute__((alias(BUILTINN(clz))));
+#else
+int BUILTIN(clzl) (unsigned long x) __attribute__((alias(BUILTINN(clzll))));
+#endif
+
+/* Returns the number of trailing 0-bits in x, starting at the least
+   significant bit position. If x is 0, the result is undefined.
+   (With this implementation x == 0 produces table_1_32[0] == 0 or
+   table_1_64[0] == 0.)
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(ctz) (unsigned int x) { CTZI(x) }
+int BUILTIN(ctzll) (unsigned long long x) { CTZL(x) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(ctzl) (unsigned long x) __attribute__((alias(BUILTINN(ctz))));
+#else
+int BUILTIN(ctzl) (unsigned long x) __attribute__((alias(BUILTINN(ctzll))));
+#endif
+
+/* Returns the number of leading redundant sign bits in x, i.e. the number
+   of bits following the most significant bit that are identical to it.
+   There are no special cases for 0 or other values.
+   A negative x is complemented first so that its sign bit is clear; the
+   value is then shifted left by one and run through the clz macro, which
+   leaves the sign bit itself out of the count.  For 0 and -1 the clz
+   macro sees 0 and yields 31 (63 for the long long variant).
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(clrsb) (int x) { if (x < 0) x = ~x; x <<= 1; CLZI(x) }
+int BUILTIN(clrsbll) (long long x) { if (x < 0) x = ~x; x <<= 1; CLZL(x) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(clrsbl) (long x) __attribute__((alias(BUILTINN(clrsb))));
+#else
+int BUILTIN(clrsbl) (long x) __attribute__((alias(BUILTINN(clrsbll))));
+#endif
+
+/* Returns the number of 1-bits in x.
+   The mask passed to the popcount macro (0x3f / 0x7f) is wide enough for
+   the largest possible counts, 32 and 64.
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(popcount) (unsigned int x) { POPCOUNTI(x, 0x3f) }
+int BUILTIN(popcountll) (unsigned long long x) { POPCOUNTL(x, 0x7f) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(popcountl) (unsigned long x) __attribute__((alias(BUILTINN(popcount))));
+#else
+int BUILTIN(popcountl ) (unsigned long x) __attribute__((alias(BUILTINN(popcountll))));
+#endif
+
+/* Returns the parity of x, i.e. the number of 1-bits in x modulo 2.
+   Implemented as the popcount computation with the result masked down to
+   its lowest bit (mask 0x01).
+   The long variant is an alias of the int or the long long
+   implementation, selected by __SIZEOF_LONG__. */
+int BUILTIN(parity) (unsigned int x) { POPCOUNTI(x, 0x01) }
+int BUILTIN(parityll) (unsigned long long x) { POPCOUNTL(x, 0x01) }
+#if __SIZEOF_LONG__ == 4
+int BUILTIN(parityl) (unsigned long x) __attribute__((alias(BUILTINN(parity))));
+#else
+int BUILTIN(parityl) (unsigned long x) __attribute__((alias(BUILTINN(parityll))));
+#endif
+
+#ifndef __TINYC__
+#if defined(__GNUC__) && (__GNUC__ >= 6)
+/* gcc overrides alias from __builtin_ffs... to ffs.. so use assembly code */
+__asm__(".globl  __builtin_ffs");
+__asm__(".set __builtin_ffs,__tcc_builtin_ffs");
+__asm__(".globl  __builtin_ffsl");
+__asm__(".set __builtin_ffsl,__tcc_builtin_ffsl");
+__asm__(".globl  __builtin_ffsll");
+__asm__(".set __builtin_ffsll,__tcc_builtin_ffsll");
+#else
+int __builtin_ffs(int x) __attribute__((alias("__tcc_builtin_ffs")));
+int __builtin_ffsl(long x) __attribute__((alias("__tcc_builtin_ffsl")));
+int __builtin_ffsll(long long x) __attribute__((alias("__tcc_builtin_ffsll")));
+#endif
+int __builtin_clz(unsigned int x) __attribute__((alias("__tcc_builtin_clz")));
+int __builtin_clzl(unsigned long x) __attribute__((alias("__tcc_builtin_clzl")));
+int __builtin_clzll(unsigned long long x) __attribute__((alias("__tcc_builtin_clzll")));
+int __builtin_ctz(unsigned int x) __attribute__((alias("__tcc_builtin_ctz")));
+int __builtin_ctzl(unsigned long x) __attribute__((alias("__tcc_builtin_ctzl")));
+int __builtin_ctzll(unsigned long long x) __attribute__((alias("__tcc_builtin_ctzll")));
+int __builtin_clrsb(int x) __attribute__((alias("__tcc_builtin_clrsb")));
+int __builtin_clrsbl(long x) __attribute__((alias("__tcc_builtin_clrsbl")));
+int __builtin_clrsbll(long long x) __attribute__((alias("__tcc_builtin_clrsbll")));
+int __builtin_popcount(unsigned int x) __attribute__((alias("__tcc_builtin_popcount")));
+int __builtin_popcountl(unsigned long x) __attribute__((alias("__tcc_builtin_popcountl")));
+int __builtin_popcountll(unsigned long long x) __attribute__((alias("__tcc_builtin_popcountll")));
+int __builtin_parity(unsigned int x) __attribute__((alias("__tcc_builtin_parity")));
+int __builtin_parityl(unsigned long x) __attribute__((alias("__tcc_builtin_parityl")));
+int __builtin_parityll(unsigned long long x) __attribute__((alias("__tcc_builtin_parityll")));
+#endif
--- a/src/userland/cli/third_party/tcc/lib/dsohandle.c
+++ b/src/userland/cli/third_party/tcc/lib/dsohandle.c
@@ -0,0 +1 @@
+void * __dso_handle __attribute((visibility("hidden"))) = &__dso_handle; /* hidden-visibility pointer initialised to its own address */
--- a/src/userland/cli/third_party/tcc/lib/lib-arm64.c
+++ b/src/userland/cli/third_party/tcc/lib/lib-arm64.c
@@ -0,0 +1,678 @@
+/*
+ *  TCC runtime library for arm64.
+ *
+ *  Copyright (c) 2015 Edmund Grimley Evans
+ *
+ * Copying and distribution of this file, with or without modification,
+ * are permitted in any medium without royalty provided the copyright
+ * notice and this notice are preserved.  This file is offered as-is,
+ * without any warranty.
+ */
+
+#ifdef __TINYC__
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef int int32_t;
+typedef unsigned uint32_t;
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#else
+#include <stdint.h>
+#include <string.h>
+#endif
+
+typedef union {
+    struct { uint64_t x0, x1; };
+    long double f;
+} u128_t;
+
+typedef union {
+    uint64_t x;
+    double f;
+} u64_t;
+
+typedef union {
+    uint32_t x;
+    float f;
+} u32_t;
+
+static long double f3_zero(int sgn) /* build a zero whose sign bit is sgn (callers pass 0 or 1) */
+{
+    u128_t x = { 0, (uint64_t)sgn << 63 }; /* x0 = 0; x1 = nothing but the sign in its top bit */
+    return x.f;
+}
+
+static long double f3_infinity(int sgn) /* build an infinity whose sign bit is sgn (callers pass 0 or 1) */
+{
+    u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 }; /* exponent field all ones (0x7fff), fraction all zero */
+    return x.f;
+}
+
+static long double f3_NaN(void)
+{
+#if 0
+    // ARM's default NaN usually has just the top fraction bit set:
+    u128_t x = {  0, 0x7fff800000000000 };
+#else
+    // GCC's library sets all fraction bits:
+    u128_t x = { -1, 0x7fffffffffffffff };
+#endif
+    return x.f;
+}
+
+static int fp3_convert_NaN(long double *f, int sgn, u128_t *mnt)
+{
+    u128_t x = { mnt->x0,
+                 mnt->x1 | 0x7fff800000000000 | (uint64_t)sgn << 63 };
+    *f = x.f;
+    return 1;
+#define fp3_convert_NaN(a,b,c) fp3_convert_NaN(a,b,&c)
+}
+
+static int fp3_detect_NaNs(long double *f,
+                           int a_sgn, int a_exp, u128_t *a,
+                           int b_sgn, int b_exp, u128_t *b)
+#define a (*a)
+#define b (*b)
+{
+#if 0
+    // Detect signalling NaNs:
+    if (a_exp == 32767 && (a.x0 | a.x1 << 16) && !(a.x1 >> 47 & 1))
+        return fp3_convert_NaN(f, a_sgn, a);
+    if (b_exp == 32767 && (b.x0 | b.x1 << 16) && !(b.x1 >> 47 & 1))
+        return fp3_convert_NaN(f, b_sgn, b);
+#endif
+    // Detect quiet NaNs:
+    if (a_exp == 32767 && (a.x0 | a.x1 << 16))
+        return fp3_convert_NaN(f, a_sgn, a);
+    if (b_exp == 32767 && (b.x0 | b.x1 << 16))
+        return fp3_convert_NaN(f, b_sgn, b);
+
+    return 0;
+#undef a
+#undef b
+#define fp3_detect_NaNs(a,b,c,d,e,f,g) fp3_detect_NaNs(a,b,c,&d,e,f,&g)
+}
+
+static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f) /* split f into sign, exponent field and 128-bit mantissa */
+{
+    u128_t x;
+
+    x.f = f;
+    *sgn = x.x1 >> 63;              /* top bit of the high word */
+    *exp = x.x1 >> 48 & 32767;      /* the next 15 bits: exponent field */
+    x.x1 = x.x1 << 16 >> 16;        /* keep only the low 48 bits of the high word */
+    if (*exp)
+        x.x1 |= (uint64_t)1 << 48;  /* non-zero exponent field: make the leading 1 explicit at bit 48 */
+    else
+        *exp = 1;                   /* zero exponent field: no leading 1 is added and the exponent is reported as 1 */
+    mnt->f = x.f;
+}
+
+static void f3_normalise(int32_t *exp, u128_t *mnt) /* shift a non-zero mantissa left until bit 63 of x1 is set, lowering *exp by the shift */
+{
+    int sh;
+    if (!(mnt->x0 | mnt->x1))
+        return;                         /* zero mantissa: left untouched */
+    if (!mnt->x1) {                     /* high word empty: shift by a whole 64-bit word first */
+        mnt->x1 = mnt->x0;
+        mnt->x0 = 0;
+        *exp -= 64;
+    }
+    for (sh = 32; sh; sh >>= 1) {       /* try shifts of 32, 16, 8, 4, 2 and 1 */
+        if (!(mnt->x1 >> (64 - sh))) {  /* top sh bits of x1 are all clear */
+            mnt->x1 = mnt->x1 << sh | mnt->x0 >> (64 - sh);
+            mnt->x0 = mnt->x0 << sh;
+            *exp -= sh;
+        }
+    }
+}
+
+static void f3_sticky_shift(int32_t sh, u128_t *x) /* shift *x right by sh bits; bit 0 of the result is also set if any 1 bit was shifted out */
+{
+  if (sh >= 128) {                  /* every bit is shifted out: only the sticky bit remains */
+      x->x0 = !!(x->x0 | x->x1);
+      x->x1 = 0;
+      return;
+  }
+  if (sh >= 64) {                   /* whole-word part: x1 moves down, the old x0 collapses into the sticky bit */
+      x->x0 = x->x1 | !!x->x0;
+      x->x1 = 0;
+      sh -= 64;
+  }
+  if (sh > 0) {                     /* sh is 1..63 here, so (64 - sh) is also 1..63; sh <= 0 changes nothing */
+      x->x0 = x->x0 >> sh | x->x1 << (64 - sh) | !!(x->x0 << (64 - sh));
+      x->x1 = x->x1 >> sh;
+  }
+}
+
+static long double f3_round(int sgn, int32_t exp, u128_t *x) /* round *x (round up above half, ties to even) and pack it with sgn and exp */
+{
+    long double f;
+    int error;
+
+    if (exp > 0) {
+        f3_sticky_shift(13, x);
+    }
+    else {
+        f3_sticky_shift(14 - exp, x);   /* exp <= 0: shift further right and use exponent field 0 */
+        exp = 0;
+    }
+
+    error = x->x0 & 3;                  /* the two lowest bits are dropped below and decide the rounding */
+    x->x0 = x->x0 >> 2 | x->x1 << 62;
+    x->x1 = x->x1 >> 2;
+
+    if (error == 3 || ((error == 2) & (x->x0 & 1))) { /* round up on 3, or on 2 when the lowest kept bit is 1 */
+        if (!++x->x0) {                 /* x0 wrapped to 0: carry into x1 */
+            ++x->x1;
+            if (x->x1 == (uint64_t)1 << 48)
+                exp = 1;                /* the carry reached bit 48 (the leading-1 position used by f3_unpack) */
+            else if (x->x1 == (uint64_t)1 << 49) { /* the carry reached bit 49: halve the mantissa and bump exp */
+                ++exp;
+                x->x0 = x->x0 >> 1 | x->x1 << 63;
+                x->x1 = x->x1 >> 1;
+            }
+        }
+    }
+
+    if (exp >= 32767)
+        return f3_infinity(sgn);        /* exponent no longer fits below the all-ones field: return infinity */
+
+    x->x1 = x->x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63; /* low 48 bits of x1 | exponent field | sign */
+    return x->f;
+}
+
+static long double f3_add(long double fa, long double fb, int neg)
+{
+    u128_t a, b, x;
+    int32_t a_exp, b_exp, x_exp;
+    int a_sgn, b_sgn, x_sgn;
+    long double fx;
+
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    f3_unpack(&b_sgn, &b_exp, &b, fb);
+
+    if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
+        return fx;
+
+    b_sgn ^= neg;
+
+    // Handle infinities and zeroes:
+    if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn)
+        return f3_NaN();
+    if (a_exp == 32767)
+        return f3_infinity(a_sgn);
+    if (b_exp == 32767)
+        return f3_infinity(b_sgn);
+    if (!(a.x0 | a.x1 | b.x0 | b.x1))
+        return f3_zero(a_sgn & b_sgn);
+
+    a.x1 = a.x1 << 3 | a.x0 >> 61;
+    a.x0 = a.x0 << 3;
+    b.x1 = b.x1 << 3 | b.x0 >> 61;
+    b.x0 = b.x0 << 3;
+
+    if (a_exp <= b_exp) {
+        f3_sticky_shift(b_exp - a_exp, &a);
+        a_exp = b_exp;
+    }
+    else {
+        f3_sticky_shift(a_exp - b_exp, &b);
+        b_exp = a_exp;
+    }
+
+    x_sgn = a_sgn;
+    x_exp = a_exp;
+    if (a_sgn == b_sgn) {
+        x.x0 = a.x0 + b.x0;
+        x.x1 = a.x1 + b.x1 + (x.x0 < a.x0);
+    }
+    else {
+        x.x0 = a.x0 - b.x0;
+        x.x1 = a.x1 - b.x1 - (x.x0 > a.x0);
+        if (x.x1 >> 63) {
+            x_sgn ^= 1;
+            x.x0 = -x.x0;
+            x.x1 = -x.x1 - !!x.x0;
+        }
+    }
+
+    if (!(x.x0 | x.x1))
+        return f3_zero(0);
+
+    f3_normalise(&x_exp, &x);
+
+    return f3_round(x_sgn, x_exp + 12, &x);
+}
+
+long double __addtf3(long double a, long double b) /* a + b */
+{
+    return f3_add(a, b, 0); /* neg = 0: the sign of b is used as-is */
+}
+
+long double __subtf3(long double a, long double b) /* a - b */
+{
+    return f3_add(a, b, 1); /* neg = 1: f3_add flips the sign of b before adding */
+}
+
+long double __multf3(long double fa, long double fb)
+{
+    u128_t a, b, x;
+    int32_t a_exp, b_exp, x_exp;
+    int a_sgn, b_sgn, x_sgn;
+    long double fx;
+
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    f3_unpack(&b_sgn, &b_exp, &b, fb);
+
+    if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
+        return fx;
+
+    // Handle infinities and zeroes:
+    if ((a_exp == 32767 && !(b.x0 | b.x1)) ||
+        (b_exp == 32767 && !(a.x0 | a.x1)))
+        return f3_NaN();
+    if (a_exp == 32767 || b_exp == 32767)
+        return f3_infinity(a_sgn ^ b_sgn);
+    if (!(a.x0 | a.x1) || !(b.x0 | b.x1))
+        return f3_zero(a_sgn ^ b_sgn);
+
+    f3_normalise(&a_exp, &a);
+    f3_normalise(&b_exp, &b);
+
+    x_sgn = a_sgn ^ b_sgn;
+    x_exp = a_exp + b_exp - 16352;
+
+    {
+        // Convert to base (1 << 30), discarding bottom 6 bits, which are zero,
+        // so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0):
+        uint64_t a0 = a.x0 << 28 >> 34;
+        uint64_t b0 = b.x0 << 28 >> 34;
+        uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34;
+        uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34;
+        uint64_t a2 = a.x1 << 32 >> 34;
+        uint64_t b2 = b.x1 << 32 >> 34;
+        uint64_t a3 = a.x1 >> 32;
+        uint64_t b3 = b.x1 >> 32;
+        // Use 16 small multiplications and additions that do not overflow:
+        uint64_t x0 = a0 * b0;
+        uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0;
+        uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0;
+        uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
+        uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1;
+        uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2;
+        uint64_t x6 = (x5 >> 30) + a3 * b3;
+        // We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...).
+        // Take the top 128 bits, setting bottom bit if any lower bits were set:
+        uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 |
+                       !!(x3 << 38 | (x2 | x1 | x0) << 34));
+        uint64_t y1 = x6;
+        // Top bit may be zero. Renormalise:
+        if (!(y1 >> 63)) {
+            y1 = y1 << 1 | y0 >> 63;
+            y0 = y0 << 1;
+            --x_exp;
+        }
+        x.x0 = y0;
+        x.x1 = y1;
+    }
+
+    return f3_round(x_sgn, x_exp, &x);
+}
+
+long double __divtf3(long double fa, long double fb)
+{
+    u128_t a, b, x;
+    int32_t a_exp, b_exp, x_exp;
+    int a_sgn, b_sgn, x_sgn, i;
+    long double fx;
+
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    f3_unpack(&b_sgn, &b_exp, &b, fb);
+
+    if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b))
+        return fx;
+
+    // Handle infinities and zeroes:
+    if ((a_exp == 32767 && b_exp == 32767) ||
+        (!(a.x0 | a.x1) && !(b.x0 | b.x1)))
+        return f3_NaN();
+    if (a_exp == 32767 || !(b.x0 | b.x1))
+        return f3_infinity(a_sgn ^ b_sgn);
+    if (!(a.x0 | a.x1) || b_exp == 32767)
+        return f3_zero(a_sgn ^ b_sgn);
+
+    f3_normalise(&a_exp, &a);
+    f3_normalise(&b_exp, &b);
+
+    x_sgn = a_sgn ^ b_sgn;
+    x_exp = a_exp - b_exp + 16395;
+
+    a.x0 = a.x0 >> 1 | a.x1 << 63;
+    a.x1 = a.x1 >> 1;
+    b.x0 = b.x0 >> 1 | b.x1 << 63;
+    b.x1 = b.x1 >> 1;
+    x.x0 = 0;
+    x.x1 = 0;
+    for (i = 0; i < 116; i++) {
+        x.x1 = x.x1 << 1 | x.x0 >> 63;
+        x.x0 = x.x0 << 1;
+        if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) {
+            a.x1 = a.x1 - b.x1 - (a.x0 < b.x0);
+            a.x0 = a.x0 - b.x0;
+            x.x0 |= 1;
+        }
+        a.x1 = a.x1 << 1 | a.x0 >> 63;
+        a.x0 = a.x0 << 1;
+    }
+    x.x0 |= !!(a.x0 | a.x1);
+
+    f3_normalise(&x_exp, &x);
+
+    return f3_round(x_sgn, x_exp, &x);
+}
+
+long double __negtf2(long double f) /* return f with its sign bit flipped */
+{
+    ((u128_t*)&f)->x1 ^= (uint64_t)1 << 63; /* uint64_t rather than 1UL: the shift stays defined where unsigned long is 32 bits */
+    return f;
+}
+
+long double __extendsftf2(float f)
+{
+    u128_t x;
+    u32_t u;
+    uint32_t a;
+    uint64_t aa;
+
+    u.f = f, a = u.x;
+    aa = a;
+
+    x.x0 = 0;
+    if (!(a << 1))
+        x.x1 = aa << 32;
+    else if (a << 1 >> 24 == 255)
+        x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 |
+                (uint64_t)!!(a << 9) << 47);
+    else if (a << 1 >> 24 == 0) {
+        uint64_t adj = 0;
+        while (!(a << 1 >> 1 >> (23 - adj)))
+          adj++;
+        x.x1 = aa >> 31 << 63 | (16256 - adj + 1) << 48 | aa << adj << 41 >> 16;
+    } else
+        x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 |
+                aa << 41 >> 16);
+    return x.f;
+}
+
+long double __extenddftf2(double f)
+{
+    u128_t x;
+    u64_t u;
+    uint64_t a;
+
+    u.f = f, a = u.x;
+
+    x.x0 = a << 60;
+    if (!(a << 1))
+        x.x1 = a;
+    else if (a << 1 >> 53 == 2047)
+        x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 |
+                (uint64_t)!!(a << 12) << 47);
+    else if (a << 1 >> 53 == 0) {
+        uint64_t adj = 0;
+        while (!(a << 1 >> 1 >> (52 - adj)))
+          adj++;
+        x.x0 <<= adj;
+        x.x1 = a >> 63 << 63 | (15360 - adj + 1) << 48 | a << adj << 12 >> 16;
+    } else
+        x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16;
+    return x.f;
+}
+
+float __trunctfsf2(long double f)
+{
+    u128_t mnt;
+    int32_t exp;
+    int sgn;
+    u32_t x;
+#define x x.x
+
+    f3_unpack(&sgn, &exp, &mnt, f);
+    if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
+        x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff);
+    else if (exp > 16510)
+        x = 0x7f800000 | (uint32_t)sgn << 31;
+    else if (exp < 16233)
+        x = (uint32_t)sgn << 31;
+    else {
+        exp -= 16257;
+        x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41);
+        if (exp < 0) {
+            x = x >> -exp | !!(x << (32 + exp));
+            exp = 0;
+        }
+        if ((x & 3) == 3 || (x & 7) == 6)
+            x += 4;
+        x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31;
+    }
+#undef x
+    return x.f;
+}
+
+double __trunctfdf2(long double f)
+{
+    u128_t mnt;
+    int32_t exp;
+    int sgn;
+    u64_t x;
+#define x x.x
+
+    f3_unpack(&sgn, &exp, &mnt, f);
+    if (exp == 32767 && (mnt.x0 | mnt.x1 << 16))
+        x = (0x7ff8000000000000 | (uint64_t)sgn << 63 |
+             mnt.x1 << 16 >> 12 | mnt.x0 >> 60);
+    else if (exp > 17406)
+        x = 0x7ff0000000000000 | (uint64_t)sgn << 63;
+    else if (exp < 15308)
+        x = (uint64_t)sgn << 63;
+    else {
+        exp -= 15361;
+        x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6);
+        if (exp < 0) {
+            x = x >> -exp | !!(x << (64 + exp));
+            exp = 0;
+        }
+        if ((x & 3) == 3 || (x & 7) == 6)
+            x += 4;
+        x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63;
+    }
+#undef x
+    return x.f;
+}
+
+int32_t __fixtfsi(long double fa) /* convert long double to int32_t: magnitude truncated by a right shift, then the sign applied */
+{
+    u128_t a;
+    int32_t a_exp;
+    int a_sgn;
+    int32_t x;
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    if (a_exp < 16369)
+        return 0;                       /* small exponent: result is 0 (also keeps the shift count below 64) */
+    if (a_exp > 16413)
+        return a_sgn ? -0x80000000 : 0x7fffffff; /* large exponent: saturate according to the sign */
+    x = a.x1 >> (16431 - a_exp);        /* a.x1 carries the leading 1 at bit 48 (see f3_unpack) */
+    return a_sgn ? -x : x;
+}
+
+int64_t __fixtfdi(long double fa) /* convert long double to int64_t: magnitude truncated by a right shift, then the sign applied */
+{
+    u128_t a;
+    int32_t a_exp;
+    int a_sgn;
+    int64_t x;
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    if (a_exp < 16383)
+        return 0;                       /* exponent field below 16383: result is 0 */
+    if (a_exp > 16445)
+        return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff; /* large exponent: saturate according to the sign */
+    x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); /* top 64 mantissa bits (leading 1 at bit 63), then scaled down */
+    return a_sgn ? -x : x;
+}
+
+uint32_t __fixunstfsi(long double fa) /* convert long double to uint32_t, truncating by a right shift */
+{
+    u128_t a;
+    int32_t a_exp;
+    int a_sgn;
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    if (a_sgn || a_exp < 16369)
+        return 0;                       /* sign bit set or small exponent: result is 0 */
+    if (a_exp > 16414)
+        return -1;                      /* large exponent: saturate to all ones */
+    return a.x1 >> (16431 - a_exp);     /* a.x1 carries the leading 1 at bit 48 (see f3_unpack) */
+}
+
+uint64_t __fixunstfdi(long double fa) /* convert long double to uint64_t, truncating by a right shift */
+{
+    u128_t a;
+    int32_t a_exp;
+    int a_sgn;
+    f3_unpack(&a_sgn, &a_exp, &a, fa);
+    if (a_sgn || a_exp < 16383)
+        return 0;                       /* sign bit set or exponent field below 16383: result is 0 */
+    if (a_exp > 16446)
+        return -1;                      /* large exponent: saturate to all ones */
+    return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); /* top 64 mantissa bits (leading 1 at bit 63), then scaled down */
+}
+
+long double __floatsitf(int32_t a) /* convert a signed 32-bit integer to long double */
+{
+    int sgn = 0;
+    int exp = 16414;                    /* 16383 + 31: exponent field used when the leading 1 of mnt is at bit 31 */
+    uint32_t mnt = a;
+    u128_t x = { 0, 0 };                /* a == 0 returns this all-zero pattern */
+    int i;
+    if (a) {
+        if (a < 0) {
+            sgn = 1;
+            mnt = -mnt;                 /* magnitude, computed in unsigned arithmetic */
+        }
+        for (i = 16; i; i >>= 1)        /* move the leading 1 up to bit 31, lowering exp by the shift */
+            if (!(mnt >> (32 - i))) {
+                mnt <<= i;
+                exp -= i;
+            }
+        x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 |
+                (uint64_t)(mnt << 1) << 16); /* mnt << 1 drops the leading 1; the rest lands just below the exponent field */
+    }
+    return x.f;
+}
+
+long double __floatditf(int64_t a) /* convert a signed 64-bit integer to long double */
+{
+    int sgn = 0;
+    int exp = 16446;                    /* 16383 + 63: exponent field used when the leading 1 of mnt is at bit 63 */
+    uint64_t mnt = a;
+    u128_t x = { 0, 0 };                /* a == 0 returns this all-zero pattern */
+    int i;
+    if (a) {
+        if (a < 0) {
+            sgn = 1;
+            mnt = -mnt;                 /* magnitude, computed in unsigned arithmetic */
+        }
+        for (i = 32; i; i >>= 1)        /* move the leading 1 up to bit 63, lowering exp by the shift */
+            if (!(mnt >> (64 - i))) {
+                mnt <<= i;
+                exp -= i;
+            }
+        x.x0 = mnt << 49;               /* the low 15 bits of mnt go to the top of the low word */
+        x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16; /* mnt << 1 drops the leading 1 */
+    }
+    return x.f;
+}
+
+long double __floatunsitf(uint32_t a) /* convert an unsigned 32-bit integer to long double */
+{
+    int exp = 16414;                    /* 16383 + 31: exponent field used when the leading 1 of mnt is at bit 31 */
+    uint32_t mnt = a;
+    u128_t x = { 0, 0 };                /* a == 0 returns this all-zero pattern */
+    int i;
+    if (a) {
+        for (i = 16; i; i >>= 1)        /* move the leading 1 up to bit 31, lowering exp by the shift */
+            if (!(mnt >> (32 - i))) {
+                mnt <<= i;
+                exp -= i;
+            }
+        x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16; /* mnt << 1 drops the leading 1 */
+    }
+    return x.f;
+}
+
+long double __floatunditf(uint64_t a) /* convert an unsigned 64-bit integer to long double */
+{
+    int exp = 16446;                    /* 16383 + 63: exponent field used when the leading 1 of mnt is at bit 63 */
+    uint64_t mnt = a;
+    u128_t x = { 0, 0 };                /* a == 0 returns this all-zero pattern */
+    int i;
+    if (a) {
+        /* Move the leading 1 up to bit 63 (shifts of 32, 16, ..., 1), lowering exp by the shift. */
+        for (i = 32; i; i >>= 1)
+            if (!(mnt >> (64 - i))) {
+                mnt <<= i;
+                exp -= i;
+            }
+        x.x0 = mnt << 49;               /* the low 15 bits of mnt go to the top of the low word */
+        x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16; /* mnt << 1 drops the leading 1 */
+    }
+    return x.f;
+}
+
+static int f3_cmp(long double fa, long double fb) /* three-way compare: 0 equal, -1 fa < fb, 1 fa > fb, 2 if either operand is a NaN */
+{
+    u128_t a, b;
+    a.f = fa;
+    b.f = fb;
+    return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 :                    /* both are zero once the sign bits are ignored */
+            ((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) ||           /* exponent all ones with a non-zero fraction: NaN */
+             (b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 :
+            a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) : /* signs differ: the operand with the sign bit set is smaller */
+            a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 :                       /* same sign: compare high words, order reversed when the sign bit is set */
+            a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) :
+            a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 :                       /* high words equal: compare low words the same way */
+            b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0);
+}
+
+int __eqtf2(long double a, long double b) /* 0 when a and b compare equal, 1 otherwise */
+{
+    return !!f3_cmp(a, b); /* any non-zero f3_cmp result, including 2 for a NaN operand, becomes 1 */
+}
+
+int __netf2(long double a, long double b) /* non-zero (1) when a and b do not compare equal, 0 otherwise */
+{
+    return !!f3_cmp(a, b); /* any non-zero f3_cmp result, including 2 for a NaN operand, becomes 1 */
+}
+
+int __lttf2(long double a, long double b) /* negative only when a < b */
+{
+    return f3_cmp(a, b); /* -1, 0 or 1, or 2 when either operand is a NaN */
+}
+
+int __letf2(long double a, long double b) /* less than or equal to 0 only when a <= b */
+{
+    return f3_cmp(a, b); /* -1, 0 or 1, or 2 when either operand is a NaN */
+}
+
+int __gttf2(long double a, long double b) /* positive only when a > b */
+{
+    return -f3_cmp(b, a); /* operands swapped and result negated: 1, 0 or -1, or -2 when either operand is a NaN */
+}
+
+int __getf2(long double a, long double b) /* greater than or equal to 0 only when a >= b */
+{
+    return -f3_cmp(b, a); /* operands swapped and result negated: 1, 0 or -1, or -2 when either operand is a NaN */
+}
--- a/src/userland/cli/third_party/tcc/lib/libtcc1.c
+++ b/src/userland/cli/third_party/tcc/lib/libtcc1.c
@@ -0,0 +1,623 @@
+/* TCC runtime library. 
+   Parts of this code are (c) 2002 Fabrice Bellard 
+
+   Copyright (C) 1987, 1988, 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  
+*/
+
+#define W_TYPE_SIZE   32
+#define BITS_PER_UNIT 8
+
+typedef int Wtype;
+typedef unsigned int UWtype;
+typedef unsigned int USItype;
+typedef long long DWtype;
+typedef unsigned long long UDWtype;
+
+struct DWstruct {
+    Wtype low, high;
+};
+
+typedef union
+{
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
+
+typedef long double XFtype;
+#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
+#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
+
+/* the following deal with IEEE single-precision numbers */
+#define EXCESS		126
+#define SIGNBIT		0x80000000
+#define HIDDEN		(1 << 23)
+#define SIGN(fp)	((fp) & SIGNBIT)
+#define EXP(fp)		(((fp) >> 23) & 0xFF)
+#define MANT(fp)	(((fp) & 0x7FFFFF) | HIDDEN)
+#define PACK(s,e,m)	((s) | ((e) << 23) | (m))
+
+/* the following deal with IEEE double-precision numbers */
+#define EXCESSD		1022
+#define HIDDEND		(1 << 20)
+#define EXPD(fp)	(((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp)	((fp.l.upper) & SIGNBIT)
+#define MANTD(fp)	(((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
+				(fp.l.lower >> 22))
+#define HIDDEND_LL	((long long)1 << 52)
+#define MANTD_LL(fp)	((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
+#define PACKD_LL(s,e,m)	(((long long)((s)+((e)<<20))<<32)|(m))
+
+/* the following deal with x86 long double-precision numbers */
+#define EXCESSLD	16382
+#define EXPLD(fp)	(fp.l.upper & 0x7fff)
+#define SIGNLD(fp)	((fp.l.upper) & 0x8000)
+
+/* only for x86 */
+union ldouble_long {
+    long double ld;
+    struct {
+        unsigned long long lower;
+        unsigned short upper;
+    } l;
+};
+
+union double_long {
+    double d;
+#if 1
+    struct {
+        unsigned int lower;
+        int upper;
+    } l;
+#else
+    struct {
+        int upper;
+        unsigned int lower;
+    } l;
+#endif
+    long long ll;
+};
+
+union float_long {
+    float f;
+    unsigned int l;
+};
+
+/* XXX: several builtin support routines are not provided for now */
+#if defined __i386__
+
+/* XXX: use gcc/tcc intrinsic ? */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl %5,%1\n\tsbbl %3,%0"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mull %3"							\
+	   : "=a" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "rm" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("divl %4"							\
+	   : "=a" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "rm" ((USItype) (dv)))
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("bsrl %1,%0"						\
+	     : "=r" (__cbtmp) : "rm" ((USItype) (x)));			\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+
+/* most of this code is taken from libgcc2.c from gcc */
+
+static UDWtype __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  DWunion ww;
+  DWunion nn, dd;
+  DWunion rr;
+  UWtype d0, d1, n0, n1, n2;
+  UWtype q0, q1;
+  UWtype b, bm;
+
+  nn.ll = n;
+  dd.ll = d;
+
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+
+#if !defined(UDIV_NEEDS_NORMALIZATION)
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  udiv_qrnnd (q1, n1, 0, n1, d0);
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0.  */
+	}
+
+      if (rp != 0)
+	{
+	  rr.s.low = n0;
+	  rr.s.high = 0;
+	  *rp = rr.ll;
+	}
+    }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+	{
+	  /* 0q = nn / 0D */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm != 0)
+	    {
+	      /* Normalize, i.e. make the most significant bit of the
+		 denominator set.  */
+
+	      d0 = d0 << bm;
+	      n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+	      n0 = n0 << bm;
+	    }
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+	  q1 = 0;
+
+	  /* Remainder in n0 >> bm.  */
+	}
+      else
+	{
+	  /* qq = NN / 0d */
+
+	  if (d0 == 0)
+	    d0 = 1 / d0;	/* Divide intentionally by zero.  */
+
+	  count_leading_zeros (bm, d0);
+
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 leading quotient digit q1 = 1).
+
+		 This special case is necessary, not an optimization.
+		 (Shifts counts of W_TYPE_SIZE are undefined.)  */
+
+	      n1 -= d0;
+	      q1 = 1;
+	    }
+	  else
+	    {
+	      /* Normalize.  */
+
+	      b = W_TYPE_SIZE - bm;
+
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q1, n1, n2, n1, d0);
+	    }
+
+	  /* n1 != d0...  */
+
+	  udiv_qrnnd (q0, n0, n1, n0, d0);
+
+	  /* Remainder in n0 >> bm.  */
+	}
+
+      if (rp != 0)
+	{
+	  rr.s.low = n0 >> bm;
+	  rr.s.high = 0;
+	  *rp = rr.ll;
+	}
+    }
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+  else
+    {
+      if (d1 > n1)
+	{
+	  /* 00 = nn / DD */
+
+	  q0 = 0;
+	  q1 = 0;
+
+	  /* Remainder in n1n0.  */
+	  if (rp != 0)
+	    {
+	      rr.s.low = n0;
+	      rr.s.high = n1;
+	      *rp = rr.ll;
+	    }
+	}
+      else
+	{
+	  /* 0q = NN / dd */
+
+	  count_leading_zeros (bm, d1);
+	  if (bm == 0)
+	    {
+	      /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+		 conclude (the most significant bit of n1 is set) /\ (the
+		 quotient digit q0 = 0 or 1).
+
+		 This special case is necessary, not an optimization.  */
+
+	      /* The condition on the next line takes advantage of that
+		 n1 >= d1 (true due to program flow).  */
+	      if (n1 > d1 || n0 >= d0)
+		{
+		  q0 = 1;
+		  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+		}
+	      else
+		q0 = 0;
+
+	      q1 = 0;
+
+	      if (rp != 0)
+		{
+		  rr.s.low = n0;
+		  rr.s.high = n1;
+		  *rp = rr.ll;
+		}
+	    }
+	  else
+	    {
+	      UWtype m1, m0;
+	      /* Normalize.  */
+
+	      b = W_TYPE_SIZE - bm;
+
+	      d1 = (d1 << bm) | (d0 >> b);
+	      d0 = d0 << bm;
+	      n2 = n1 >> b;
+	      n1 = (n1 << bm) | (n0 >> b);
+	      n0 = n0 << bm;
+
+	      udiv_qrnnd (q0, n1, n2, n1, d1);
+	      umul_ppmm (m1, m0, q0, d0);
+
+	      if (m1 > n1 || (m1 == n1 && m0 > n0))
+		{
+		  q0--;
+		  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+		}
+
+	      q1 = 0;
+
+	      /* Remainder in (n1n0 - m1m0) >> bm.  */
+	      if (rp != 0)
+		{
+		  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+		  rr.s.low = (n1 << b) | (n0 >> bm);
+		  rr.s.high = n1 >> bm;
+		  *rp = rr.ll;
+		}
+	    }
+	}
+    }
+
+  ww.s.low = q0;
+  ww.s.high = q1;
+  return ww.ll;
+}
+
+#define __negdi2(a) (-(a))
+
+long long __divdi3(long long u, long long v) /* signed 64-bit division u / v, done on the magnitudes */
+{
+    int c = 0;                          /* ends up non-zero when exactly one operand was negative */
+    DWunion uu, vv;
+    DWtype w;
+    
+    uu.ll = u;
+    vv.ll = v;
+    
+    if (uu.s.high < 0) {                /* the sign is read from the high word */
+        c = ~c;
+        uu.ll = __negdi2 (uu.ll);
+    }
+    if (vv.s.high < 0) {
+        c = ~c;
+        vv.ll = __negdi2 (vv.ll);
+    }
+    w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0); /* null rp: no remainder is requested */
+    if (c)
+        w = __negdi2 (w);               /* operands had different signs: negate the quotient */
+    return w;
+}
+
+long long __moddi3(long long u, long long v) /* signed 64-bit remainder u % v; the result takes the sign of u */
+{
+    int c = 0;                          /* non-zero when u was negative */
+    DWunion uu, vv;
+    DWtype w;
+    
+    uu.ll = u;
+    vv.ll = v;
+    
+    if (uu.s.high < 0) {
+        c = ~c;
+        uu.ll = __negdi2 (uu.ll);
+    }
+    if (vv.s.high < 0)                  /* the sign of v is discarded and does not affect c */
+        vv.ll = __negdi2 (vv.ll);
+    
+    __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) &w); /* quotient ignored; remainder of the magnitudes stored in w */
+    if (c)
+        w = __negdi2 (w);
+    return w;
+}
+
+unsigned long long __udivdi3(unsigned long long u, unsigned long long v) /* unsigned 64-bit division u / v */
+{
+    return __udivmoddi4 (u, v, (UDWtype *) 0); /* null rp: no remainder is requested */
+}
+
+unsigned long long __umoddi3(unsigned long long u, unsigned long long v) /* unsigned 64-bit remainder u % v */
+{
+    UDWtype w;
+    
+    __udivmoddi4 (u, v, &w); /* quotient ignored; the remainder is stored in w */
+    return w;
+}
+
+/* XXX: fix tcc's code generator to do this instead */
+long long __ashrdi3(long long a, int b) /* 64-bit right shift of a signed value by b bits */
+{
+#ifdef __TINYC__
+    DWunion u;
+    u.ll = a;
+    if (b >= 32) {                      /* the low word comes entirely from the high word */
+        u.s.low = u.s.high >> (b - 32);
+        u.s.high = u.s.high >> 31;      /* signed shift by 31; assumes >> on a negative int is arithmetic */
+    } else if (b != 0) {                /* b == 0 is skipped: (32 - b) would be a shift by 32 */
+        u.s.low = ((unsigned)u.s.low >> b) | (u.s.high << (32 - b));
+        u.s.high = u.s.high >> b;
+    }
+    return u.ll;
+#else
+    return a >> b;
+#endif
+}
+
+/* XXX: fix tcc's code generator to do this instead */
+unsigned long long __lshrdi3(unsigned long long a, int b) /* 64-bit logical (zero-filling) right shift by b bits */
+{
+#ifdef __TINYC__
+    DWunion u;
+    u.ll = a;
+    if (b >= 32) {                      /* the low word comes entirely from the high word */
+        u.s.low = (unsigned)u.s.high >> (b - 32);
+        u.s.high = 0;
+    } else if (b != 0) {                /* b == 0 is skipped: (32 - b) would be a shift by 32 */
+        u.s.low = ((unsigned)u.s.low >> b) | (u.s.high << (32 - b));
+        u.s.high = (unsigned)u.s.high >> b; /* cast to unsigned so zeros are shifted in */
+    }
+    return u.ll;
+#else
+    return a >> b;
+#endif
+}
+
+/* XXX: fix tcc's code generator to do this instead */
+long long __ashldi3(long long a, int b) /* 64-bit left shift by b bits */
+{
+#ifdef __TINYC__
+    DWunion u;
+    u.ll = a;
+    if (b >= 32) {                      /* the high word comes entirely from the low word */
+        u.s.high = (unsigned)u.s.low << (b - 32);
+        u.s.low = 0;
+    } else if (b != 0) {                /* b == 0 is skipped: (32 - b) would be a shift by 32 */
+        u.s.high = ((unsigned)u.s.high << b) | ((unsigned)u.s.low >> (32 - b));
+        u.s.low = (unsigned)u.s.low << b;
+    }
+    return u.ll;
+#else
+    return a << b;
+#endif
+}
+
+#endif /* __i386__ */
+
+/* XXX: fix tcc's code generator to do this instead */
+float __floatundisf(unsigned long long a) /* convert an unsigned 64-bit integer to float */
+{
+    DWunion uu; 
+    XFtype r;
+
+    uu.ll = a;
+    if (uu.s.high >= 0) {               /* top bit clear: the value is the same when read as signed */
+        return (float)uu.ll;
+    } else {
+        r = (XFtype)uu.ll;              /* converted as a negative signed value ... */
+        r += 18446744073709551616.0;    /* ... then corrected by adding 2^64 */
+        return (float)r;
+    }
+}
+
+double __floatundidf(unsigned long long a) /* convert an unsigned 64-bit integer to double */
+{
+    DWunion uu; 
+    XFtype r;
+
+    uu.ll = a;
+    if (uu.s.high >= 0) {               /* top bit clear: the value is the same when read as signed */
+        return (double)uu.ll;
+    } else {
+        r = (XFtype)uu.ll;              /* converted as a negative signed value ... */
+        r += 18446744073709551616.0;    /* ... then corrected by adding 2^64 */
+        return (double)r;
+    }
+}
+
+long double __floatundixf(unsigned long long a) /* convert an unsigned 64-bit integer to long double */
+{
+    DWunion uu; 
+    XFtype r;
+
+    uu.ll = a;
+    if (uu.s.high >= 0) {               /* top bit clear: the value is the same when read as signed */
+        return (long double)uu.ll;
+    } else {
+        r = (XFtype)uu.ll;              /* converted as a negative signed value ... */
+        r += 18446744073709551616.0;    /* ... then corrected by adding 2^64 */
+        return (long double)r;
+    }
+}
+
+unsigned long long __fixunssfdi (float a1) /* convert float to a 64-bit integer by direct manipulation of its bits */
+{
+    register union float_long fl1;
+    register int exp;
+    register unsigned long long l;
+
+    fl1.f = a1;
+
+    if (fl1.l == 0)
+	return (0);
+
+    exp = EXP (fl1.l) - EXCESS - 24;    /* number of bit positions the 24-bit mantissa must be shifted left */
+    l = MANT(fl1.l);                    /* fraction bits with the hidden bit set */
+
+    if (exp >= 41)
+        return 1ULL << 63;              /* 24 mantissa bits shifted by 41 or more no longer fit in 64 bits */
+    else if (exp >= 0)
+        l <<= exp;
+    else if (exp >= -23)
+        l >>= -exp;                     /* bits shifted out are discarded */
+    else
+	return 0;                       /* shift of 24 or more: nothing of the mantissa would remain */
+    if (SIGN(fl1.l))
+        l = (unsigned long long)-l;     /* sign bit set: negate modulo 2^64 */
+    return l;
+}
+
+long long __fixsfdi (float a1) /* convert float to signed 64-bit integer via __fixunssfdi on the magnitude */
+{
+    long long ret; int s;
+    ret = __fixunssfdi((s = a1 >= 0) ? a1 : -a1); /* s = 1 when a1 >= 0; otherwise -a1 is converted */
+    return s ? ret : -ret;                        /* negate the result again when a1 was not >= 0 */
+}
+
+unsigned long long __fixunsdfdi (double a1)
+{
+    register union double_long dl1;
+    register int exp;
+    register unsigned long long l;
+
+    dl1.d = a1;
+
+    if (dl1.ll == 0)
+	return (0);
+
+    exp = EXPD (dl1) - EXCESSD - 53;
+    l = MANTD_LL(dl1);
+
+    if (exp >= 12)
+        return 1ULL << 63; /* overflow result (like gcc, somewhat) */
+    else if (exp >= 0)
+        l <<= exp;
+    else if (exp >= -52)
+        l >>= -exp;
+    else
+        return 0;
+    if (SIGND(dl1))
+        l = (unsigned long long)-l;
+    return l;
+}
+
+long long __fixdfdi (double a1)
+{
+    long long ret; int s;
+    ret = __fixunsdfdi((s = a1 >= 0) ? a1 : -a1);
+    return s ? ret : -ret;
+}
+
+#ifndef __arm__
+unsigned long long __fixunsxfdi (long double a1)
+{
+    register union ldouble_long dl1;
+    register int exp;
+    register unsigned long long l;
+
+    dl1.ld = a1;
+
+    if (dl1.l.lower == 0 && dl1.l.upper == 0)
+	return (0);
+
+    exp = EXPLD (dl1) - EXCESSLD - 64;
+    l = dl1.l.lower;
+    if (exp > 0)
+	return 1ULL << 63;
+    if (exp < -63)
+        return 0;
+    l >>= -exp;
+    if (SIGNLD(dl1))
+        l = (unsigned long long)-l;
+    return l;
+}
+
+long long __fixxfdi (long double a1)
+{
+    long long ret; int s;
+    ret = __fixunsxfdi((s = a1 >= 0) ? a1 : -a1);
+    return s ? ret : -ret;
+}
+#endif /* !ARM */
--- a/src/userland/cli/third_party/tcc/lib/pic86.S
+++ b/src/userland/cli/third_party/tcc/lib/pic86.S
@@ -0,0 +1,39 @@
+/* ---------------------------------------------- */
+/* pic86.S -- __x86.get_pc_thunk.{ax,bx,cx,dx} */
+
+#ifdef __leading_underscore
+# define _(s) _##s
+#else
+# define _(s) s
+#endif
+
+/* ---------------------------------------------- */
+        .text
+
+        .globl _(__x86.get_pc_thunk.ax)
+        .hidden _(__x86.get_pc_thunk.ax)
+_(__x86.get_pc_thunk.ax):
+        mov (%esp),%eax         /* eax = word at the top of the stack, i.e. the return address on entry */
+        ret
+        .size _(__x86.get_pc_thunk.ax), .-_(__x86.get_pc_thunk.ax)
+
+        .globl _(__x86.get_pc_thunk.bx)
+        .hidden _(__x86.get_pc_thunk.bx)
+_(__x86.get_pc_thunk.bx):
+        mov (%esp),%ebx         /* ebx = word at the top of the stack, i.e. the return address on entry */
+        ret
+        .size _(__x86.get_pc_thunk.bx), .-_(__x86.get_pc_thunk.bx)
+
+        .globl _(__x86.get_pc_thunk.cx)
+        .hidden _(__x86.get_pc_thunk.cx)
+_(__x86.get_pc_thunk.cx):
+        mov (%esp),%ecx         /* ecx = word at the top of the stack, i.e. the return address on entry */
+        ret
+        .size _(__x86.get_pc_thunk.cx), .-_(__x86.get_pc_thunk.cx)
+
+        .globl _(__x86.get_pc_thunk.dx)
+        .hidden _(__x86.get_pc_thunk.dx)
+_(__x86.get_pc_thunk.dx):
+        mov (%esp),%edx         /* edx = word at the top of the stack, i.e. the return address on entry */
+        ret
+        .size _(__x86.get_pc_thunk.dx), .-_(__x86.get_pc_thunk.dx)
--- a/src/userland/cli/third_party/tcc/lib/runmain.c
+++ b/src/userland/cli/third_party/tcc/lib/runmain.c
@@ -0,0 +1,86 @@
+/* ------------------------------------------------------------- */
+/* support for tcc_run() */
+
+/* _(s) expands to `s` with one extra leading underscore, except when
+   __leading_underscore is defined, where `s` is used unchanged.
+   NOTE(review): presumably the toolchain prepends the underscore itself in
+   that configuration, so both variants name the same symbol -- confirm. */
+#ifdef __leading_underscore
+# define _(s) s
+#else
+# define _(s) _##s
+#endif
+
+#ifndef _WIN32
+extern void (*_(_init_array_start)[]) (int argc, char **argv, char **envp);
+extern void (*_(_init_array_end)[]) (int argc, char **argv, char **envp);
+
+/* Call every function pointer stored from _init_array_start up to (not
+   including) _init_array_end, lowest address first, forwarding the
+   program arguments and environment. */
+static void run_ctors(int argc, char **argv, char **env)
+{
+    void (**ctor)(int, char **, char **);
+
+    for (ctor = _(_init_array_start); ctor != _(_init_array_end); ++ctor)
+        (**ctor)(argc, argv, env);
+}
+#endif
+
+extern void (*_(_fini_array_start)[]) (void);
+extern void (*_(_fini_array_end)[]) (void);
+
+/* Call the function pointers stored between _fini_array_start and
+   _fini_array_end in reverse order: the entry just below the end marker
+   runs first, the one at the start marker last. */
+static void run_dtors(void)
+{
+    void (**dtor)(void) = _(_fini_array_end);
+
+    while (dtor != _(_fini_array_start)) {
+        --dtor;
+        (**dtor)();
+    }
+}
+
+/* Exit-handler table: parallel arrays holding each registered handler and
+   its argument; __rt_nr_exit is the number of slots in use (at most 32). */
+static void *rt_exitfunc[32];
+static void *rt_exitarg[32];
+static int __rt_nr_exit;
+
+/* Invoke the registered exit handlers, most recently registered first.
+   Each handler is called with the exit status and the argument it was
+   registered with.  The handler count is left unchanged. */
+void __run_on_exit(int ret)
+{
+    typedef void (*exit_handler)(int, void *);
+    int idx;
+
+    for (idx = __rt_nr_exit; idx != 0; ) {
+        --idx;
+        ((exit_handler)rt_exitfunc[idx])(ret, rt_exitarg[idx]);
+    }
+}
+
+/* Register `function`, later called as function(status, arg), as an exit
+   handler.  Returns 0 on success, or 1 when all 32 slots are in use. */
+int on_exit(void *function, void *arg)
+{
+    int slot = __rt_nr_exit;
+
+    if (slot >= 32)
+        return 1;
+
+    rt_exitfunc[slot] = function;
+    rt_exitarg[slot] = arg;
+    __rt_nr_exit = slot + 1;
+    return 0;
+}
+
+/* Register a no-argument exit handler by storing it in the on_exit()
+   table with a null argument; returns whatever on_exit() returns. */
+int atexit(void (*function)(void))
+{
+    void *handler = function;
+
+    return on_exit(handler, 0);
+}
+
+/* Frame descriptor passed to __rt_exit().
+   NOTE(review): the field names suggest instruction, frame and stack
+   pointer -- confirm against the code that implements __rt_exit. */
+typedef struct rt_frame {
+    void *ip, *fp, *sp;
+} rt_frame;
+
+/* Not defined in this file; declared as never returning. */
+__attribute__((noreturn)) void __rt_exit(rt_frame *, int);
+
+/* exit() for the run-time environment: run the fini-array destructors and
+   the registered exit handlers, then pass control to __rt_exit() with a
+   frame whose fp is null and whose ip is the address of exit itself
+   (the sp field is not set here). */
+void exit(int code)
+{
+    rt_frame frame;
+
+    frame.ip = exit;
+    frame.fp = 0;
+
+    run_dtors();
+    __run_on_exit(code);
+    __rt_exit(&frame, code);
+}
+
+#ifndef _WIN32
+int main(int, char**, char**);
+
+/* Wrapper around main(): run the init-array constructors, call main(),
+   then run the destructors and exit handlers with main's result, which
+   is also the return value. */
+int _runmain(int argc, char **argv, char **envp)
+{
+    int status;
+
+    run_ctors(argc, argv, envp);
+    status = main(argc, argv, envp);
+
+    run_dtors();
+    __run_on_exit(status);
+    return status;
+}
+#endif
--- a/src/userland/cli/third_party/tcc/lib/stdatomic.c
+++ b/src/userland/cli/third_party/tcc/lib/stdatomic.c
@@ -0,0 +1,104 @@
+// for libtcc1, avoid including files that are not part of tcc
+// #include <stdint.h>
+#define uint8_t unsigned char
+#define uint16_t unsigned short
+#define uint32_t unsigned int
+#define uint64_t unsigned long long
+#define bool _Bool
+#define false 0
+#define true 1
+#define __ATOMIC_RELAXED 0
+#define __ATOMIC_CONSUME 1
+#define __ATOMIC_ACQUIRE 2
+#define __ATOMIC_RELEASE 3
+#define __ATOMIC_ACQ_REL 4
+#define __ATOMIC_SEQ_CST 5
+typedef __SIZE_TYPE__ size_t;
+
+/* Generate __atomic_<NAME>_<MODE>() for TYPE as a compare-and-exchange
+   loop: load the current value into cmp, compute the candidate value
+   xchg = OP, and repeat until __atomic_compare_exchange reports success.
+   RET is the expression returned (cmp or xchg in the users of this macro).
+   The memorder parameter is not used: the initial load is relaxed and the
+   compare-exchange always passes __ATOMIC_SEQ_CST. */
+#define ATOMIC_GEN_OP(TYPE, MODE, NAME, OP, RET) \
+    TYPE __atomic_##NAME##_##MODE(volatile void *atom, TYPE value, int memorder) \
+    { \
+        TYPE xchg, cmp; \
+        __atomic_load((TYPE *)atom, (TYPE *)&cmp, __ATOMIC_RELAXED); \
+        do { \
+            xchg = (OP); \
+        } while (!__atomic_compare_exchange((TYPE *)atom, &cmp, &xchg, true, \
+                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); \
+        return RET; \
+    }
+
+/* Per-operation instances of ATOMIC_GEN_OP.
+   - exchange stores `value` and returns cmp.
+   - <op>_fetch variants return xchg, the value that was stored.
+   - fetch_<op> variants return cmp, the value the stored result was
+     computed from.
+   nand is computed as ~(cmp & value). */
+#define ATOMIC_EXCHANGE(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, exchange, value, cmp)
+#define ATOMIC_ADD_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, add_fetch, (cmp + value), xchg)
+#define ATOMIC_SUB_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, sub_fetch, (cmp - value), xchg)
+#define ATOMIC_AND_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, and_fetch, (cmp & value), xchg)
+#define ATOMIC_OR_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, or_fetch, (cmp | value), xchg)
+#define ATOMIC_XOR_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, xor_fetch, (cmp ^ value), xchg)
+#define ATOMIC_NAND_FETCH(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, nand_fetch, ~(cmp & value), xchg)
+#define ATOMIC_FETCH_ADD(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_add, (cmp + value), cmp)
+#define ATOMIC_FETCH_SUB(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_sub, (cmp - value), cmp)
+#define ATOMIC_FETCH_AND(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_and, (cmp & value), cmp)
+#define ATOMIC_FETCH_OR(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_or, (cmp | value), cmp)
+#define ATOMIC_FETCH_XOR(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_xor, (cmp ^ value), cmp)
+#define ATOMIC_FETCH_NAND(TYPE, MODE) \
+    ATOMIC_GEN_OP(TYPE, MODE, fetch_nand, ~(cmp & value), cmp)
+
+/* Emit all thirteen operations above for one TYPE; SIZE becomes the
+   numeric suffix of the generated function names
+   (e.g. __atomic_fetch_add_4). */
+#define ATOMIC_GEN(TYPE, SIZE) \
+    ATOMIC_EXCHANGE(TYPE, SIZE) \
+    ATOMIC_ADD_FETCH(TYPE, SIZE) \
+    ATOMIC_SUB_FETCH(TYPE, SIZE) \
+    ATOMIC_AND_FETCH(TYPE, SIZE) \
+    ATOMIC_OR_FETCH(TYPE, SIZE) \
+    ATOMIC_XOR_FETCH(TYPE, SIZE) \
+    ATOMIC_NAND_FETCH(TYPE, SIZE) \
+    ATOMIC_FETCH_ADD(TYPE, SIZE) \
+    ATOMIC_FETCH_SUB(TYPE, SIZE) \
+    ATOMIC_FETCH_AND(TYPE, SIZE) \
+    ATOMIC_FETCH_OR(TYPE, SIZE) \
+    ATOMIC_FETCH_XOR(TYPE, SIZE) \
+    ATOMIC_FETCH_NAND(TYPE, SIZE)
+
+/* Instantiate the helpers for 1-, 2-, 4- and 8-byte unsigned types. */
+ATOMIC_GEN(uint8_t, 1)
+ATOMIC_GEN(uint16_t, 2)
+ATOMIC_GEN(uint32_t, 4)
+ATOMIC_GEN(uint64_t, 8)
+
+/* uses alias to allow building with gcc/clang */
+#ifdef __TINYC__
+#define ATOMIC(x)      __atomic_##x
+#else
+#define ATOMIC(x)      __tcc_atomic_##x
+#endif
+
+/* Report whether atomics of `size` bytes are lock free: always true for
+   1, 2 and 4 bytes; true for 8 bytes only on x86_64, aarch64 and riscv;
+   false for every other size.  `ptr` is not inspected. */
+bool ATOMIC(is_lock_free) (unsigned long size, const volatile void *ptr)
+{
+    if (size == 1 || size == 2 || size == 4)
+        return true;
+#if defined __x86_64__ || defined __aarch64__ || defined __riscv
+    if (size == 8)
+        return true;
+#endif
+    return false;
+}
+
+#ifndef __TINYC__
+bool __atomic_is_lock_free(unsigned long size, const volatile void *ptr) __attribute__((alias("__tcc_atomic_is_lock_free")));
+#endif
--- a/src/userland/cli/third_party/tcc/lib/tcov.c
+++ b/src/userland/cli/third_party/tcc/lib/tcov.c
@@ -0,0 +1,428 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#ifndef _WIN32
+#include <unistd.h>
+#include <errno.h>
+#else
+#include <windows.h>
+#include <io.h>
+#endif
+
+/* section layout (all little endian):
+   32bit offset to executable/so file name
+     filename \0
+       function name \0
+       align to 64 bits
+       64bit function start line
+         64bits end_line(28bits) / start_line(28bits) / flag=0xff(8bits)
+	 64bits counter
+       \0
+     \0
+   \0
+   executable/so file name \0
+ */
+
+/* One coverage record decoded from the section data: a line range and the
+   counter attached to it. */
+typedef struct tcov_line {
+    unsigned int fline;         /* start line (28 bits above the flag byte) */
+    unsigned int lline;         /* end line (bits 36 and up of the record) */
+    unsigned long long count;   /* 64-bit execution counter */
+} tcov_line;
+
+/* Coverage data collected for one function. */
+typedef struct tcov_function {
+    char *function;             /* name; points into the section data */
+    unsigned int first_line;    /* function start line */
+    unsigned int n_line;        /* entries of `line` in use */
+    unsigned int m_line;        /* entries of `line` allocated */
+    tcov_line *line;            /* growable array of line records */
+} tcov_function;
+
+/* Coverage data collected for one source file; files form a singly
+   linked list in order of first appearance. */
+typedef struct tcov_file {
+    char *filename;             /* name; points into the section data */
+    unsigned int n_func;        /* entries of `func` in use */
+    unsigned int m_func;        /* entries of `func` allocated */
+    tcov_function *func;        /* growable array of functions */
+    struct tcov_file *next;     /* next file, or NULL */
+} tcov_file;
+
+/* Open the coverage file for reading and updating, creating it if needed,
+   and request an exclusive lock on it (fcntl F_WRLCK over the whole file,
+   or LockFileEx on Windows).  Locking is best effort: only an EINTR
+   failure is retried, any other failure is ignored.
+   Returns the stream, or NULL if the file cannot be opened or no stream
+   can be attached to it; in the latter case the descriptor is closed
+   rather than leaked. */
+static FILE *open_tcov_file (char *cov_filename)
+{
+    int fd;
+    FILE *fp;
+#ifndef _WIN32
+    struct flock lock;
+
+    lock.l_type = F_WRLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = 0;
+    lock.l_len = 0; /* Until EOF.  */
+    lock.l_pid = getpid ();
+#endif
+    fd = open (cov_filename, O_RDWR | O_CREAT, 0666);
+    if (fd < 0)
+        return NULL;
+
+#ifndef _WIN32
+    /* blocking lock request; retry while interrupted by a signal */
+    while (fcntl (fd, F_SETLKW, &lock) && errno == EINTR)
+        continue;
+#else
+    {
+        OVERLAPPED overlapped = { 0 };
+        LockFileEx((HANDLE)_get_osfhandle(fd), LOCKFILE_EXCLUSIVE_LOCK,
+                   0, 1, 0, &overlapped);
+    }
+#endif
+
+    fp = fdopen (fd, "r+");
+    if (fp == NULL)
+        close (fd);     /* the descriptor stays open when fdopen fails */
+    return fp;
+}
+
+/* Assemble a little-endian unsigned integer from the `size` bytes at p:
+   p[0] is the least significant byte. */
+static unsigned long long get_value(unsigned char *p, int size)
+{
+    unsigned long long result = 0;
+    int idx = size;
+
+    /* walk from the most significant (last) byte down to p[0] */
+    while (idx != 0) {
+        --idx;
+        result = (result << 8) | p[idx];
+    }
+    return result;
+}
+
+/* qsort comparator: order tcov_function entries by ascending first_line. */
+static int sort_func (const void *p, const void *q)
+{
+    unsigned int lhs = ((const tcov_function *) p)->first_line;
+    unsigned int rhs = ((const tcov_function *) q)->first_line;
+
+    /* yields 1, -1 or 0 */
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort comparator for tcov_line: ascending fline; records sharing the
+   same fline are ordered by descending count. */
+static int sort_line (const void *p, const void *q)
+{
+    const tcov_line *lhs = p;
+    const tcov_line *rhs = q;
+
+    if (lhs->fline != rhs->fline)
+        return lhs->fline > rhs->fline ? 1 : -1;
+    if (lhs->count != rhs->count)
+        return lhs->count < rhs->count ? 1 : -1;
+    return 0;
+}
+
+/* Parse the raw coverage section at p into a linked list of files, each
+   holding its functions and their line records, then sort the functions
+   of every file by first line and the records of every function by line
+   (sort to let inline functions work).
+   A file or function name that appears more than once is merged into the
+   entry created for its first appearance.
+   On allocation failure a message is printed and the data parsed so far
+   is sorted and returned.  A failed realloc never replaces the existing
+   array pointer, so the counts always describe valid storage. */
+static tcov_file *sort_test_coverage (unsigned char *p)
+{
+    unsigned int i;
+    unsigned char *start = p;
+    tcov_file *file = NULL;
+    tcov_file *nfile;
+
+    p += 4;     /* skip the 32bit offset to the executable/so file name */
+    while (*p) {
+        char *filename = (char *)p;
+        size_t len = strlen (filename);
+
+        /* reuse the entry of a file that was seen before */
+        nfile = file;
+        while (nfile) {
+            if (strcmp (nfile->filename, filename) == 0)
+                break;
+            nfile = nfile->next;
+        }
+        if (nfile == NULL) {
+            nfile = malloc (sizeof(tcov_file));
+            if (nfile == NULL) {
+                fprintf (stderr, "Malloc error test_coverage\n");
+                goto done;
+            }
+            nfile->filename = filename;
+            nfile->n_func = 0;
+            nfile->m_func = 0;
+            nfile->func = NULL;
+            nfile->next = NULL;
+            /* append, keeping files in order of first appearance */
+            if (file == NULL)
+                file = nfile;
+            else {
+                tcov_file *lfile = file;
+
+                while (lfile->next)
+                    lfile = lfile->next;
+                lfile->next = nfile;
+            }
+        }
+        p += len + 1;
+        while (*p) {
+            unsigned int k;
+            char *function = (char *)p;
+            tcov_function *func = NULL;
+
+            p += strlen (function) + 1;
+            p += -(p - start) & 7;      /* align to 64 bits */
+            for (k = 0; k < nfile->n_func; k++) {
+                func = &nfile->func[k];
+                if (strcmp (func->function, function) == 0)
+                    break;
+            }
+            if (k == nfile->n_func) {
+                /* first record of this function: add a new entry */
+                if (nfile->n_func >= nfile->m_func) {
+                    unsigned int m_func =
+                        nfile->m_func == 0 ? 4 : nfile->m_func * 2;
+                    tcov_function *grown =
+                        realloc (nfile->func, m_func * sizeof (tcov_function));
+
+                    if (grown == NULL) {
+                        fprintf (stderr, "Realloc error test_coverage\n");
+                        goto done;
+                    }
+                    nfile->func = grown;
+                    nfile->m_func = m_func;
+                }
+                func = &nfile->func[nfile->n_func++];
+                func->function = function;
+                func->first_line = get_value (p, 8);
+                func->n_line = 0;
+                func->m_line = 0;
+                func->line = NULL;
+            }
+            p += 8;
+            while (*p) {
+                tcov_line *line;
+                unsigned long long val;
+
+                if (func->n_line >= func->m_line) {
+                    unsigned int m_line =
+                        func->m_line == 0 ? 4 : func->m_line * 2;
+                    tcov_line *grown =
+                        realloc (func->line, m_line * sizeof (tcov_line));
+
+                    if (grown == NULL) {
+                        fprintf (stderr, "Realloc error test_coverage\n");
+                        goto done;
+                    }
+                    func->line = grown;
+                    func->m_line = m_line;
+                }
+                line = &func->line[func->n_line++];
+                /* record: end_line(28) / start_line(28) / flag(8), counter */
+                val = get_value (p, 8);
+                line->fline = (val >> 8) & 0xfffffffULL;
+                line->lline = val >> 36;
+                line->count = get_value (p + 8, 8);
+                p += 16;
+            }
+            p++;
+        }
+        p++;
+    }
+done:
+    nfile = file;
+    while (nfile) {
+        qsort (nfile->func, nfile->n_func, sizeof (tcov_function), sort_func);
+        for (i = 0; i < nfile->n_func; i++) {
+            tcov_function *func = &nfile->func[i];
+            qsort (func->line, func->n_line, sizeof (tcov_line), sort_line);
+        }
+        nfile = nfile->next;
+    }
+    return file;
+}
+
+/* merge with previous tcov file
+   Reads the report previously written to fp and adds its counters to the
+   records in `file`.  *pruns is set to 1, or to the previous run count
+   plus one when the first line ends in ":<number>".
+   For each file in the list, lines are skipped up to the next line
+   containing "0:File:"; merging stops if there is none or if it does not
+   name this file followed by a space.  For each line record the report
+   is advanced until the number after the first ':' reaches the record's
+   start line, and a leading count on that line is added to the record
+   unless the previously parsed count was followed by '*'. */
+static void merge_test_coverage (tcov_file *file, FILE *fp,
+				 unsigned int *pruns)
+{
+    unsigned int runs;
+    char *p;
+    char str[10000];
+
+    *pruns = 1;
+    if (fp == NULL)
+        return;
+    /* fgets leaves the buffer untouched at end of file, so make sure it
+       never holds indeterminate data (the file is empty on the first run) */
+    str[0] = '\0';
+    if (fgets(str, sizeof(str), fp) &&
+        (p = strrchr (str, ':')) &&
+        (sscanf (p + 1, "%u", &runs) == 1))
+        *pruns = runs + 1;
+    while (file) {
+        unsigned int i;
+        int found = 0;
+        size_t len = strlen (file->filename);
+
+        /* only accept a header that was actually read just now */
+        p = NULL;
+        while (fgets(str, sizeof(str), fp)) {
+            if ((p = strstr(str, "0:File:")) != NULL) {
+                found = 1;
+                break;
+            }
+        }
+        if (!found ||
+            strncmp (p + strlen("0:File:"), file->filename, len) != 0 ||
+            p[strlen("0:File:") + len] != ' ')
+            break;
+        for (i = 0; i < file->n_func; i++) {
+            unsigned int j;
+            tcov_function *func = &file->func[i];
+            unsigned int next_zero = 0;
+            unsigned int curline = 0;
+
+            for (j = 0; j < func->n_line; j++) {
+                tcov_line *line = &func->line[j];
+                unsigned int fline = line->fline;
+                unsigned long long count;
+                unsigned int tmp;
+                char c;
+
+                /* advance to the report line for this record */
+                while (curline < fline &&
+                       fgets(str, sizeof(str), fp))
+                    if ((p = strchr(str, ':')) &&
+                        sscanf (p + 1, "%u", &tmp) == 1)
+                        curline = tmp;
+                if (sscanf (str, "%llu%c\n", &count, &c) == 2) {
+                    if (next_zero == 0)
+                        line->count += count;
+                    next_zero = c == '*';
+                }
+            }
+        }
+        file = file->next;
+    }
+}
+
+/* store tcov data in file
+   p points at the raw coverage section; its first 4 bytes hold the
+   little-endian offset (from p) of the output file name.
+   The function opens and locks that file, parses and sorts the section,
+   merges the counters of the previous report, and rewrites the report
+   from the start of the file: a "Runs" line, an "All" summary, and for
+   every source file that can be opened a "File" summary followed by the
+   annotated source.  Finally all parsed data is freed and the report is
+   closed.  If the report cannot be opened a message is printed and
+   nothing else happens. */
+void __store_test_coverage (unsigned char * p)
+{
+    int i, j;
+    unsigned int files;
+    unsigned int funcs;
+    unsigned int blocks;
+    unsigned int blocks_run;
+    unsigned int runs;
+    char *cov_filename = (char *)p + get_value (p, 4);
+    FILE *fp;
+    char *q;
+    tcov_file *file;
+    tcov_file *nfile;
+    tcov_function *func;
+
+    fp = open_tcov_file (cov_filename);
+    if (fp == NULL) {
+	fprintf (stderr, "Cannot create coverage file: %s\n", cov_filename);
+	return;
+    }
+    file = sort_test_coverage (p);
+    merge_test_coverage (file, fp, &runs);
+    /* rewrite the report in place, starting at the beginning */
+    fseek (fp, 0, SEEK_SET);
+    fprintf (fp, "        -:    0:Runs:%u\n", runs);
+    /* overall totals: a block counts as run when its counter is nonzero */
+    files = 0;
+    funcs = 0;
+    blocks = 0;
+    blocks_run = 0;
+    nfile = file;
+    while (nfile) {
+	files++;
+	for (i = 0; i < nfile->n_func; i++) {
+	    func = &nfile->func[i];
+	    funcs++;
+	    for (j = 0; j < func->n_line; j++) {
+		blocks++;
+		blocks_run += func->line[j].count != 0;
+	    }
+	}
+	nfile = nfile->next;
+    }
+    /* avoid dividing by zero when there are no records */
+    if (blocks == 0)
+	blocks = 1;
+    fprintf (fp, "        -:    0:All:%s Files:%u Functions:%u %.02f%%\n",
+	     cov_filename, files, funcs, 100.0 * (double) blocks_run / blocks);
+    nfile = file;
+    while (nfile) {
+	FILE *src = fopen (nfile->filename, "r");
+	unsigned int curline = 1;
+	char str[10000];
+
+	/* a source file that cannot be opened is left out of the report */
+        if (src == NULL)
+	     goto next;
+	/* per-file totals */
+	funcs = 0;
+	blocks = 0;
+	blocks_run = 0;
+	for (i = 0; i < nfile->n_func; i++) {
+	    func = &nfile->func[i];
+	    funcs++;
+	    for (j = 0; j < func->n_line; j++) {
+		blocks++;
+		blocks_run += func->line[j].count != 0;
+	    }
+	}
+	if (blocks == 0)
+	    blocks = 1;
+        fprintf (fp, "        -:    0:File:%s Functions:%u %.02f%%\n",
+		 nfile->filename, funcs, 100.0 * (double) blocks_run / blocks);
+        for (i = 0; i < nfile->n_func; i++) {
+	    func = &nfile->func[i];
+	
+	    /* copy the source lines before the function, marked "-" */
+	    while (curline < func->first_line &&
+		   fgets(str, sizeof(str), src))
+		fprintf (fp, "        -:%5u:%s", curline++, str);
+	    /* per-function totals */
+	    blocks = 0;
+	    blocks_run = 0;
+	    for (j = 0; j < func->n_line; j++) {
+		blocks++;
+		blocks_run += func->line[j].count != 0;
+	    }
+	    if (blocks == 0)
+		blocks = 1;
+            fprintf (fp, "        -:    0:Function:%s %.02f%%\n",
+		     func->function, 100.0 * (double) blocks_run / blocks);
+#if 0
+	    for (j = 0; j < func->n_line; j++) {
+	        unsigned int fline = func->line[j].fline;
+	        unsigned int lline = func->line[j].lline;
+		unsigned long long count = func->line[j].count;
+
+		fprintf (fp, "%u %u %llu\n", fline, lline, count);
+	    }
+#endif
+	    /* j is advanced inside the loop: each pass consumes the whole
+	       group of records that share the same start line */
+	    for (j = 0; j < func->n_line;) {
+	        unsigned int fline = func->line[j].fline;
+	        unsigned int lline = func->line[j].lline;
+	        unsigned long long count = func->line[j].count;
+		unsigned int has_zero = 0;
+		unsigned int same_line = fline == lline;
+
+		j++;
+		/* fold the remaining records of the group: keep the largest
+		   count, note any zero count, take the last record's end */
+		while (j < func->n_line) {
+	            unsigned int nfline = func->line[j].fline;
+	            unsigned int nlline = func->line[j].lline;
+	            unsigned long long ncount = func->line[j].count;
+
+		    if (fline == nfline) {
+			if (ncount == 0)
+			    has_zero = 1;
+			else if (ncount > count)
+			    count =  ncount;
+			same_line = nfline == nlline;
+			lline = nlline;
+			j++;
+		    }
+		    else
+			break;
+		}
+		/* a single-line range still has to emit that one line */
+		if (same_line)
+		     lline++;
+
+		/* lines before the range are copied with "-" */
+	        while (curline < fline &&
+		       fgets(str, sizeof(str), src))
+		     fprintf (fp, "        -:%5u:%s", curline++, str);
+		/* lines of the range: "#####" when never run, the count
+		   followed by "*" when some record of the group was zero,
+		   otherwise the plain count */
+		while (curline < lline &&
+		       fgets(str, sizeof(str), src)) {
+		    if (count == 0)
+		        fprintf (fp, "    #####:%5u:%s",
+				 curline, str);
+		    else if (has_zero)
+		        fprintf (fp, "%8llu*:%5u:%s", 
+				 count, curline, str);
+		    else
+		        fprintf (fp, "%9llu:%5u:%s",
+				 count, curline, str);
+		    curline++;
+		}
+	    }
+	}
+	/* copy the rest of the source file */
+	while (fgets(str, sizeof(str), src))
+	    fprintf (fp, "        -:%5u:%s", curline++, str);
+	fclose (src);
+next:
+	nfile = nfile->next;
+    }
+    /* release everything sort_test_coverage allocated */
+    while (file) {
+        for (i = 0; i < file->n_func; i++) {
+	    func = &file->func[i];
+	    free (func->line);
+        }
+	free (file->func);
+	nfile = file;
+	file = file->next;
+	free (nfile);
+    }
+    fclose (fp);
+}
--- a/src/userland/cli/third_party/tcc/lib/va_list.c
+++ b/src/userland/cli/third_party/tcc/lib/va_list.c
@@ -0,0 +1,67 @@
+/* va_list.c - tinycc support for va_list on X86_64 */
+
+#if defined __x86_64__
+
+/* Avoid include files, they may not be available when cross compiling */
+extern void abort(void);
+
+/* This should be in sync with our include/stdarg.h */
+/* Selects which area __va_arg() takes the next argument from. */
+enum __va_arg_type {
+    __va_gen_reg, __va_float_reg, __va_stack
+};
+
+/* GCC compatible definition of va_list. */
+/*predefined by TCC (tcc_predefs.h):
+typedef struct {
+    unsigned int gp_offset;
+    unsigned int fp_offset;
+    union {
+        unsigned int overflow_offset;
+        char *overflow_arg_area;
+    };
+    char *reg_save_area;
+} __builtin_va_list[1];
+*/
+
+extern void *memcpy(void *dest, const void *src, unsigned long n);
+
+/* Return a pointer to the next variadic argument and advance ap.
+   arg_type is one of enum __va_arg_type; size and align are rounded up
+   to a multiple of 8 first.  Register-class arguments are taken from
+   reg_save_area while room remains there, otherwise (and for __va_stack)
+   from overflow_arg_area.  An unknown arg_type calls abort(). */
+void *__va_arg(__builtin_va_list ap,
+               int arg_type,
+               int size, int align)
+{
+    size = (size + 7) & ~7;
+    align = (align + 7) & ~7;
+    switch ((enum __va_arg_type)arg_type) {
+    case __va_gen_reg:
+        /* the first 48 bytes of the save area are used for this class
+           (presumably six 8-byte integer registers -- confirm vs. the ABI) */
+        if (ap->gp_offset + size <= 48) {
+            ap->gp_offset += size;
+            return ap->reg_save_area + ap->gp_offset - size;
+        }
+        goto use_overflow_area;
+
+    case __va_float_reg:
+        /* this class uses 16-byte slots in the range [48, 48 + 128) */
+        if (ap->fp_offset < 128 + 48) {
+            ap->fp_offset += 16;
+            if (size == 8)
+                return ap->reg_save_area + ap->fp_offset - 16;
+            /* larger argument: needs a second slot; move the low 8 bytes
+               of that slot next to the low 8 bytes of the first one so
+               the value is contiguous, then consume both slots.  Note
+               that fp_offset stays advanced by 16 if no second slot is
+               left and the overflow area is used instead. */
+            if (ap->fp_offset < 128 + 48) {
+                memcpy(ap->reg_save_area + ap->fp_offset - 8,
+                       ap->reg_save_area + ap->fp_offset, 8);
+                ap->fp_offset += 16;
+                return ap->reg_save_area + ap->fp_offset - 32;
+            }
+        }
+        goto use_overflow_area;
+
+    case __va_stack:
+    use_overflow_area:
+        /* advance past the argument, round the new position up to a
+           multiple of align, and return that position minus size */
+        ap->overflow_arg_area += size;
+        ap->overflow_arg_area = (char*)((long long)(ap->overflow_arg_area + align - 1) & -align);
+        return ap->overflow_arg_area - size;
+
+    default: /* should never happen */
+        abort();
+        return 0;
+    }
+}
+#endif
				`@@ -0,0 +1 @@`
				`void * __dso_handle __attribute((visibility("hidden"))) = &__dso_handle;`