diff --git a/Makefile b/Makefile
index 6c1eb51..0e7d818 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@ ASFLAGS=-g -mabi=ilp32e -march=rv32ec
 CFLAGS=$(ASFLAGS)
 LD=riscv64-unknown-elf-ld
 export JQ?=jaq
-export RV32EMU?=$(HOME)/Source/github.com/sysprog21/rv32emu/build/rv32emu
+export QEMU?=qemu-riscv32
 
 all: calc.elf
 
@@ -14,12 +14,12 @@ check: tests
 hello.elf: hello.o
 	$(LD) -m elf32lriscv $^ -o $@
 
-calc.elf: hex.o calc.o
+calc.elf: mem.o hex.o debug.o calc.o
 	$(LD) -m elf32lriscv -T link.ld $^ -o $@
 
 tests: tests/btohex.elf tests/tohex.elf
 
-tests/btohex.elf: hex.o tests/btohex.o
+tests/btohex.elf: mem.o hex.o debug.o tests/btohex.o
 	$(LD) -m elf32lriscv -T link.ld $^ -o $@
 
 tests/tohex.elf: hex.o tests/tohex.o
diff --git a/calc.s b/calc.s
index 22b58df..cff8a62 100644
--- a/calc.s
+++ b/calc.s
@@ -4,59 +4,24 @@
 # Provide program starting address to linker
 .global _start
 
-.extern tohex
+.extern regdump
 
 /* newlib system calls */
 .set SYSEXIT,  93
 .set SYSWRITE, 64
 
-.section .rodata
-str: .ascii "Hello World!\n"
-     .set str_size, .-str
-
-str2: .ascii "regdump\n"
-      .set str2_size, .-str2
-
-regnames:
-  .ascii "x0", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5"
-
-.section .bss
-buf: .skip 20        # room for 20 byte string
-                     # x10: ABCDEFGH\n is 14 chars
-
-.section .data
-buflen: .byte 0      # length of buf string
+# .section .rodata   (nothing here yet: the register name table now lives in debug.s)
 
+# .section .bss      (nothing here yet: the regdump line buffer now lives in debug.s)
 
 .text
 _start:
-    li t1, 0
-    li t2, 5
-
-    # dummy test for jal instruction
-.L1:
-    jal x0, .L2
-.L2:
-    nop
-
-loop:
-    beq t1, t2, end
-
-    li t0, SYSWRITE     # "write" syscall
-    li a0, 1            # 1 = standard output (stdout)
-    la a1, str          # load address of hello string
-    li a2, str_size     # length of hello string
-    ecall               # invoke syscall to print the string
-    addi t1, t1, 1
-    j loop
-
-end:
     # do some adding
     li a0, 0x80000000   # 0.5
     li a1, 1            # 1
     li a2, 0x80000000   # 0.5
     li a3, 1            # 1
-    # jal add64
+    jal add64
     #add t1, a0, 0       # copy a0 to t0 as end with overwrite it
     jal regdump
 
@@ -82,110 +47,3 @@ add64:
     ret
 
 
-# Print values of all registers (in hex)
-# write syscall uses t0, a0, a1, a2
-regdump:
-    # save all registers to preserve their state when entering the function
-    # except sp, which will be offset by -64
-    addi sp, sp, -64 # allocate stack space to stash all registers
-    sw x0, 0(sp)
-    sw x1, 4(sp)
-    sw x2, 8(sp)
-    sw x3, 12(sp)
-    sw x4, 16(sp)
-    sw x5, 20(sp)
-    sw x6, 24(sp)
-    sw x7, 28(sp)
-    sw x8, 32(sp)
-    sw x9, 36(sp)
-    sw x10, 40(sp)
-    sw x11, 44(sp)
-    sw x12, 48(sp)
-    sw x13, 52(sp)
-    sw x14, 56(sp)
-    sw x15, 60(sp)
-
-    li t0, SYSWRITE     # "write" syscall
-    li a0, 1            # 1 = standard output (stdout)
-    la a1, str2         # load address of hello string
-    li a2, str2_size    # length of other string
-    ecall               # invoke syscall to print the string
-
-    li s0, 0
-    li s1, 16
-
-regdump_loop:
-    beq s0, s1, regdump_done
-    la a1, regnames     # load address of regnames
-    slli a2, s0, 1      # a2 = s0 * 2
-    add a1, a1, a2      # a1 = a1 + a2
-
-    # copy regname to buf
-    la a0, buf          # load address of buf into a0 as dest
-    li a2, 2            # copy 2 bytes
-    addi sp, sp, -4     # allocate stack space
-    sw ra, 0(sp)        # save contents of ra
-    jal memcpy
-    lw ra, 0(sp)        # restore contents of ra
-    addi sp, sp, 4      # deallocate stack space
-    # append ': '
-    li t0, ':
-    sb t0, 0(a0)
-    li t0, 0x20
-    sb t0, 1(a0)
-    addi a0, a0, 2      # bump a0 address over ': ' text
-    mv a1, a0           # load address of output buffer to a1
-    slli a2, s0, 2      # calculate the offset of the register value relative to sp
-    add a2, a2, sp      # a2 = sp + (s0 * 4)
-    lw a0, 0(a2)        # load register value to format into a0
-    addi sp, sp, -8     # allocate stack space
-    sw a1, 4(sp)        # save contents of a1
-    sw ra, 0(sp)        # save contents of ra
-    jal tohex
-    lw ra, 0(sp)        # restore contents of ra
-    lw a1, 4(sp)        # restore contents of a1 (buffer)
-    addi sp, sp, 8      # deallocate stack space
-    li t0, '\n          # append newline
-    sb t0, 8(a1)
-
-    # print the register name and value
-    li t0, SYSWRITE     # "write" syscall
-    li a0, 1            # 1 = standard output (stdout)
-    la a1, buf
-    li a2, 4+8+1        # length of register name string:
-                        # rr: 12345678\n
-    ecall               # invoke syscall to print the string
-
-    addi s0, s0, 1
-    j regdump_loop
-
-regdump_done:
-    # TODO: Restore s0 & s1
-    addi sp, sp, 64   # deallocate stack space
-    ret
-
-
-# memcpy, copy n bytes of memory from src to dest
-#   arguments:
-#       a0: dest address
-#       a1: source address
-#       a2: number of bytes to copy
-#   temporaries used:
-#       t0, t1
-#   return:
-#       a0: address of dest + n
-#       a1: address of src + n
-#
-memcpy:
-  li t0, 0
-memcpy_loop:
-  # TODO: copy in chunks of 4 bytes if n > 4
-  beq t0, a2, memcpy_done
-  lbu t1, 0(a1)
-  sb  t1, 0(a0)
-  addi a0, a0, 1
-  addi a1, a1, 1
-  addi t0, t0, 1
-  j memcpy_loop
-memcpy_done:
-  ret
diff --git a/debug.s b/debug.s
new file mode 100644
index 0000000..90838e7
--- /dev/null
+++ b/debug.s
@@ -0,0 +1,114 @@
+# Debugging routines
+
+.extern tohex
+.extern memcpy
+
+.global regdump
+
+/* newlib system calls */
+.set SYSWRITE, 64
+
+.section .rodata
+# two-character display names for x0..x15, indexed by register number
+regnames:
+  .ascii "x0", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5"
+
+.section .bss
+buf: .skip 15        # line buffer; one output line "rr: 12345678\n"
+                     # is 2+2+8+1 = 13 bytes
+.text
+# regdump: print the values of registers x0..x15 (in hex) on stdout,
+# one "rr: XXXXXXXX" line per register
+#   arguments:
+#       none
+#   return:
+#       none; ra, t0-t2, s0-s1 and a0-a5 hold their entry values again
+#       on return, so a call can be dropped in without disturbing the caller
+#   notes:
+#       the value printed for sp is the entry sp minus 64 (the stash frame)
+#       the write syscall uses t0, a0, a1, a2
+regdump:
+    # save all registers to preserve their state when entering the function
+    # except sp, which will be offset by -64
+    addi sp, sp, -64 # allocate stack space to stash all registers
+    sw x0, 0(sp)
+    sw x1, 4(sp)
+    sw x2, 8(sp)
+    sw x3, 12(sp)
+    sw x4, 16(sp)
+    sw x5, 20(sp)
+    sw x6, 24(sp)
+    sw x7, 28(sp)
+    sw x8, 32(sp)
+    sw x9, 36(sp)
+    sw x10, 40(sp)
+    sw x11, 44(sp)
+    sw x12, 48(sp)
+    sw x13, 52(sp)
+    sw x14, 56(sp)
+    sw x15, 60(sp)
+
+    li s0, 0            # s0 = number of the register being printed
+    li s1, 16           # s1 = loop bound (x0..x15)
+
+regdump_loop:
+    beq s0, s1, regdump_done
+    la a1, regnames     # load address of regnames
+    slli a2, s0, 1      # a2 = s0 * 2 (each name is 2 bytes)
+    add a1, a1, a2      # a1 = address of this register's name
+
+    # copy regname to buf
+    la a0, buf          # load address of buf into a0 as dest
+    li a2, 2            # copy 2 bytes
+    addi sp, sp, -4     # allocate stack space
+    sw ra, 0(sp)        # save contents of ra
+    jal memcpy          # returns with a0 = buf + 2
+    lw ra, 0(sp)        # restore contents of ra
+    addi sp, sp, 4      # deallocate stack space
+    # append ': '
+    li t0, ':
+    sb t0, 0(a0)
+    li t0, 0x20
+    sb t0, 1(a0)
+    addi a0, a0, 2      # bump a0 address over ': ' text
+    mv a1, a0           # load address of output buffer to a1
+    slli a2, s0, 2      # calculate the offset of the register value relative to sp
+    add a2, a2, sp      # a2 = sp + (s0 * 4)
+    lw a0, 0(a2)        # load register value to format into a0
+    addi sp, sp, -8     # allocate stack space
+    sw a1, 4(sp)        # save contents of a1
+    sw ra, 0(sp)        # save contents of ra
+    jal tohex           # presumably writes 8 hex digits at a1 (see hex.s)
+    lw ra, 0(sp)        # restore contents of ra
+    lw a1, 4(sp)        # restore contents of a1 (buffer)
+    addi sp, sp, 8      # deallocate stack space
+    li t0, '\n          # append newline
+    sb t0, 8(a1)
+
+    # print the register name and value
+    li t0, SYSWRITE     # "write" syscall
+    li a0, 1            # 1 = standard output (stdout)
+    la a1, buf
+    li a2, 4+8+1        # length of register name string:
+                        # rr: 12345678\n
+    ecall               # invoke syscall to print the string
+
+    addi s0, s0, 1
+    j regdump_loop
+
+regdump_done:
+    # reload x5..x15 from the stash frame: s0/s1 are callee-saved and were
+    # used as the loop counter and bound; the rest were scratch for the calls
+    lw x5, 20(sp)
+    lw x6, 24(sp)
+    lw x7, 28(sp)
+    lw x8, 32(sp)
+    lw x9, 36(sp)
+    lw x10, 40(sp)
+    lw x11, 44(sp)
+    lw x12, 48(sp)
+    lw x13, 52(sp)
+    lw x14, 56(sp)
+    lw x15, 60(sp)
+    addi sp, sp, 64   # deallocate stack space
+    ret
diff --git a/mem.s b/mem.s
new file mode 100644
index 0000000..445a6b1
--- /dev/null
+++ b/mem.s
@@ -0,0 +1,27 @@
+.global memcpy
+
+.text
+# memcpy: byte-by-byte copy of n bytes from src to dest
+#   in:
+#       a0 = dest address
+#       a1 = source address
+#       a2 = number of bytes to copy
+#   clobbers:
+#       t0 (count of bytes copied so far), t1 (byte in flight)
+#   out:
+#       a0 = dest + n
+#       a1 = src + n
+#
+memcpy:
+  li t0, 0
+  j 2f                  # test the count first so n == 0 copies nothing
+1:
+  lbu t1, 0(a1)
+  sb  t1, 0(a0)
+  addi a1, a1, 1
+  addi a0, a0, 1
+  addi t0, t0, 1
+  # TODO: copy in chunks of 4 bytes if n > 4
+2:
+  bne t0, a2, 1b
+  ret
diff --git a/tests/btohex.s b/tests/btohex.s
index 22a9adf..8ab320b 100644
--- a/tests/btohex.s
+++ b/tests/btohex.s
@@ -5,6 +5,7 @@
 .global _start
 
 .extern btohex
+.extern regdump
 
 /* newlib system calls */
 .set SYSEXIT,  93
@@ -13,7 +14,7 @@
 _start:
     li a0, 0xA5
     jal btohex
-    mv a1, a0
+    jal regdump
 
     li t0, SYSEXIT      # "exit" syscall
     la a0, 0            # Use 0 return code
diff --git a/tests/test_hex.sh b/tests/test_hex.sh
index 3340d10..ce47338 100644
--- a/tests/test_hex.sh
+++ b/tests/test_hex.sh
@@ -3,16 +3,14 @@
 test_btohex() {
-  # FIXME: Remove grep when this bug is fixed:
-  # https://github.com/sysprog21/rv32emu/issues/561
-  result=$("${RV32EMU}" -d - -q tests/btohex.elf | grep -v '^[0-9]' | "${JQ}" .x11)
+  # tests/btohex.elf calls regdump, which prints one "name: value" line per
+  # register; grep keeps only the a0 line, where btohex left its result.
+  result=$("${QEMU}" -B 0x80000000 -s 2k tests/btohex.elf | grep '^a0:')
 
-   # 16693 is 5A in ASCII (reversed from input due to little endian)
-  test $? -eq 0 && test "${result}" -eq 13633
+  # 0x3541 is 5A in ASCII (reversed from input due to little endian)
+  test $? -eq 0 && test "${result}" = "a0: 00003541"
 }
 
 test_tohex() {
-  # FIXME: Remove grep when this bug is fixed:
-  # https://github.com/sysprog21/rv32emu/issues/561
-  result=$("${RV32EMU}" -q tests/tohex.elf | grep -v '^[0-9]')
+  result=$("${QEMU}" -B 0x80000000 -s 2k tests/tohex.elf)
 
   test $? -eq 0 && test "${result}" = "CAFEFEED"
 }