# RISC-V assembly program implementing a calculator.

.org 0
# Export the entry point so the linker can use it as the program start address
.global _start

# tohex is defined in another file; regdump calls it with the value to format
# in a0 and the destination buffer address in a1.
.extern tohex

/* newlib system calls: the number is loaded into t0 before each ecall */
.set SYSEXIT,  93
.set SYSWRITE, 64

# Read-only strings. None is NUL-terminated (.ascii); each length is computed
# by the assembler as the distance from the label to the current location.
.section .rodata
# greeting printed by the loop in _start
str: .ascii "Hello World!\n"
     .set str_size, .-str

# banner printed once at the top of regdump
str2: .ascii "regdump\n"
      .set str2_size, .-str2

# Names of x0..x15 in register-number order, two characters each with no
# separator, so the name of register i starts at regnames + 2*i.
regnames:
  .ascii "x0", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5"

.section .bss
buf: .skip 20        # room for 20 byte string
                     # x10: ABCDEFGH\n is 14 chars
                     # (regdump writes 13 bytes per line: "rr: 12345678\n")

.section .data
buflen: .byte 0      # length of buf string
                     # NOTE(review): not referenced anywhere in the visible code


.text
# Program entry point: prints the hello string five times, loads two 64-bit
# test operands into a0..a3, dumps the registers with regdump, then exits
# with status 0.
#
# NOTE(review): regdump prints the live contents of t0-t2 and a0-a5 and the
# return address of the "jal regdump" below, so changing the instructions in
# this routine changes what the dump shows.
_start:
    li t1, 0            # t1 = number of greetings printed so far
    li t2, 5            # t2 = number of greetings to print

    # dummy test for jal instruction: jump to the very next label without
    # linking (rd = x0), so it has no effect on program state
.L1:
    jal x0, .L2
.L2:
    nop

loop:
    beq t1, t2, end     # all greetings printed

    li t0, SYSWRITE     # "write" syscall
    li a0, 1            # 1 = standard output (stdout)
    la a1, str          # load address of hello string
    li a2, str_size     # length of hello string
    ecall               # invoke syscall to print the string
    addi t1, t1, 1      # the counter in t1 is relied on to survive the ecall
    j loop

end:
    # do some adding: set up the operands in the layout add64 expects
    # (a0/a1 = x low/high word, a2/a3 = y low/high word)
    li a0, 0x80000000   # 0.5 (presumably the low word is read as a binary fraction)
    li a1, 1            # 1
    li a2, 0x80000000   # 0.5
    li a3, 1            # 1
    # The call to add64 is currently disabled, so regdump shows the operands
    # themselves rather than their sum.
    # jal add64
    #add t1, a0, 0       # (disabled) copy a0 to t1 before a0 is overwritten below
    jal regdump

    li t0, SYSEXIT      # "exit" syscall
    add a0, x0, 0       # Use 0 return code
    ecall               # invoke syscall to terminate the program

# add64: add two 64-bit integers, each passed as a pair of 32-bit words
#   arguments:
#       a0: low word of x
#       a1: high word of x
#       a2: low word of y
#       a3: high word of y
#   temporaries used:
#       t0: sum of the high words before the carry is applied
#       t1: carry out of the low-word addition (0 or 1)
#   return:
#       a0: low word of x+y
#       a1: high word of x+y (any carry out of bit 63 is discarded)
#
add64:
    add  t0, a1, a3  # high words first; they do not depend on the low sum
    add  a0, a0, a2  # low words, wrapping modulo 2^32
    sltu t1, a0, a2  # the low sum wrapped exactly when it is (unsigned) below y's low word
    add  a1, t1, t0  # fold the carry into the high word
    ret


# regdump: print the values of registers x0..x15 (in hex) to stdout
#
# Writes the banner "regdump\n" followed by one line per register of the form
# "rr: XXXXXXXX\n", where rr is the two-character name taken from regnames.
# The values shown are the ones the registers held on entry; sp is shown as
# the caller's sp (its value before this function allocated its frame).
#
#   arguments:
#       none
#   return:
#       nothing; ra, t0-t2, s0-s1 and a0-a5 are reloaded with their entry
#       values before returning, so the caller sees no register change
#   calls:
#       memcpy (uses t0, t1; returns a0 = dest + n)
#       tohex  (extern: a0 = value, a1 = destination buffer; assumed to write
#               8 hex digits and to preserve s0, s1 and sp -- TODO confirm)
#   syscalls:
#       write (uses t0, a0, a1, a2)
#   stack:
#       64 bytes; register xN is stored at offset 4*N from sp
#
regdump:
    # Stash every register before anything is clobbered. The frame is both
    # the data source for the dump and the restore source on exit.
    addi sp, sp, -64    # 16 registers * 4 bytes
    sw x0, 0(sp)
    sw x1, 4(sp)
    sw x3, 12(sp)
    sw x4, 16(sp)
    sw x5, 20(sp)
    sw x6, 24(sp)
    sw x7, 28(sp)
    sw x8, 32(sp)
    sw x9, 36(sp)
    sw x10, 40(sp)
    sw x11, 44(sp)
    sw x12, 48(sp)
    sw x13, 52(sp)
    sw x14, 56(sp)
    sw x15, 60(sp)
    addi t0, sp, 64     # t0 is already saved above, so it is free as scratch
    sw t0, 8(sp)        # x2 slot: record sp as it was on entry

    # print the banner
    li t0, SYSWRITE     # "write" syscall
    li a0, 1            # 1 = standard output (stdout)
    la a1, str2         # load address of banner string
    li a2, str2_size    # length of banner string
    ecall               # invoke syscall to print the string

    li s0, 0            # s0 = index of the register being printed
    li s1, 16           # s1 = number of registers to print

regdump_loop:
    beq s0, s1, regdump_done

    # copy the 2-character register name to the start of buf
    la a1, regnames     # load address of regnames
    slli a2, s0, 1      # a2 = s0 * 2 (each name is 2 bytes)
    add a1, a1, a2      # a1 = address of this register's name
    la a0, buf          # load address of buf into a0 as dest
    li a2, 2            # copy 2 bytes
    jal memcpy          # returns a0 = buf + 2; ra is reloaded from the frame on exit

    # append ': '
    li t0, ':
    sb t0, 0(a0)
    li t0, 0x20
    sb t0, 1(a0)

    # format the saved register value as hex right after "rr: "
    addi a1, a0, 2      # a1 = buf + 4, where the hex digits go
    slli a2, s0, 2      # offset of the saved value within the frame
    add a2, a2, sp      # a2 = sp + (s0 * 4)
    lw a0, 0(a2)        # load register value to format into a0
    jal tohex

    # terminate the line; buf is reloaded because tohex may have clobbered a1
    la a1, buf
    li t0, '\n          # append newline
    sb t0, 4+8(a1)      # after "rr: " (4 bytes) and 8 hex digits

    # print the register name and value
    li t0, SYSWRITE     # "write" syscall
    li a0, 1            # 1 = standard output (stdout)
    li a2, 4+8+1        # length of the line: rr: 12345678\n
    ecall               # invoke syscall to print the string (a1 = buf)

    addi s0, s0, 1
    j regdump_loop

regdump_done:
    # Restore every register this function or its callees may have changed.
    # gp (x3) and tp (x4) are never written here, so they are not reloaded.
    lw x1, 4(sp)        # ra
    lw x5, 20(sp)       # t0
    lw x6, 24(sp)       # t1
    lw x7, 28(sp)       # t2
    lw x8, 32(sp)       # s0 (callee-saved)
    lw x9, 36(sp)       # s1 (callee-saved)
    lw x10, 40(sp)      # a0
    lw x11, 44(sp)      # a1
    lw x12, 48(sp)      # a2
    lw x13, 52(sp)      # a3
    lw x14, 56(sp)      # a4
    lw x15, 60(sp)      # a5
    addi sp, sp, 64     # deallocate stack space
    ret


# memcpy: copy n bytes from src to dest, one byte at a time, lowest address
# first
#   arguments:
#       a0: dest address
#       a1: source address
#       a2: number of bytes to copy (n); nothing is copied when n is 0
#   temporaries used:
#       t0: count of bytes copied so far (equals n on return)
#       t1: the byte currently being moved
#   return:
#       a0: address of dest + n
#       a1: address of src + n
#   note:
#       unlike C's memcpy, the advanced pointers are returned rather than the
#       original dest, so a caller can keep appending at a0
#
memcpy:
  mv t0, zero               # nothing copied yet
  j memcpy_check            # test first so that n == 0 copies nothing
memcpy_loop:
  # TODO: copy in chunks of 4 bytes if n > 4
  lbu t1, 0(a1)             # fetch the next source byte
  addi a1, a1, 1
  sb  t1, 0(a0)             # and store it at the destination
  addi a0, a0, 1
  addi t0, t0, 1
memcpy_check:
  bne t0, a2, memcpy_loop   # keep going until n bytes have been copied
memcpy_done:
  ret