# RISC-V assembly program implementing a calculator.

.org 0
# Export the entry point so the linker can find the program start address
.global _start

/* Syscall numbers (newlib). The code in this file loads the syscall number
   into t0 and the arguments into a0-a2 before executing ecall. */
.set SYSEXIT,  93
.set SYSWRITE, 64

.section .rodata
# Greeting printed by _start; str_size is its length in bytes.
# .ascii does not append a NUL terminator, so the length is passed explicitly.
str: .ascii "Hello World!\n"
     .set str_size, .-str

# Banner printed once at the start of regdump; str2_size is its length in bytes.
str2: .ascii "regdump\n"
      .set str2_size, .-str2

# Table of 16 register names, each exactly two characters, packed back to back
# with no separators. regdump reads entry i at address regnames + 2*i.
regnames:
  .ascii "x0", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5"

.section .bss
buf: .skip 20        # 20-byte scratch buffer; regdump assembles each output line here
                     # e.g. "x10: ABCDEFG\n" needs 13 bytes

.section .data
buflen: .byte 0      # length of buf string (not referenced by the code visible here)


.text
# Program entry point.
#   1. Writes the greeting `str` to stdout HELLO_COUNT times.
#   2. Adds two 64-bit integers with add64.
#   3. Calls regdump, then exits with return code 0.
# Syscall convention used throughout: number in t0, arguments in a0-a2, ecall.
.set HELLO_COUNT, 5
_start:
    li t2, HELLO_COUNT  # t2 = how many greetings to print
    li t1, 0            # t1 = greetings printed so far

    # Jump to the immediately following instruction: a smoke test for
    # jal with rd = x0 (no link register written).
.L1:
    jal x0, .L2
.L2:
    nop

    j .Lhello_check     # test the counter before the first write
.Lhello_print:
    la a1, str          # a1 = address of the greeting
    li a2, str_size     # a2 = greeting length in bytes
    li a0, 1            # a0 = file descriptor 1 (stdout)
    li t0, SYSWRITE     # t0 = "write" syscall number
    ecall
    addi t1, t1, 1      # one more greeting printed
.Lhello_check:
    bne t1, t2, .Lhello_print

    # x = y = 0x00000001_80000000, so x + y = 0x00000003_00000000.
    # The low words overflow, which exercises the carry path of add64.
    li a3, 1            # y upper 32 bits
    li a2, 0x80000000   # y lower 32 bits
    li a1, 1            # x upper 32 bits
    li a0, 0x80000000   # x lower 32 bits
    jal add64
    jal regdump

    li a0, 0            # return code 0
    li t0, SYSEXIT      # t0 = "exit" syscall number
    ecall               # terminate the program

# add64: add two 64-bit integers held as pairs of 32-bit registers
#   arguments:
#       a0: x, lower 32 bits
#       a1: x, upper 32 bits
#       a2: y, lower 32 bits
#       a3: y, upper 32 bits
#   temporaries used:
#       t0: sum of the upper words before the carry is applied
#       t1: carry out of the lower-word addition (0 or 1)
#   return:
#       a0: x+y, lower 32 bits
#       a1: x+y, upper 32 bits
#
add64:
    add  t0, a1, a3  # t0 = upper(x) + upper(y), carry not yet included
    add  a0, a0, a2  # a0 = lower(x) + lower(y), wraps modulo 2^32
    sltu t1, a0, a2  # the low sum wrapped iff it is (unsigned) below lower(y)
    add  a1, t1, t0  # a1 = upper sum + carry
    ret


# regdump: print the "regdump" banner followed by one line per entry of the
# regnames table, in the form "<name>: \n" (e.g. "x0: ", "ra: ", ...).
# NOTE: register *values* are not printed yet; only the names are emitted.
# TODO: format each register's value in hex into buf after the ": ".
#   arguments:
#       none
#   return:
#       nothing; every register this routine touches (ra, s0, s1, a0, a1,
#       a2, t0, t1) is saved on entry and restored before returning, so the
#       caller's state is left intact
#   stack:
#       32 bytes (eight saved words)
#   calls:
#       memcpy, and the write syscall (number in t0, arguments in a0-a2)
#
.set REGDUMP_COUNT,    16   # number of entries in regnames
.set REGDUMP_NAME_LEN, 2    # bytes per regnames entry
.set REGDUMP_LINE_LEN, 5    # name (2) + ':' + ' ' + '\n'
regdump:
    addi sp, sp, -32    # allocate the save area
    sw ra, 28(sp)       # ra is overwritten by the jal to memcpy
    sw s0, 24(sp)       # s0/s1 are callee-saved and used as loop registers
    sw s1, 20(sp)
    sw a0, 16(sp)       # a0-a2 and t0 are overwritten for the write syscall
    sw a1, 12(sp)
    sw a2, 8(sp)
    sw t0, 4(sp)
    sw t1, 0(sp)        # t1 is used as a temporary by memcpy

    # print the banner
    li t0, SYSWRITE     # "write" syscall
    li a0, 1            # 1 = standard output (stdout)
    la a1, str2         # address of the banner string
    li a2, str2_size    # length of the banner string
    ecall

    li s0, 0                # s0 = index of the current register name
    li s1, REGDUMP_COUNT    # s1 = loop bound

regdump_loop:
    beq s0, s1, regdump_done

    # a1 = &regnames[s0]; each entry is REGDUMP_NAME_LEN (2) bytes wide
    la a1, regnames
    slli a2, s0, 1
    add a1, a1, a2

    # copy the name into buf; memcpy returns a0 = buf + REGDUMP_NAME_LEN
    la a0, buf
    li a2, REGDUMP_NAME_LEN
    jal memcpy

    # append ": \n" right after the name
    li t0, ':'
    sb t0, 0(a0)
    li t0, 0x20         # space
    sb t0, 1(a0)
    li t0, '\n'
    sb t0, 2(a0)

    # print the assembled line
    li t0, SYSWRITE     # "write" syscall
    li a0, 1            # 1 = standard output (stdout)
    la a1, buf
    li a2, REGDUMP_LINE_LEN
    ecall

    addi s0, s0, 1
    j regdump_loop

regdump_done:
    lw t1, 0(sp)        # restore everything saved on entry
    lw t0, 4(sp)
    lw a2, 8(sp)
    lw a1, 12(sp)
    lw a0, 16(sp)
    lw s1, 20(sp)
    lw s0, 24(sp)
    lw ra, 28(sp)
    addi sp, sp, 32     # release the save area
    ret


# memcpy: copy n bytes from src to dest, one byte at a time, lowest address first
# (unlike C's memcpy, this returns the advanced pointers, not the original dest)
#   arguments:
#       a0: dest address
#       a1: source address
#       a2: n, the number of bytes to copy
#   temporaries used:
#       t0: count of bytes copied so far
#       t1: the byte currently being moved
#   return:
#       a0: address of dest + n
#       a1: address of src + n
#
memcpy:
    li t0, 0                    # nothing copied yet
    j .Lmemcpy_check            # test first so that n == 0 copies nothing
.Lmemcpy_copy:
    # TODO: copy in chunks of 4 bytes if n > 4
    lbu t1, 0(a1)               # fetch the next source byte
    sb  t1, 0(a0)               # store it at the destination
    addi a1, a1, 1              # advance the source pointer
    addi a0, a0, 1              # advance the destination pointer
    addi t0, t0, 1              # count the byte
.Lmemcpy_check:
    bne t0, a2, .Lmemcpy_copy   # keep going until n bytes have been copied
    ret