Implement mul, div, divmod, clz
This commit is contained in:
parent
10bafd8485
commit
b3605f4a04
9 changed files with 545 additions and 7 deletions
19
Makefile
19
Makefile
|
@ -6,6 +6,11 @@ LD=riscv64-unknown-elf-ld
|
|||
export JQ?=jaq
|
||||
export QEMU?=qemu-riscv32
|
||||
|
||||
TEST_SRC := $(shell find tests -name '*.s')
|
||||
|
||||
# Replace .s with .elf
|
||||
TESTS := $(patsubst %.s,%.elf,$(TEST_SRC))
|
||||
|
||||
all: calc.elf
|
||||
|
||||
check: tests
|
||||
|
@ -20,7 +25,7 @@ hello.elf: hello.o
|
|||
calc.elf: mem.o hex.o debug.o math.o calc.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests: tests/btohex.elf tests/tohex.elf tests/math_add64.elf tests/math_mul.elf
|
||||
tests: $(TESTS)
|
||||
|
||||
tests/math_add64.elf: hex.o math.o tests/math_add64.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
@ -28,6 +33,18 @@ tests/math_add64.elf: hex.o math.o tests/math_add64.o
|
|||
tests/math_mul.elf: hex.o math.o tests/math_mul.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests/math_div.elf: hex.o math.o tests/math_div.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests/math_divmod.elf: hex.o math.o tests/math_divmod.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests/math_mod.elf: hex.o math.o tests/math_mod.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests/math_clz.elf: hex.o math.o tests/math_clz.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
tests/btohex.elf: mem.o hex.o debug.o tests/btohex.o
|
||||
$(LD) -m elf32lriscv -T link.ld $^ -o $@
|
||||
|
||||
|
|
127
math.s
127
math.s
|
@ -1,5 +1,8 @@
|
|||
.globl add64
|
||||
.globl mul
|
||||
.globl div
|
||||
.globl divmod
|
||||
.globl clz
|
||||
|
||||
.text
|
||||
|
||||
|
@ -36,17 +39,129 @@ mul:
|
|||
mv t0, a1 # Save multiplier in t0
|
||||
li a1, 0 # Initialize product in a1
|
||||
|
||||
multiply_loop:
|
||||
beqz t0, done # If multiplier is 0, we're done
|
||||
.multiply_loop:
|
||||
beqz t0, .done # If multiplier is 0, we're done
|
||||
andi t1, t0, 1 # Check least significant bit
|
||||
beqz t1, shift # If LSB is 0, skip addition
|
||||
beqz t1, .shift # If LSB is 0, skip addition
|
||||
add a1, a1, a0 # Add multiplicand to product
|
||||
|
||||
shift:
|
||||
.shift:
|
||||
slli a0, a0, 1 # Shift multiplicand left
|
||||
srli t0, t0, 1 # Shift multiplier right
|
||||
j multiply_loop # Continue loop
|
||||
j .multiply_loop # Continue loop
|
||||
|
||||
done:
|
||||
.done:
|
||||
mv a0, a1 # Move product to return register
|
||||
ret
|
||||
|
||||
# 32-bit shift-subtract integer division
|
||||
# arguments:
|
||||
# a0: dividend, u
|
||||
# a1: divisor, v
|
||||
# return:
|
||||
# a0 = a0 ÷ a1
|
||||
#
|
||||
# https://blog.segger.com/algorithms-for-division-part-2-classics/
|
||||
div:
|
||||
bltu a0, a1, .zero # if (u < v) return 0
|
||||
addi sp, sp, -16
|
||||
sw s1, 4(sp)
|
||||
mv s1, a0
|
||||
mv a0, a1
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
mv s0, a1
|
||||
jal clz # clz(u)
|
||||
sw a0, 0(sp)
|
||||
mv a0, s1
|
||||
jal clz # clz(v)
|
||||
lw a5, 0(sp)
|
||||
sub a5, a5, a0 # k = clz(v) - clz(u); Calculate number of quotient digits - 1
|
||||
sll a1, s0, a5 # v <<= k; Normalize divisor
|
||||
li a0, 0 # q = 0; Init quotient
|
||||
|
||||
# Iterate k+1 times, each iteration developing one quotient bit.
|
||||
.loop:
|
||||
slli a0, a0, 1 # q <<= 1; Record preliminary '0' quotient digit
|
||||
bltu s1, a1, .skip # if (u >= v) Subtraction will succeed...
|
||||
sub s1, s1, a1 # u -= v;
|
||||
addi a0, a0, 1 # q += 1; Turn preliminary '0' quotient digit to '1'
|
||||
.skip:
|
||||
addi a5, a5, -1 # k -= 1;
|
||||
srli a1, a1, 1 # v >>= 1;
|
||||
bgez a5, .loop # while (k >= 0);
|
||||
|
||||
lw ra, 12(sp)
|
||||
lw s0, 8(sp)
|
||||
lw s1, 4(sp)
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
.zero:
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
# 32-bit integer division with modulus (remainder)
|
||||
# arguments:
|
||||
# a0: dividend, u
|
||||
# a1: divisor, v
|
||||
# return:
|
||||
# a0 = a0 ÷ a1
|
||||
# a1 = remainder
|
||||
divmod:
|
||||
# call div; multiply quotient by divisor; subtract that from dividend
|
||||
addi sp, sp, -16
|
||||
sw ra, 12(sp)
|
||||
sw s0, 8(sp)
|
||||
sw s1, 4(sp)
|
||||
mv s0, a0 # save dividend
|
||||
mv s1, a1 # save divisor
|
||||
jal div # a0 = a0 ÷ a1
|
||||
sw a0, 0(sp)
|
||||
mv a1, a0 # a1 = quotient
|
||||
mv a0, s1 # a0 = divisor
|
||||
jal mul # a0 = divisor × quotient
|
||||
lw ra, 12(sp)
|
||||
sub a1, s0, a0 # a1 = dividend - product; remainder
|
||||
lw s0, 8(sp)
|
||||
lw s1, 4(sp)
|
||||
lw a0, 0(sp) # a0 = quotient
|
||||
addi sp, sp, 16
|
||||
ret
|
||||
|
||||
# count leading zero bits
|
||||
# arguments:
|
||||
# a0: input
|
||||
# return:
|
||||
# a0 = count of leading zero bits
|
||||
#
|
||||
# binary search approach translated from C code on
|
||||
# https://blog.stephencleary.com/2010/10/implementing-gccs-builtin-functions.html
|
||||
clz:
|
||||
li a4, 16 # initialise count of zeros to 16
|
||||
srli a5, a0, 16 # shift value right 16 bits
|
||||
bne a5, zero, .eight # if the result is != 0 we have up to 16 leading zeros
|
||||
mv a5, a0 # restore unshifted value to a5
|
||||
li a4, 32 # we have up to 32 leading zeros
|
||||
.eight:
|
||||
srli a3, a5, 8 # shift the value right 8 bits
|
||||
beq a3, zero, .four
|
||||
addi a4, a4, -8 # subtract 8 leading zeros if shift result was non-zero
|
||||
mv a5, a3
|
||||
.four:
|
||||
srli a3, a5, 4 # shift the value right 4 bits
|
||||
beq a3, zero, .two
|
||||
addi a4, a4, -4 # subtract 4 leading zeros if shift result was non-zero
|
||||
mv a5, a3
|
||||
.two:
|
||||
srli a3, a5, 2 # shift the value right 2 bits
|
||||
beq a3, zero, .one
|
||||
addi a4, a4, -2 # subtract 2 leading zeros if shift result was non-zero
|
||||
mv a5, a3
|
||||
.one:
|
||||
srli a3, a5, 1 # shift the value right 1 bit
|
||||
sub a0, a4, a5 # a0 = count - remaining value
|
||||
beq a3, zero, .end # if shift result was zero, return a0
|
||||
addi a0, a4, -2 # subtract 2 leading zeros if shift result was non-zero
|
||||
.end:
|
||||
ret
|
||||
|
|
|
@ -23,6 +23,16 @@ inputs:
|
|||
.word 1 # 1
|
||||
.word 0x80000000 # 0.5
|
||||
.word 2 # 2
|
||||
|
||||
.word 0x40000000 # 0.25
|
||||
.word -1 # -1
|
||||
.word 0x80000000 # 0.5
|
||||
.word 2 # 2
|
||||
|
||||
.word 0x40000000 # 0.25
|
||||
.word 1 # 1
|
||||
.word 0x80000000 # 0.5
|
||||
.word -2 # -2
|
||||
inputs_end:
|
||||
# llvm doesn't like this: error: expected relocatable expression
|
||||
#.set inputs_end, .-inputs
|
||||
|
|
63
tests/math_clz.s
Normal file
63
tests/math_clz.s
Normal file
|
@ -0,0 +1,63 @@
|
|||
# Test for clz
|
||||
|
||||
.org 0
|
||||
# Provide program starting address to linker
|
||||
.global _start
|
||||
|
||||
.extern clz
|
||||
.extern tohex
|
||||
|
||||
/* newlib system calls */
|
||||
.set SYSEXIT, 93
|
||||
.set SYSWRITE, 64
|
||||
|
||||
.section .rodata
|
||||
|
||||
inputs:
|
||||
.word 0
|
||||
.word 1
|
||||
.word 2
|
||||
.word 3
|
||||
.word 0xF
|
||||
.word 0xFF
|
||||
.word 0xFFF
|
||||
.word 0xFFFF
|
||||
.word 0xFFFFF
|
||||
.word 0xFFFFFF
|
||||
.word 0xFFFFFFF
|
||||
.word 0xFFFFFFFF
|
||||
.word 0x80000000
|
||||
.word 0x00008000
|
||||
inputs_end:
|
||||
|
||||
.section .bss
|
||||
|
||||
buf: .skip 9
|
||||
|
||||
.text
|
||||
|
||||
_start:
|
||||
li a0, '\n'
|
||||
la a1, buf
|
||||
sb a0, 8(a1) # append newline to buf
|
||||
|
||||
la s0, inputs # init loop variables
|
||||
la s1, inputs_end
|
||||
loop:
|
||||
lw a0, 0(s0) # input value
|
||||
jal clz
|
||||
la a1, buf
|
||||
jal tohex
|
||||
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 9 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
addi s0, s0, 4 # increment input pointer to the next input
|
||||
bltu s0, s1, loop # if the input address is less than inputs_end, loop
|
||||
|
||||
li t0, SYSEXIT # "exit" syscall
|
||||
la a0, 0 # Use 0 return code
|
||||
ecall # invoke syscall to terminate the program
|
62
tests/math_div.s
Normal file
62
tests/math_div.s
Normal file
|
@ -0,0 +1,62 @@
|
|||
# Test for div
|
||||
|
||||
.org 0
|
||||
# Provide program starting address to linker
|
||||
.global _start
|
||||
|
||||
.extern div
|
||||
.extern tohex
|
||||
|
||||
/* newlib system calls */
|
||||
.set SYSEXIT, 93
|
||||
.set SYSWRITE, 64
|
||||
|
||||
.section .rodata
|
||||
|
||||
inputs:
|
||||
.word 3
|
||||
.word 5
|
||||
|
||||
.word 21
|
||||
.word 3
|
||||
|
||||
.word 0xFFFF
|
||||
.word 100
|
||||
|
||||
.word -7
|
||||
.word 3
|
||||
inputs_end:
|
||||
|
||||
.section .bss
|
||||
|
||||
buf: .skip 9
|
||||
|
||||
.text
|
||||
|
||||
_start:
|
||||
li a0, '\n'
|
||||
la a1, buf
|
||||
sb a0, 8(a1) # append newline to buf
|
||||
|
||||
la s0, inputs # init loop variables
|
||||
la s1, inputs_end
|
||||
loop:
|
||||
lw a0, 0(s0) # dividend
|
||||
lw a1, 4(s0) # divisor
|
||||
jal div
|
||||
# TODO: Format as an actual decimal
|
||||
la a1, buf
|
||||
jal tohex
|
||||
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 9 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
addi s0, s0, 8 # increment input pointer to next pair of inputs
|
||||
bltu s0, s1, loop # if the input address is less than inputs_end, loop
|
||||
|
||||
li t0, SYSEXIT # "exit" syscall
|
||||
la a0, 0 # Use 0 return code
|
||||
ecall # invoke syscall to terminate the program
|
83
tests/math_divmod.s
Normal file
83
tests/math_divmod.s
Normal file
|
@ -0,0 +1,83 @@
|
|||
# Test for divmod
|
||||
|
||||
.org 0
|
||||
# Provide program starting address to linker
|
||||
.global _start
|
||||
|
||||
.extern div
|
||||
.extern tohex
|
||||
|
||||
/* newlib system calls */
|
||||
.set SYSEXIT, 93
|
||||
.set SYSWRITE, 64
|
||||
|
||||
.section .rodata
|
||||
|
||||
inputs:
|
||||
.word 3
|
||||
.word 5
|
||||
|
||||
.word 21
|
||||
.word 3
|
||||
|
||||
.word 0xFFFF
|
||||
.word 100
|
||||
|
||||
# Division of this one doesn't work yet
|
||||
# .word -7
|
||||
# .word 3
|
||||
inputs_end:
|
||||
|
||||
.section .bss
|
||||
|
||||
buf: .skip 9
|
||||
|
||||
.text
|
||||
|
||||
_start:
|
||||
li a0, '\n'
|
||||
la a1, buf
|
||||
sb a0, 8(a1) # append newline to buf
|
||||
|
||||
la s0, inputs # init loop variables
|
||||
loop:
|
||||
lw a0, 0(s0) # dividend
|
||||
lw a1, 4(s0) # divisor
|
||||
jal divmod
|
||||
mv s1, a1 # save remainder
|
||||
|
||||
# print quotient
|
||||
la a1, buf
|
||||
jal tohex
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 8 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
# print ,
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, ','
|
||||
sb a2, 0(a1)
|
||||
li a2, 1 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
# print remainder
|
||||
mv a0, s1 # restore remainder
|
||||
la a1, buf
|
||||
jal tohex
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 9 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
addi s0, s0, 8 # increment input pointer to next pair of inputs
|
||||
la a0, inputs_end
|
||||
bltu s0, a0, loop # if the input address is less than inputs_end, loop
|
||||
|
||||
li t0, SYSEXIT # "exit" syscall
|
||||
la a0, 0 # Use 0 return code
|
||||
ecall # invoke syscall to terminate the program
|
66
tests/math_mod.s
Normal file
66
tests/math_mod.s
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Test for mul
|
||||
|
||||
.org 0
|
||||
# Provide program starting address to linker
|
||||
.global _start
|
||||
|
||||
.extern mul
|
||||
.extern tohex
|
||||
|
||||
/* newlib system calls */
|
||||
.set SYSEXIT, 93
|
||||
.set SYSWRITE, 64
|
||||
|
||||
.section .rodata
|
||||
|
||||
inputs:
|
||||
.word 3
|
||||
.word 5
|
||||
|
||||
.word 21
|
||||
.word 3
|
||||
|
||||
.word 0xFFFF
|
||||
.word 100
|
||||
|
||||
.word -7
|
||||
.word 3
|
||||
inputs_end:
|
||||
# llvm doesn't like this: error: expected relocatable expression
|
||||
#.set inputs_end, .-inputs
|
||||
# turns out it was right. That was calculating a length, which was
|
||||
# incorrect for how it was used for looping.
|
||||
|
||||
.section .bss
|
||||
|
||||
buf: .skip 9
|
||||
|
||||
.text
|
||||
|
||||
_start:
|
||||
li a0, '\n'
|
||||
la a1, buf
|
||||
sb a0, 8(a1) # append newline to buf
|
||||
|
||||
la s0, inputs # init loop variables
|
||||
la s1, inputs_end
|
||||
loop:
|
||||
lw a0, 0(s0) # multiplicand
|
||||
lw a1, 4(s0) # multiplier
|
||||
jal mul
|
||||
# TODO: Format as an actual decimal
|
||||
la a1, buf
|
||||
jal tohex
|
||||
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 9 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
addi s0, s0, 8 # increment input pointer to next pair of 64-bit inputs
|
||||
bltu s0, s1, loop # if the input address is less than inputs_end, loop
|
||||
|
||||
li t0, SYSEXIT # "exit" syscall
|
||||
la a0, 0 # Use 0 return code
|
||||
ecall # invoke syscall to terminate the program
|
66
tests/math_mul.s
Normal file
66
tests/math_mul.s
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Test for mul
|
||||
|
||||
.org 0
|
||||
# Provide program starting address to linker
|
||||
.global _start
|
||||
|
||||
.extern mul
|
||||
.extern tohex
|
||||
|
||||
/* newlib system calls */
|
||||
.set SYSEXIT, 93
|
||||
.set SYSWRITE, 64
|
||||
|
||||
.section .rodata
|
||||
|
||||
inputs:
|
||||
.word 3
|
||||
.word 5
|
||||
|
||||
.word 21
|
||||
.word 3
|
||||
|
||||
.word 0xFFFF
|
||||
.word 100
|
||||
|
||||
.word -7
|
||||
.word 3
|
||||
inputs_end:
|
||||
# llvm doesn't like this: error: expected relocatable expression
|
||||
#.set inputs_end, .-inputs
|
||||
# turns out it was right. That was calculating a length, which was
|
||||
# incorrect for how it was used for looping.
|
||||
|
||||
.section .bss
|
||||
|
||||
buf: .skip 9
|
||||
|
||||
.text
|
||||
|
||||
_start:
|
||||
li a0, '\n'
|
||||
la a1, buf
|
||||
sb a0, 8(a1) # append newline to buf
|
||||
|
||||
la s0, inputs # init loop variables
|
||||
la s1, inputs_end
|
||||
loop:
|
||||
lw a0, 0(s0) # multiplicand
|
||||
lw a1, 4(s0) # multiplier
|
||||
jal mul
|
||||
# TODO: Format as an actual decimal
|
||||
la a1, buf
|
||||
jal tohex
|
||||
|
||||
li t0, SYSWRITE # "write" syscall
|
||||
li a0, 1 # 1 = standard output (stdout)
|
||||
la a1, buf # load address of output string
|
||||
li a2, 9 # length of output string
|
||||
ecall # invoke syscall to print the string
|
||||
|
||||
addi s0, s0, 8 # increment input pointer to next pair of 64-bit inputs
|
||||
bltu s0, s1, loop # if the input address is less than inputs_end, loop
|
||||
|
||||
li t0, SYSEXIT # "exit" syscall
|
||||
la a0, 0 # Use 0 return code
|
||||
ecall # invoke syscall to terminate the program
|
|
@ -9,6 +9,8 @@ test_add64() {
|
|||
expected=$(cat << END
|
||||
00000003.80000000
|
||||
00000003.C0000000
|
||||
00000001.C0000000
|
||||
FFFFFFFF.C0000000
|
||||
END
|
||||
)
|
||||
|
||||
|
@ -31,3 +33,57 @@ END
|
|||
# different inputs.
|
||||
test $? -eq 0 && test "$result" = "$expected"
|
||||
}
|
||||
|
||||
test_div() {
|
||||
result=$("${QEMU}" -B 0x80000000 -s 2k tests/math_div.elf)
|
||||
expected=$(cat << END
|
||||
00000000
|
||||
00000007
|
||||
0000028F
|
||||
55555553
|
||||
END
|
||||
)
|
||||
|
||||
# TODO: Ideally this test would allow calling the binary repeatedly with
|
||||
# different inputs.
|
||||
test $? -eq 0 && test "$result" = "$expected"
|
||||
}
|
||||
|
||||
test_divmod() {
|
||||
result=$("${QEMU}" -B 0x80000000 -s 2k tests/math_divmod.elf)
|
||||
expected=$(cat << END
|
||||
00000000,00000003
|
||||
00000007,00000000
|
||||
0000028F,00000023
|
||||
END
|
||||
)
|
||||
|
||||
# TODO: Ideally this test would allow calling the binary repeatedly with
|
||||
# different inputs.
|
||||
test $? -eq 0 && test "$result" = "$expected"
|
||||
}
|
||||
|
||||
test_clz() {
|
||||
result=$("${QEMU}" -B 0x80000000 -s 2k tests/math_clz.elf)
|
||||
expected=$(cat << END
|
||||
00000020
|
||||
0000001F
|
||||
0000001E
|
||||
0000001E
|
||||
0000001C
|
||||
00000018
|
||||
00000014
|
||||
00000010
|
||||
0000000C
|
||||
00000008
|
||||
00000004
|
||||
00000000
|
||||
00000000
|
||||
00000010
|
||||
END
|
||||
)
|
||||
|
||||
# TODO: Ideally this test would allow calling the binary repeatedly with
|
||||
# different inputs.
|
||||
test $? -eq 0 && test "$result" = "$expected"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue