2025-02-11 12:08:32 +00:00
|
|
|
|
.globl add64
|
2025-02-14 11:09:46 +00:00
|
|
|
|
.globl mul
|
2025-02-15 09:45:25 +00:00
|
|
|
|
.globl div
|
|
|
|
|
.globl divmod
|
|
|
|
|
.globl clz
|
2025-02-11 12:08:32 +00:00
|
|
|
|
|
|
|
|
|
.text
|
|
|
|
|
|
|
|
|
|
# > When primitive arguments twice the size of a pointer-word are passed on the
|
|
|
|
|
# > stack, they are naturally aligned. When they are passed in the integer
|
|
|
|
|
# > registers, they reside in an aligned even-odd register pair, with the even
|
|
|
|
|
# > register holding the least-significant bits.
|
|
|
|
|
|
|
|
|
|
# 64-bit integer addition
|
|
|
|
|
# arguments:
|
|
|
|
|
# a0: x lower 32 bits
|
|
|
|
|
# a1: x upper 32 bits
|
|
|
|
|
# a2: y lower 32 bits
|
|
|
|
|
# a3: y upper 32 bits
|
|
|
|
|
# return:
|
|
|
|
|
# a0: x+y lower 32 bits
|
|
|
|
|
# a1: x+y upper 32 bits
|
|
|
|
|
#
|
|
|
|
|
add64:
|
|
|
|
|
add a0, a0, a2 # add lower 32 bits
|
|
|
|
|
add t0, a1, a3 # add upper 32 bits
|
|
|
|
|
sltu t1, a0, a2 # if lower 32-bit sum < a2 then set t1=1 (carry bit)
|
|
|
|
|
add a1, t0, t1 # upper 32 bits of answer (upper sum + carry bit)
|
|
|
|
|
ret
|
|
|
|
|
|
2025-02-14 11:09:46 +00:00
|
|
|
|
# 32-bit shift-add multiplication
|
|
|
|
|
# arguments:
|
|
|
|
|
# a0: multiplicand
|
|
|
|
|
# a1: multiplier
|
|
|
|
|
# return:
|
|
|
|
|
# a0 = a0 × a1
|
|
|
|
|
#
|
|
|
|
|
mul:
|
|
|
|
|
mv t0, a1 # Save multiplier in t0
|
|
|
|
|
li a1, 0 # Initialize product in a1
|
|
|
|
|
|
2025-02-15 09:45:25 +00:00
|
|
|
|
.multiply_loop:
|
|
|
|
|
beqz t0, .done # If multiplier is 0, we're done
|
2025-02-14 11:09:46 +00:00
|
|
|
|
andi t1, t0, 1 # Check least significant bit
|
2025-02-15 09:45:25 +00:00
|
|
|
|
beqz t1, .shift # If LSB is 0, skip addition
|
2025-02-14 11:09:46 +00:00
|
|
|
|
add a1, a1, a0 # Add multiplicand to product
|
|
|
|
|
|
2025-02-15 09:45:25 +00:00
|
|
|
|
.shift:
|
2025-02-14 11:09:46 +00:00
|
|
|
|
slli a0, a0, 1 # Shift multiplicand left
|
|
|
|
|
srli t0, t0, 1 # Shift multiplier right
|
2025-02-15 09:45:25 +00:00
|
|
|
|
j .multiply_loop # Continue loop
|
2025-02-14 11:09:46 +00:00
|
|
|
|
|
2025-02-15 09:45:25 +00:00
|
|
|
|
.done:
|
2025-02-14 11:09:46 +00:00
|
|
|
|
mv a0, a1 # Move product to return register
|
|
|
|
|
ret
|
2025-02-15 09:45:25 +00:00
|
|
|
|
|
|
|
|
|
# 32-bit shift-subtract integer division
|
|
|
|
|
# arguments:
|
|
|
|
|
# a0: dividend, u
|
|
|
|
|
# a1: divisor, v
|
|
|
|
|
# return:
|
|
|
|
|
# a0 = a0 ÷ a1
|
|
|
|
|
#
|
|
|
|
|
# https://blog.segger.com/algorithms-for-division-part-2-classics/
|
|
|
|
|
div:
|
|
|
|
|
bltu a0, a1, .zero # if (u < v) return 0
|
|
|
|
|
addi sp, sp, -16
|
|
|
|
|
sw s1, 4(sp)
|
|
|
|
|
mv s1, a0
|
|
|
|
|
mv a0, a1
|
|
|
|
|
sw ra, 12(sp)
|
|
|
|
|
sw s0, 8(sp)
|
|
|
|
|
mv s0, a1
|
|
|
|
|
jal clz # clz(u)
|
|
|
|
|
sw a0, 0(sp)
|
|
|
|
|
mv a0, s1
|
|
|
|
|
jal clz # clz(v)
|
|
|
|
|
lw a5, 0(sp)
|
|
|
|
|
sub a5, a5, a0 # k = clz(v) - clz(u); Calculate number of quotient digits - 1
|
|
|
|
|
sll a1, s0, a5 # v <<= k; Normalize divisor
|
|
|
|
|
li a0, 0 # q = 0; Init quotient
|
|
|
|
|
|
|
|
|
|
# Iterate k+1 times, each iteration developing one quotient bit.
|
|
|
|
|
.loop:
|
|
|
|
|
slli a0, a0, 1 # q <<= 1; Record preliminary '0' quotient digit
|
|
|
|
|
bltu s1, a1, .skip # if (u >= v) Subtraction will succeed...
|
|
|
|
|
sub s1, s1, a1 # u -= v;
|
|
|
|
|
addi a0, a0, 1 # q += 1; Turn preliminary '0' quotient digit to '1'
|
|
|
|
|
.skip:
|
|
|
|
|
addi a5, a5, -1 # k -= 1;
|
|
|
|
|
srli a1, a1, 1 # v >>= 1;
|
|
|
|
|
bgez a5, .loop # while (k >= 0);
|
|
|
|
|
|
|
|
|
|
lw ra, 12(sp)
|
|
|
|
|
lw s0, 8(sp)
|
|
|
|
|
lw s1, 4(sp)
|
|
|
|
|
addi sp, sp, 16
|
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
.zero:
|
|
|
|
|
li a0, 0
|
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
# 32-bit integer division with modulus (remainder)
|
|
|
|
|
# arguments:
|
|
|
|
|
# a0: dividend, u
|
|
|
|
|
# a1: divisor, v
|
|
|
|
|
# return:
|
|
|
|
|
# a0 = a0 ÷ a1
|
|
|
|
|
# a1 = remainder
|
|
|
|
|
divmod:
|
|
|
|
|
# call div; multiply quotient by divisor; subtract that from dividend
|
|
|
|
|
addi sp, sp, -16
|
|
|
|
|
sw ra, 12(sp)
|
|
|
|
|
sw s0, 8(sp)
|
|
|
|
|
sw s1, 4(sp)
|
|
|
|
|
mv s0, a0 # save dividend
|
|
|
|
|
mv s1, a1 # save divisor
|
|
|
|
|
jal div # a0 = a0 ÷ a1
|
|
|
|
|
sw a0, 0(sp)
|
|
|
|
|
mv a1, a0 # a1 = quotient
|
|
|
|
|
mv a0, s1 # a0 = divisor
|
|
|
|
|
jal mul # a0 = divisor × quotient
|
|
|
|
|
lw ra, 12(sp)
|
|
|
|
|
sub a1, s0, a0 # a1 = dividend - product; remainder
|
|
|
|
|
lw s0, 8(sp)
|
|
|
|
|
lw s1, 4(sp)
|
|
|
|
|
lw a0, 0(sp) # a0 = quotient
|
|
|
|
|
addi sp, sp, 16
|
|
|
|
|
ret
|
|
|
|
|
|
|
|
|
|
# count leading zero bits
|
|
|
|
|
# arguments:
|
|
|
|
|
# a0: input
|
|
|
|
|
# return:
|
|
|
|
|
# a0 = count of leading zero bits
|
|
|
|
|
#
|
|
|
|
|
# binary search approach translated from C code on
|
|
|
|
|
# https://blog.stephencleary.com/2010/10/implementing-gccs-builtin-functions.html
|
|
|
|
|
clz:
|
|
|
|
|
li a4, 16 # initialise count of zeros to 16
|
|
|
|
|
srli a5, a0, 16 # shift value right 16 bits
|
|
|
|
|
bne a5, zero, .eight # if the result is != 0 we have up to 16 leading zeros
|
|
|
|
|
mv a5, a0 # restore unshifted value to a5
|
|
|
|
|
li a4, 32 # we have up to 32 leading zeros
|
|
|
|
|
.eight:
|
|
|
|
|
srli a3, a5, 8 # shift the value right 8 bits
|
|
|
|
|
beq a3, zero, .four
|
|
|
|
|
addi a4, a4, -8 # subtract 8 leading zeros if shift result was non-zero
|
|
|
|
|
mv a5, a3
|
|
|
|
|
.four:
|
|
|
|
|
srli a3, a5, 4 # shift the value right 4 bits
|
|
|
|
|
beq a3, zero, .two
|
|
|
|
|
addi a4, a4, -4 # subtract 4 leading zeros if shift result was non-zero
|
|
|
|
|
mv a5, a3
|
|
|
|
|
.two:
|
|
|
|
|
srli a3, a5, 2 # shift the value right 2 bits
|
|
|
|
|
beq a3, zero, .one
|
|
|
|
|
addi a4, a4, -2 # subtract 2 leading zeros if shift result was non-zero
|
|
|
|
|
mv a5, a3
|
|
|
|
|
.one:
|
|
|
|
|
srli a3, a5, 1 # shift the value right 1 bit
|
|
|
|
|
sub a0, a4, a5 # a0 = count - remaining value
|
|
|
|
|
beq a3, zero, .end # if shift result was zero, return a0
|
|
|
|
|
addi a0, a4, -2 # subtract 2 leading zeros if shift result was non-zero
|
|
|
|
|
.end:
|
|
|
|
|
ret
|