diff --git a/stage1/eval.s b/stage1/eval.s index 9042735..b380d6c 100644 --- a/stage1/eval.s +++ b/stage1/eval.s @@ -152,17 +152,83 @@ call_procedure: .Lcall_procedure_not_callable: addi sp, sp, -0x18 sd ra, 0x00(sp) - sd a1, 0x08(sp) + sd a0, 0x08(sp) sd a2, 0x10(sp) # release arguments - call release_object - ld a0, 0x08(sp) + mv a0, a1 call release_object ld a0, 0x10(sp) call release_object # clean up stack and return error ld ra, 0x00(sp) - addi sp, sp, 0x18 + ld a1, 0x08(sp) li a0, EVAL_ERROR_NOT_CALLABLE - mv a1, zero + addi sp, sp, 0x18 ret + +# Takes the first element of a list and evaluates it +# +# a0 = pointer to structure. pass (list, _), will be written with (head, tail) +# a1 = local words +# +# Return: +# +# a0 = eval error +# a1 = eval error data if error +# +# The pointer will contain (nil, nil) on failure, no further release is necessary +.global eval_head +eval_head: + addi sp, sp, -0x28 + sd ra, 0x00(sp) + sd s1, 0x08(sp) # s1 = a0, pointer to structure + sd a1, 0x10(sp) # locals + mv s1, a0 + # uncons the list + ld t0, 0x00(a0) + sd zero, 0x00(a0) # taken + sd zero, 0x08(a0) # clear in case of error + mv a0, t0 + call uncons + beqz a0, .Leval_head_exc # not a cons + # store the tail + sd a2, 0x08(s1) + # eval head x locals + mv a0, a1 + ld a1, 0x10(sp) + sd zero, 0x10(sp) # used + call eval + bnez a0, .Leval_head_err # err + # store evaluated head + sd a1, 0x00(s1) + # set result to ok + sd zero, 0x18(sp) + sd zero, 0x20(sp) +.Leval_head_ret: + # release locals if not used + ld a0, 0x10(sp) + call release_object + # restore and return + ld ra, 0x00(sp) + ld s1, 0x08(sp) + ld a0, 0x18(sp) + ld a1, 0x20(sp) + addi sp, sp, 0x28 + ret +.Leval_head_exc: + # set exception + li t0, EVAL_ERROR_EXCEPTION + sd t0, 0x18(sp) + sd zero, 0x20(sp) + j .Leval_head_err_ret +.Leval_head_err: + # store result from eval + sd a0, 0x18(sp) + sd a1, 0x20(sp) +.Leval_head_err_ret: + # release tail if it was set + ld a0, 0x08(s1) + call release_object + sd zero, 0x08(s1) # clear it because we released it + # return + j .Leval_head_ret diff --git a/stage1/proc_builtin.s b/stage1/proc_builtin.s index 1cbe08d..4557f4c 100644 --- a/stage1/proc_builtin.s +++ b/stage1/proc_builtin.s @@ -450,82 +450,61 @@ proc_cdr: .global proc_cons proc_cons: # reserve stack, preserve return addr - addi sp, sp, -0x20 + addi sp, sp, -0x28 sd ra, 0x00(sp) - sd s1, 0x08(sp) - sd a1, 0x10(sp) # locals - sd zero, 0x18(sp) # cons head - mv s1, zero # args list - # get first arg - call uncons - beqz a0, .Lproc_cons_exc - # store rest of args, stash head for now - mv s1, a2 - sd a1, 0x18(sp) - # acquire locals + sd a1, 0x08(sp) # locals + sd a0, 0x10(sp) # cons head + sd zero, 0x18(sp) # cons tail + sd zero, 0x20(sp) # unused args + # evaluate first two args + # arg 0 (head) + addi a0, sp, 0x10 + call acquire_locals + call eval_head + bnez a0, .Lproc_cons_ret + # arg 1 (tail) + addi a0, sp, 0x18 + ld a1, 0x08(sp) # locals + sd zero, 0x08(sp) # used + call eval_head + bnez a0, .Lproc_cons_ret + # cons head, tail ld a0, 0x10(sp) - call acquire_object - # unstash and eval - mv a1, a0 - ld a0, 0x18(sp) - sd zero, 0x18(sp) - call eval - # handle err - bnez a0, .Lproc_cons_end - # store result - sd a1, 0x18(sp) - # get second arg - mv a0, s1 - mv s1, zero - call uncons - beqz a0, .Lproc_cons_exc - # store rest of args (will release on ret) - mv s1, a2 - # eval second arg, give up locals - mv a0, a1 - ld a1, 0x10(sp) - sd zero, 0x10(sp) - call eval - # handle err - bnez a0, .Lproc_cons_end - # do cons (tail already in a1) - ld a0, 0x18(sp) - sd zero, 0x18(sp) + ld a1, 0x18(sp) + sd zero, 0x10(sp) # used + sd zero, 0x18(sp) # used call cons beqz a0, .Lproc_cons_no_mem - # move cons to a1 (result), set a0 to ok - mv a1, a0 - li a0, 0 - # done - j .Lproc_cons_end -.Lproc_cons_no_mem: - li a0, EVAL_ERROR_NO_FREE_MEM - li a1, 0 - j .Lproc_cons_end -.Lproc_cons_exc: - li a0, EVAL_ERROR_EXCEPTION - li a1, 0 -.Lproc_cons_end: + mv a1, a0 # result + mv a0, zero # ok +.Lproc_cons_ret: # stash return value addi sp, sp, -0x10 sd a0, 0x00(sp) sd a1, 0x08(sp) - # release a1 arg list - mv a0, s1 + # release from 0x18 .. 0x38 (sp) + ld a0, 0x18(sp) call release_object - # release locals ld a0, 0x20(sp) call release_object - # release cons head ld a0, 0x28(sp) call release_object + ld a0, 0x30(sp) + call release_object # load stashed data and return ld a0, 0x00(sp) ld a1, 0x08(sp) ld ra, 0x10(sp) - ld s1, 0x18(sp) - addi sp, sp, 0x30 + addi sp, sp, 0x38 ret +.Lproc_cons_no_mem: + li a0, EVAL_ERROR_NO_FREE_MEM + li a1, 0 + j .Lproc_cons_ret +.Lproc_cons_exc: + li a0, EVAL_ERROR_EXCEPTION + li a1, 0 + j .Lproc_cons_ret # Create procedure # e.g. (proc args locals (car args)) ; equivalent to quote @@ -649,34 +628,50 @@ proc_stub: # (eval ) .global proc_eval proc_eval: - addi sp, sp, -0x20 + addi sp, sp, -0x28 sd ra, 0x00(sp) - sd a1, 0x10(sp) - # evaluate first arg = locals, to 0x18(sp) - call uncons - beqz a0, .Lproc_eval_exc # end of arg list - sd a2, 0x08(sp) # save rest of arg list - mv a0, a1 # eval head - ld a1, 0x10(sp) - call acquire_locals # we need to use a1 locals one more time - call eval + sd a1, 0x08(sp) # locals (in) + sd a0, 0x10(sp) # arg 0 = provided locals + sd zero, 0x18(sp) # arg 1 = expression + sd zero, 0x20(sp) # unused args + # arg 0 + addi a0, sp, 0x10 + call acquire_locals + call eval_head bnez a0, .Lproc_eval_error - sd a1, 0x18(sp) - # evaluate second arg = expression, to 0x20(sp) - ld a0, 0x08(sp) - call car # drop rest - ld a1, 0x10(sp) - call eval + # arg 1 + addi a0, sp, 0x18 + ld a1, 0x08(sp) # locals + sd zero, 0x08(sp) # used + call eval_head bnez a0, .Lproc_eval_error - # tail-evaluate the result again in provided scope - mv a0, a1 - ld a1, 0x18(sp) + # release rest of args + ld a0, 0x20(sp) + call release_object + # tail-call eval (args are actually in reverse of what they are for eval) ld ra, 0x00(sp) - addi sp, sp, 0x20 + ld a1, 0x10(sp) + ld a0, 0x18(sp) + addi sp, sp, 0x28 j eval -.Lproc_eval_exc: - li a0, EVAL_ERROR_EXCEPTION .Lproc_eval_error: - ld ra, 0x00(sp) - addi sp, sp, 0x20 + # on error, release everything remaining in the stack + # stash a0, a1 first + addi sp, sp, -0x10 + sd a0, 0x00(sp) + sd a1, 0x08(sp) + # release 0x18 .. 0x38 (sp) + ld a0, 0x18(sp) + call release_object + ld a0, 0x20(sp) + call release_object + ld a0, 0x28(sp) + call release_object + ld a0, 0x30(sp) + call release_object + # restore and return + ld a0, 0x00(sp) + ld a1, 0x08(sp) + ld ra, 0x10(sp) + addi sp, sp, 0x38 ret diff --git a/stage1/words.s b/stage1/words.s index db507dd..cb3f52d 100644 --- a/stage1/words.s +++ b/stage1/words.s @@ -170,6 +170,12 @@ INITIAL_WORDS: .ascii "eval$" .balign 8 + .quad eval_head + .2byte 10 + .byte LISP_OBJECT_TYPE_INTEGER + .ascii "eval-head$" + .balign 8 + .quad allocate .2byte 9 .byte LISP_OBJECT_TYPE_INTEGER diff --git a/stage2/05-math.lsp b/stage2/05-math.lsp index 2c06396..27cd556 100644 --- a/stage2/05-math.lsp +++ b/stage2/05-math.lsp @@ -1,20 +1,10 @@ ; Create procedure from native math routine -; (proc.native-math
) -; These can take any number of arguments and fold them. e.g. (+ a b c) = a + b + c +; (fn.native-math
) +; In the interest of speed, these can only take two arguments for now (define fn.native-math (fn (address) - (let-recursive self - (proc args scope - (if (nil? (cdr args)) - (eval scope (car args)) ; no more args - (let1 value - (car ;a0 - (call-native address 1 - (eval scope (car args)) - (eval scope (cadr args)))) - ; tail recursive call with remainder of args - (eval scope (cons self (cons value (cdr (cdr args)))))))) - self))) + (proc args scope + (car (unquote (cons call-native (cons address (cons 1 (eval-list scope args))))))))) ; define nicer versions of the core math ops we put into memory earlier (define + (fn.native-math +$)) diff --git a/stage2/10-riscv.lsp b/stage2/10-riscv.lsp index d83b6a0..64838a2 100644 --- a/stage2/10-riscv.lsp +++ b/stage2/10-riscv.lsp @@ -1,62 +1,91 @@ +; RISC-V assembler (first pass) +(define rv.opcode-len 7) +(define rv.reg-len 5) +(define rv.funct3-len 3) +(define rv.funct7-len 7) +(define rv.opcode-mask (bit-mask rv.opcode-len)) +(define rv.reg-mask (bit-mask rv.reg-len)) +(define rv.funct3-mask (bit-mask rv.funct3-len)) +(define rv.funct7-mask (bit-mask rv.funct7-len)) + ; RISC-V instruction formats -(define rv.opcode-mask (bit-mask 7)) -(define rv.reg-mask (bit-mask 5)) -(define rv.funct3-mask (bit-mask 3)) -(define rv.funct7-mask (bit-mask 7)) +; These can be optimized later with native implementations using the assembler we're building +(define rv.format.r (quote + (left-fold | (& opcode rv.opcode-mask) + (list + (<< (& rd rv.reg-mask) 7) + (<< (& funct3 rv.funct3-mask) 12) + (<< (& rs1 rv.reg-mask) 15) + (<< (& rs2 rv.reg-mask) 20) + (<< (& funct7 rv.funct7-mask) 25))))) (define rv.instr.r (fn (opcode funct3 funct7) (fn (rd rs1 rs2) - (| (& opcode rv.opcode-mask) - (<< (& rd rv.reg-mask) 7) - (<< (& funct3 rv.funct3-mask) 12) - (<< (& rs1 rv.reg-mask) 15) - (<< (& rs2 rv.reg-mask) 20) - (<< (& funct7 rv.funct7-mask) 25))))) + (unquote rv.format.r)))) + +(define rv.format.i (quote + (left-fold | (& opcode rv.opcode-mask) + (list + (<< (& rd rv.reg-mask) 7) + (<< (& funct3 rv.funct3-mask) 12) + (<< (& rs1 rv.reg-mask) 15) + (<< (& imm (bit-mask 12)) 20))))) (define rv.instr.i (fn (opcode funct3) (fn (rd rs1 imm) - (| (& opcode rv.opcode-mask) - (<< (& rd rv.reg-mask) 7) - (<< (& funct3 rv.funct3-mask) 12) - (<< (& rs1 rv.reg-mask) 15) - (<< (& imm (bit-mask 12)) 20))))) + (unquote rv.format.i)))) + +(define rv.format.s (quote + (left-fold | (& opcode rv.opcode-mask) + (list + (<< (& imm (bit-mask 5)) 7) + (<< (& funct3 rv.funct3-mask) 12) + (<< (& rs1 rv.reg-mask) 15) + (<< (& rs2 rv.reg-mask) 20) + (<< (& (>> imm 5) (bit-mask 7)) 25))))) (define rv.instr.s (fn (opcode funct3) (fn (rs2 rs1 imm) - (| (& opcode rv.opcode-mask) - (<< (& imm (bit-mask 5)) 7) - (<< (& funct3 rv.funct3-mask) 12) - (<< (& rs1 rv.reg-mask) 15) - (<< (& rs2 rv.reg-mask) 20) - (<< (& (>> imm 5) (bit-mask 7)) 25))))) + (unquote rv.format.s)))) + +(define rv.format.b (quote + (left-fold | (& opcode rv.opcode-mask) + (list + (<< (& (>> imm 11) 1) 7) + (<< (& (>> imm 1) (bit-mask 4)) 8) + (<< (& funct3 rv.funct3-mask) 12) + (<< (& rs1 rv.reg-mask) 15) + (<< (& rs2 rv.reg-mask) 20) + (<< (& (>> imm 5) (bit-mask 6)) 25) + (<< (& (>> imm 12) 1) 31))))) (define rv.instr.b (fn (opcode funct3) (fn (rs1 rs2 imm) - (| (& opcode rv.opcode-mask) - (<< (& (>> imm 11) 1) 7) - (<< (& (>> imm 1) (bit-mask 4)) 8) - (<< (& funct3 rv.funct3-mask) 12) - (<< (& rs1 rv.reg-mask) 15) - (<< (& rs2 rv.reg-mask) 20) - (<< (& (>> imm 5) (bit-mask 6)) 25) - (<< (& (>> imm 12) 1) 31))))) + (unquote rv.format.b)))) + +(define rv.format.u (quote + (| (& opcode rv.opcode-mask) + (| (<< (& rd rv.reg-mask) 7) + (+ ; add one if 11th bit is set + (& imm (<< (bit-mask 20) 12)) + (<< (& imm 0x800) 1)))))) (define rv.instr.u (fn (opcode) (fn (rd imm) - (| (& opcode rv.opcode-mask) - (<< (& rd rv.reg-mask) 7) - (+ ; add one if 11th bit is set - (& imm (<< (bit-mask 20) 12)) - (<< (& imm 0x800) 1)))))) + (unquote rv.format.u)))) + +(define rv.format.j (quote + (left-fold | (& opcode rv.opcode-mask) + (list + (<< (& rd rv.reg-mask) 7) + (& imm (<< (bit-mask 8) 12)) ; inst[19:12] = imm[19:12] + (<< (& (>> imm 11) 1) 20) ; inst[20] = imm[11] + (<< (& (>> imm 1) (bit-mask 10)) 21) ; inst[30:21] = imm[10:1] + (<< (& (>> imm 20) 1) 31))))) ; inst[31] = imm[20] (define rv.instr.j (fn (opcode) (fn (rd imm) - (| (& opcode rv.opcode-mask) - (<< (& rd rv.reg-mask) 7) - (& imm (<< (bit-mask 8) 12)) ; inst[19:12] = imm[19:12] - (<< (& (>> imm 11) 1) 20) ; inst[20] = imm[11] - (<< (& (>> imm 1) (bit-mask 10)) 21) ; inst[30:21] = imm[10:1] - (<< (& (>> imm 20) 1) 31))))) ; inst[31] = imm[20] + (unquote rv.format.j)))) ; RISC-V registers (define $zero 0) diff --git a/stage2/11-riscv-optimized.lsp b/stage2/11-riscv-optimized.lsp new file mode 100644 index 0000000..962adcc --- /dev/null +++ b/stage2/11-riscv-optimized.lsp @@ -0,0 +1,96 @@ +; Optimized implementations of the RISC-V instruction formatters +(define rv.format.r$ + (poke.w (allocate (<< 17 2) 4) + (\andi $a0 $a0 rv.opcode-mask) + (\andi $a1 $a1 rv.funct3-mask) + (\andi $a2 $a2 rv.funct7-mask) + (\andi $a3 $a3 rv.reg-mask) ; rd + (\andi $a4 $a4 rv.reg-mask) ; rs1 + (\andi $a5 $a5 rv.reg-mask) ; rs2 + (\slli $a3 $a3 7) ; rd + (\slli $a1 $a1 12) ; funct3 + (\slli $a4 $a4 15) ; rs1 + (\slli $a5 $a5 20) ; rs2 + (\slli $a2 $a2 25) ; funct7 + (\or $a0 $a0 $a1) + (\or $a0 $a0 $a2) + (\or $a0 $a0 $a3) + (\or $a0 $a0 $a4) + (\or $a0 $a0 $a5) + (\ret) + )) +(define rv.format.r (quote + (car (call-native rv.format.r$ 1 opcode funct3 funct7 rd rs1 rs2)))) + +(define rv.format.i$ + (poke.w (allocate (<< 17 2) 4) + (\andi $a0 $a0 rv.opcode-mask) + (\andi $a1 $a1 rv.funct3-mask) + (\andi $a2 $a2 rv.reg-mask) ; rd + (\andi $a3 $a3 rv.reg-mask) ; rs1 + ; 12-bit mask + (\li $t0 1) + (\slli $t0 $t0 12) + (\addi $t0 $t0 -1) + (\and $a4 $a4 $t0) ; imm + (\slli $a2 $a2 7) ; rd + (\slli $a1 $a1 12) ; funct3 + (\slli $a3 $a3 15) ; rs1 + (\slli $a4 $a4 20) ; imm + (\or $a0 $a0 $a1) + (\or $a0 $a0 $a2) + (\or $a0 $a0 $a3) + (\or $a0 $a0 $a4) + (\ret) + )) +(define rv.format.i (quote + (car (call-native rv.format.i$ 1 opcode funct3 rd rs1 imm)))) + +(define rv.format.s$ + (poke.w (allocate (<< 18 2) 4) + (\andi $a0 $a0 rv.opcode-mask) + (\andi $a1 $a1 rv.funct3-mask) + (\andi $a2 $a2 rv.reg-mask) ; rs2 + (\andi $a3 $a3 rv.reg-mask) ; rs1 + ; split up imm to two fields + (\andi $t0 $a4 (bit-mask 5)) + (\slli $t0 $t0 7) + (\srli $t1 $a4 5) + (\andi $t1 $t1 (bit-mask 7)) + (\slli $t1 $t1 25) + (\slli $a1 $a1 12) ; funct3 + (\slli $a3 $a3 15) ; rs1 + (\slli $a2 $a2 20) ; rs2 + (\or $a0 $a0 $a1) + (\or $a0 $a0 $a2) + (\or $a0 $a0 $a3) + (\or $a0 $a0 $t0) + (\or $a0 $a0 $t1) + (\ret) + )) +(define rv.format.s (quote + (car (call-native rv.format.s$ 1 opcode funct3 rs2 rs1 imm)))) + +(define rv.format.u$ + (poke.w (allocate (<< 16 2) 4) + (\andi $a0 $a0 rv.opcode-mask) + (\andi $a1 $a1 rv.reg-mask) ; rd + ; 20-bit upper mask + (\li $t0 1) + (\slli $t0 $t0 20) + (\addi $t0 $t0 -1) + (\slli $t0 $t0 12) + ; add one to 12th bit if 11th bit is set + (\li $t1 1) + (\slli $t1 $t1 11) + (\and $t1 $a2 $t1) + (\slli $t1 $t1 1) + (\and $a2 $a2 $t0) ; imm + (\add $a2 $a2 $t1) + (\slli $a1 $a1 7) ; rd + (\or $a0 $a0 $a1) + (\or $a0 $a0 $a2) + (\ret) + )) +(define rv.format.u (quote + (car (call-native rv.format.u$ 1 opcode rd imm)))) diff --git a/stage2/16-optimize-prims.lsp b/stage2/16-optimize-prims.lsp index 57b9505..c60516f 100644 --- a/stage2/16-optimize-prims.lsp +++ b/stage2/16-optimize-prims.lsp @@ -1,2 +1,122 @@ ; create some more optimized primitives with the linker +(define eval-list (box-procedure (car (link + (start + ; stash locals, set up variables + (\addi $sp $sp -0x30) + (\sd $ra $sp 0x00) + (\sd $s1 $sp 0x08) + (\sd $s2 $sp 0x10) + (\sd $a1 $sp 0x18) ; locals + (\sd $a0 $sp 0x20) ; args / head + (\sd $zero $sp 0x28) ; tail + (\li $s1 0) ; return value (dest) + (\li $s2 0) ; current node of dest to append to + ) + (setup + ; evaluate args first + ; arg 0 - locals (to be used) + (\ld $a0 $sp 0x18) + (\auipc $ra (rel acquire-object$)) + (\callr $ra (rel+ acquire-object$)) + (\mv $a1 $a0) + (\addi $a0 $sp 0x20) + (\auipc $ra (rel eval-head$)) + (\callr $ra (rel+ eval-head$)) + (\bnez $a0 (rel ret)) + ; swap provided locals into position + (\ld $t0 $sp 0x20) ; new locals + (\ld $a1 $sp 0x18) ; old locals (use one more time) + (\sd $t0 $sp 0x18) ; save new as locals + ; shuffle arg list back + (\ld $t0 $sp 0x28) + (\sd $t0 $sp 0x20) + (\sd $zero $sp 0x28) + ; arg 1 - list to evaluate + (\addi $a0 $sp 0x20) + (\auipc $ra (rel eval-head$)) + (\callr $ra (rel+ eval-head$)) + (\bnez $a0 (rel ret)) + ; release rest of args + (\ld $a0 $sp 0x28) + (\auipc $ra (rel release-object$)) + (\callr $ra (rel+ release-object$)) + ) + (loop + ; check if next is nil + (\ld $t0 $sp 0x20) + (\beqz $t0 (rel done)) + ; acquire locals + (\ld $a0 $sp 0x18) + (\auipc $ra (rel acquire-object$)) + (\callr $ra (rel+ acquire-object$)) + (\mv $a1 $a0) + ; set address of head/tail + (\addi $a0 $sp 0x20) + ; call eval-head$ + (\auipc $ra (rel eval-head$)) + (\callr $ra (rel+ eval-head$)) + ; handle error + (\bnez $a0 (rel ret)) + ; move tail back to args position + (\ld $a0 $sp 0x20) + (\ld $t0 $sp 0x28) + (\sd $t0 $sp 0x20) + (\sd $zero $sp 0x28) + ; make cons with nil + (\li $a1 0) + (\auipc $ra (rel cons$)) + (\callr $ra (rel+ cons$)) + ; handle error + (\beqz $a0 (rel nomem)) + ; handle first node specially + (\beqz $s1 (rel first)) + ; set cons into current node + (\sd $a0 $s2 0x10) ; tail + (\mv $s2 $a0) ; advance + (\j (rel loop)) + ) + (first + ; first node = set to s1 and s2 + (\mv $s1 $a0) + (\mv $s2 $a0) + (\j (rel loop)) + ) + (done + ; ok + (\li $a0 0) + ; take result from s1 + (\mv $a1 $s1) + (\mv $s1 $zero) + ) + (ret + (\addi $sp $sp -0x10) + (\sd $a0 $sp 0x00) + (\sd $a1 $sp 0x08) + ; free locals + (\ld $a0 $sp 0x28) + (\auipc $ra (rel acquire-object$)) + (\callr $ra (rel+ acquire-object$)) + ; free args/head (never need to free tail) + (\ld $a0 $sp 0x30) + (\auipc $ra (rel acquire-object$)) + (\callr $ra (rel+ acquire-object$)) + ; free s1 + (\mv $a0 $s1) + (\auipc $ra (rel acquire-object$)) + (\callr $ra (rel+ acquire-object$)) + ; restore and return + (\ld $a0 $sp 0x00) + (\ld $a1 $sp 0x08) + (\ld $ra $sp 0x10) + (\ld $s1 $sp 0x18) + (\ld $s2 $sp 0x20) + (\addi $sp $sp 0x40) + (\ret) + ) + (nomem + (\li $a0 -4) ; EVAL_EXCEPTION_NO_FREE_MEM + (\li $a1 0) + (\j (rel ret)) + ) +))))