/*
 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include "pico/asm_helper.S"
#include "pico/runtime_init.h"
#include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"

PICO_RUNTIME_INIT_FUNC_RUNTIME(__aeabi_float_init, PICO_RUNTIME_INIT_AEABI_FLOAT)

pico_default_asm_setup

// Switch to the section that holds the code for `name`: a RAM section when
// PICO_FLOAT_IN_RAM is set, otherwise the regular per-name section.
// (RAM_SECTION_NAME / SECTION_NAME are provided by an included header.)
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// As float_section, but for the wrapper symbol of `func`.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm

// Single indirection over wrapper_func (defined outside this file).
.macro _float_wrapper_func x
    wrapper_func \x
.endm

// Entry point for a wrapper taking one float in r0.
// With PICO_FLOAT_PROPAGATE_NANS, lr is parked in ip and __check_nan_f1 is
// called: it either comes back here (lr is then restored and the body runs) or
// returns straight to the original caller with the NaN argument still in r0.
// Clobbers ip, r2, r3 and the flags in that configuration.
.macro wrapper_func_f1 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr
    bl __check_nan_f1
    mov lr, ip
#endif
.endm

// Entry point for a wrapper taking two floats in r0, r1; same scheme as
// wrapper_func_f1 but using __check_nan_f2, which inspects both arguments.
.macro wrapper_func_f2 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr
    bl __check_nan_f2
    mov lr, ip
#endif
.endm

.section .text

#if PICO_FLOAT_PROPAGATE_NANS
// In:  r0 = float argument, ip = return address of the wrapper's caller,
//      lr = address to continue at inside the wrapper.
// Returns to lr when r0 is not a NaN; returns to ip (r0 untouched) when it is.
// Clobbers r2, r3 and the flags.
.thumb_func
__check_nan_f1:
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1         // drop the sign; exponent now occupies bits 31..24
    adds r2, r3             // carry out <=> exponent all ones; non-zero result <=> mantissa non-zero
    bhi 1f                  // C set and Z clear: r0 is a NaN
    bx lr
1:
    bx ip

// Two-argument variant: r0 and r1 are tested in that order and the first NaN
// found is returned in r0 directly to the caller held in ip.
// Clobbers r2, r3 and the flags.
.thumb_func
__check_nan_f2:
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f                  // r0 is a NaN: return it
    lsls r2, r1, #1
    adds r2, r3
    bhi 2f                  // r1 is a NaN: return that one instead
    bx lr
2:
    mov r0, r1
1:
    bx ip
#endif

// Tail call the routine whose address is stored at sf_table + SF_TABLE_OFFSET.
// lr is left alone, so the routine returns to our caller. Clobbers r3.
// With PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED in non-NDEBUG
// builds ip is zeroed first.
// NOTE(review): presumably so that a shim reached through a non-shimmable call
// sees an obviously invalid ip - confirm against the shim code.
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
#ifndef NDEBUG
    movs r3, #0
    mov ip, r3
#endif
#endif
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
    bx r3
.endm

// As table_tail_call, but when V1 ROM support is compiled in it also copies pc
// into ip right before the branch and places a small record immediately after
// the branch: the table offset byte, a 0xdf byte and the address of `shim`.
// NOTE(review): presumably the shim installed in sf_table uses ip to locate
// that record - confirm against the shim code. The instruction/data order here
// must not be changed.
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
    mov ip, pc
#endif
    bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm

// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

// note functions are word aligned except where they are an odd number of linear instructions

float_wrapper_section __aeabi_farithmetic

// float FUNC_NAME(__aeabi_frsub)(float x, float y)    single-precision reverse subtraction, y - x
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
    // exchange r0 and r1 without a scratch register, then share the fsub code
    eors r0, r1
    eors r1, r0
    eors r0, r1
    // fall thru

// float FUNC_NAME(__aeabi_fsub)(float x, float y)     single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
    // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
    mov r2, r0
    eors r2, r1
    bmi 1f // different signs
    // same signs: keep the operands and the return address, run the ROM routine
    // as a subroutine and let the helper inspect its result
    push {r0, r1, lr}
    bl 1f
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FSUB

// float FUNC_NAME(__aeabi_fadd)(float, float)         single-precision addition
wrapper_func_f2 __aeabi_fadd
    table_tail_call SF_TABLE_FADD

// float FUNC_NAME(__aeabi_fdiv)(float n, float d)     single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
    // keep the operands and the return address, run the division below as a
    // subroutine and let the helper inspect its result
    push {r0, r1, lr}
    bl 1f
    b fdiv_fsub_nan_helper
1:
#endif
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
    // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
    ldr r2, =(SIO_BASE)
    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
    lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY // dirty flag ends up in the carry
    bcs fdiv_save_state
    // divider state is clean: fall thru into the plain tail call
#else
    // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
    push {r4, lr}
    mrs r4, PRIMASK
    cpsid i
    bl fdiv_shim_call
    msr PRIMASK, r4         // put back the interrupt mask that was in force on entry
    pop {r4, pc}
#endif
fdiv_shim_call:
    table_tail_call SF_TABLE_FDIV
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
fdiv_save_state:
    // save_div_state_and_lr / restore_div_state_and_return come from the
    // included divider helper file
    save_div_state_and_lr
    bl fdiv_shim_call
    ldr r2, =(SIO_BASE)
    restore_div_state_and_return
#endif

// Shared tail for fsub and fdiv when PICO_FLOAT_PROPAGATE_NANS is set.
// In:  r0 = result of the ROM routine; stack = {operand in r0 at entry,
//      operand in r1 at entry, return address}.
// Out: r0 = result, with bit 22 (the top mantissa bit) set when the result and
//      both operands all have an all-ones exponent; returns by popping pc.
// Clobbers r1-r3 and the flags.
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
    pop {r1, r2}

    // check for infinite op infinite (or rather check for infinite result with both
    // operands being infinite)
    lsls r3, r0, #1
    asrs r3, r3, #24        // sign-extended exponent: -1 when all ones
    adds r3, #1
    beq 2f
    pop {pc}
2:
    lsls r1, #1
    asrs r1, r1, #24
    lsls r2, #1
    asrs r2, r2, #24
    ands r1, r2             // -1 only when both exponents are all ones
    adds r1, #1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
#endif

// float FUNC_NAME(__aeabi_fmul)(float, float)         single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}
    bl 1f                   // run the ROM multiply as a subroutine
    pop {r1, r2}            // r1, r2 = original operands

    // check for multiplication of infinite by zero (or rather check for infinite result with either
    // operand 0)
    lsls r3, r0, #1
    asrs r3, r3, #24
    adds r3, #1
    beq 2f
    pop {pc}
2:
    // An infinite result comes either from an infinite operand (exponent all
    // ones) or from overflow of two finite operands (both exponents >= 128), so
    // the AND of the operands has a non-zero exponent field unless one operand
    // has a zero exponent. The sign bits take no part in that test and are
    // shifted out, otherwise (-0) * (-inf) would leave bit 31 set and be
    // mistaken for a product without a zero operand.
    ands r1, r2
    lsls r1, #1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
1:
#endif
    table_tail_call SF_TABLE_FMUL

// void FUNC_NAME(__aeabi_cfrcmple)(float, float)         reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
    push {r0-r2, lr}
    // exchange the arguments, then share the compare code
    eors r0, r1
    eors r1, r0
    eors r0, r1
    b __aeabi_cfcmple_guts

// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float)         3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float)         non-excepting equality comparison [1], result in PSR ZC flags
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
    push {r0-r2, lr}

// In:  r0 = x, r1 = y; {r0-r2, lr} already pushed by the entry point.
// Out: flags only (r0-r2 are restored by the final pop):
//        Z set            x == y
//        C clear          x <  y
//        C set, Z clear   x >  y, or either argument is a NaN
__aeabi_cfcmple_guts:
    lsls r2,r0,#1
    lsrs r2,#24     @ r2 = exponent of x
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r0, #9 @ exponent all ones: Z = mantissa is zero, C = 1
    bhi 3f          @ x is a NaN: leave with C set, Z clear
1:
    lsrs r0,#23     @ clear mantissa if denormal or infinite
    lsls r0,#23
2:
    lsls r2,r1,#1
    lsrs r2,#24     @ r2 = exponent of y
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r1, #9
    bhi 3f          @ y is a NaN: leave with C set, Z clear
1:
    lsrs r1,#23     @ clear mantissa if denormal or infinite
    lsls r1,#23
2:
    movs r2,#1      @ initialise result
    eors r1,r0
    bmi 2f          @ opposite signs? then can proceed on basis of sign of x
    eors r1,r0      @ restore y
    bpl 1f
    cmp r1,r0       @ both negative: ordering of the bit patterns is reversed
    pop {r0-r2, pc}
1:
    cmp r0,r1       @ both non-negative: bit patterns order like the values
    pop {r0-r2, pc}
2:
    orrs r1, r0     @ handle 0/-0
    adds r1, r1     @ note this always sets C
    beq 3f
    mvns r0, r0     @ carry inverse of r0 sign
    adds r0, r0
3:
    pop {r0-r2, pc}


// int FUNC_NAME(__aeabi_fcmpeq)(float, float)         result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
    push {lr}
    bl __aeabi_cfcmpeq
    beq 1f
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmplt)(float, float)         result (1, 0) denotes (<, ?>=) [2], use for C <
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
    push {lr}
    bl __aeabi_cfcmple
    // r0 = r0 - r0 - !C: 0 when C is set, all ones (non-zero) when C is clear (x < y)
    sbcs r0, r0
    pop {pc}

// int FUNC_NAME(__aeabi_fcmple)(float, float)         result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
    push {lr}
    bl __aeabi_cfcmple
    bls 1f                  // C clear or Z set: x <= y
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpge)(float, float)         result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    bls 1f                  // y <= x
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpgt)(float, float)         result (1, 0) denotes (>, ?<=) [2], use for C >
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    // 0 when C is set, all ones (non-zero) when C is clear (y < x)
    sbcs r0, r0
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpun)(float, float)         result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
    // same NaN test as __check_nan_f2, applied to r0 and then r1
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f
    lsls r2, r1, #1
    adds r2, r3
    bhi 1f
    movs r0, #0
    bx lr
1:
    movs r0, #1
    bx lr


// float FUNC_NAME(__aeabi_ui2f)(unsigned)             unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
// In: r0 = unsigned value. Out: r0 = float bit pattern.
// Zero is returned directly; any other value joins the shared conversion code
// at __aeabi_i2f_main with a zero sign word in r1.
wrapper_func __aeabi_ui2f
regular_func uint2float
    subs r1, r1             // r1 = 0: sign word for a non-negative value
    cmp r0, #0
    bne __aeabi_i2f_main
    mov r0, r1              // zero input: return +0.0f
    bx lr

float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int)                     integer to float (single precision) conversion
wrapper_func __aeabi_i2f
regular_func int2float
    lsrs r1, r0, #31
    lsls r1, #31            // r1 = sign bit only (0 or 0x80000000); N flag = sign
    bpl 1f
    negs r0, r0             // negative input: carry on with the magnitude
1:
    cmp r0, #0
    beq 7f                  // zero: r0 already holds +0.0f
// Shared with __aeabi_ui2f. In: r0 = non-zero magnitude, r1 = sign word.
// Returns to the address in lr on entry. Clobbers r1-r3, ip and the flags.
__aeabi_i2f_main:

    mov ip, lr              // return address is kept in ip across the call below
    push {r0, r1}
    ldr r3, =sf_clz_func
    ldr r3, [r3]
    // call through the pointer stored at sf_clz_func
    // NOTE(review): presumably returns the leading-zero count of r0 in r0 and
    // leaves ip intact - confirm against the routine installed there
    blx r3
    pop {r1, r2}            // r1 = magnitude, r2 = sign word
    lsls r1, r0             // normalise: leading 1 moves up to bit 31
    subs r0, #158
    negs r0, r0             // r0 = 158 - shift count (158 = 127 + 31), the biased exponent

    adds r1,#0x80  @ rounding
    bcs 5f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

    lsls r3,r1,#24 @ check bottom 8 bits of r1
    beq 6f         @ in rounding-tie case?
    lsls r1,#1     @ remove leading 1
3:
    lsrs r1,#9     @ align mantissa
    lsls r0,#23    @ align exponent
    orrs r0,r2     @ apply sign
4:
    orrs r0,r1     @ merge mantissa
1:
    bx ip
5:
    adds r0,#1     @ correct exponent offset
    b 3b
6:
    lsrs r1,#9     @ ensure even result
    lsls r1,#10    @ also shifts the leading 1 out
    b 3b
7:
    bx lr

// int FUNC_NAME(__aeabi_f2iz)(float)                     float (single precision) to integer C-style conversion [3]
// In: r0 = float. Out: r0 = value truncated towards zero.
// Exponents of 126 or less (magnitude below 1) give 0; exponents of 158 or more
// (magnitude of 2^31 or more, which includes the all-ones exponent) give
// 0x7fffffff for a clear sign bit and 0x80000000 for a set one.
// Clobbers r1-r3 and the flags.
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
    lsls r1, r0, #1
    lsrs r2, r1, #24        // r2 = biased exponent
    movs r3, #0x80
    lsls r3, #24            // r3 = 0x80000000
    cmp r2, #126
    ble 1f
    subs r2, #158
    bge 2f
    asrs r1, r0, #31        // r1 = 0 or -1 according to the sign
    lsls r0, #9
    lsrs r0, #1
    orrs r0, r3             // r0 = 1.mantissa with the implicit 1 at bit 31
    negs r2, r2             // r2 = 158 - exponent, in 1..31
    lsrs r0, r2             // integer part of the magnitude
    lsls r1, #1
    adds r1, #1             // r1 = +1 or -1
    muls r0, r1             // apply the sign
    bx lr
1:
    movs r0, #0
    bx lr
2:
    lsrs r0, #31            // 0 or 1 according to the sign
    adds r0, r3
    subs r0, #1             // 0x7fffffff (sign clear) or 0x80000000 (sign set)
    bx lr

// Straight tail call into the ROM table (shimmable for V1 ROM support).
float_section float2int
regular_func float2int
    shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim

// In: r0 = float, r1 is handed on untouched to the routine called.
// Inputs with a clear sign bit go to float2fix. For a set sign bit the sign is
// cleared, float2ufix_z converts the magnitude and the result is negated; a
// result with bit 31 set is replaced by 0x80000000.
float_section float2fix_z
regular_func float2fix_z
    cmn r0, r0              // carry = sign bit of r0
    bcc float2fix
    push {lr}
    lsls r0, #1
    lsrs r0, #1             // clear the sign bit
    bl float2ufix_z
    cmp r0, #0
    bmi 1f
    negs r0, r0
    pop {pc}
1:
    movs r0, #128
    lsls r0, #24            // r0 = 0x80000000
    pop {pc}

float_section float2fix
regular_func float2fix
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim

// float2ufix and float2ufix_z are two names for the same entry point.
float_section float2ufix
regular_func float2ufix
regular_func float2ufix_z
    table_tail_call SF_TABLE_FLOAT2UFIX

// unsigned FUNC_NAME(__aeabi_f2uiz)(float)             float (single precision) to unsigned C-style conversion [3]
// __aeabi_f2uiz, float2uint and float2uint_z are three names for the same entry point.
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
regular_func float2uint
regular_func float2uint_z
    table_tail_call SF_TABLE_FLOAT2UINT

float_section fix2float
regular_func fix2float
    table_tail_call SF_TABLE_FIX2FLOAT

float_section ufix2float
regular_func ufix2float
    table_tail_call SF_TABLE_UFIX2FLOAT

float_section fix642float
regular_func fix642float
    shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim

float_section ufix642float
regular_func ufix642float
    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim

// float FUNC_NAME(__aeabi_l2f)(long long)             long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
    // value fits in 32 bits: hand over to the 32-bit conversion
    ldr r2, =__aeabi_i2f
    bx r2
wrapper_func __aeabi_l2f
regular_func int642float
    asrs r2, r0, #31
    cmp r1, r2              // is the high word just the sign extension of the low word?
    beq 1b
    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim

// float FUNC_NAME(__aeabi_ul2f)(unsigned long long)   unsigned long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
    // value fits in 32 bits: hand over to the 32-bit conversion
    ldr r2, =__aeabi_ui2f
    bx r2
wrapper_func __aeabi_ul2f
regular_func uint642float
    cmp r1, #0              // is the high word zero?
    beq 1b
    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim

// long long FUNC_NAME(__aeabi_f2lz)(float)             float (single precision) to long long C-style conversion [3]
// Inputs with a clear sign bit go to float2int64. For a set sign bit the sign
// is cleared, float2ufix64 is called with r1 = 0 and the 64-bit result in
// r1:r0 is negated; a result with bit 63 set is replaced by
// 0x8000000000000000.
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    cmn r0, r0              // carry = sign bit of r0
    bcc float2int64
    movs r1, #0
// Also entered from float2fix64_z with r1 as supplied by its caller.
float2fix64_z_neg:
    push {lr}
    lsls r0, #1
    lsrs r0, #1             // clear the sign bit
    bl float2ufix64
    cmp r1, #0
    bmi 1f
    movs r2, #0
    negs r0, r0
    sbcs r2, r1             // r2:r0 = -(r1:r0)
    mov r1, r2
    pop {pc}
1:
    movs r1, #128
    lsls r1, #24
    movs r0, #0             // r1:r0 = 0x8000000000000000
    pop {pc}

float_section float2int64
regular_func float2int64
    shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim

float_section float2fix64
regular_func float2fix64_z
    cmn r0, r0              // carry = sign bit of r0
    bcs float2fix64_z_neg
    // fall thru
regular_func float2fix64
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim

// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float)             float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
regular_func float2uint64
regular_func float2uint64_z
    shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim

float_section float2ufix64
regular_func float2ufix64
regular_func float2ufix64_z
    shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim

// __aeabi_f2d: float in r0 to double in r1:r0. The NaN case (reached from the
// test at the entry point below) is placed ahead of the entry point.
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
    asrs r1, r0, #3
    movs r2, #0xf
    lsls r2, #27
    orrs r1, r2
    lsls r0, #25
    bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
    // same NaN test as __check_nan_f1, done inline
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1b
#endif
    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim

float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
    // check for negative
    asrs r1, r0, #23        // r1 = sign-extended sign and exponent
    bmi 1f
#endif
    table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1 && PICO_RP2040_B0_SUPPORTED
1:
    mvns r0, r1
    cmp r0, #255            // inverted value is 255 only when the exponent is zero
    bne 2f
    // -0 or -Denormal return -0 (0x80000000)
    lsls r0, #31
    bx lr
2:
    // return -Inf (0xff800000)
    asrs r0, r1, #31
    lsls r0, #23
    bx lr
#endif

float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // bring the argument into range with remainderf(x, 2 * pi), put lr back
    // and join the in-range path
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // bring the argument into range with remainderf(x, 2 * pi), put lr back
    // and join the in-range path
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
// In: r0 = angle, r1 and r2 = addresses that receive the two results.
wrapper_func sincosf
    push {r1, r2, lr}
    // rom version only works for -128 < angle < 128
    lsls r3, r0, #1
    lsrs r3, #24            // r3 = biased exponent
    cmp r3, #127 + 7
    bge 3f
2:
    ldr r3, =sf_table
    ldr r3, [r3, #SF_TABLE_FSIN]
    blx r3
    pop {r2, r3}            // the two destination addresses saved on entry
    // the routine's r0 goes to the first address and its r1 to the second
    // NOTE(review): this relies on the ROM sine routine also leaving the
    // cosine in r1 - confirm against the ROM documentation
    str r0, [r2]
    str r1, [r3]
    pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
    // NOTE(review): not reachable by fall thru (a pop {pc} precedes it); looks
    // like padding that fixes the alignment of what follows - confirm before
    // touching
.align 2
    pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r3, #255
    bne 4f
    // infinite to nan
    movs r3, #1
    lsls r3, #22
    orrs r0, r3
    str r0, [r1]            // r1, r2 still hold the destination addresses here
    str r0, [r2]
    add sp, #12             // discard the three words pushed on entry
    bx lr
4:
#endif
    // bring the argument into range with remainderf(x, 2 * pi), put lr back
    // and join the in-range path
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge ftan_out_of_range
ftan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
    // to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
    ldr r2, =(SIO_BASE)
    ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
    lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY // dirty flag ends up in the carry
    bcs ftan_save_state
    // divider state is clean: fall thru into the plain tail call
#else
    // to avoid worrying about IRQs (or context switches), simply disable interrupts around call
    push {r4, lr}
    mrs r4, PRIMASK
    cpsid i
    bl ftan_shim_call
    msr PRIMASK, r4         // put back the interrupt mask that was in force on entry
    pop {r4, pc}
#endif
ftan_shim_call:
    table_tail_call SF_TABLE_FTAN
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ftan_save_state:
    // save_div_state_and_lr / restore_div_state_and_return come from the
    // included divider helper file
    save_div_state_and_lr
    bl ftan_shim_call
    ldr r2, =(SIO_BASE)
    restore_div_state_and_return
#endif
ftan_out_of_range:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // bring the argument into range with remainderf(x, 2 * pi), put lr back
    // and join the in-range path
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b ftan_in_range

float_wrapper_section atan2f
wrapper_func_f2 atan2f
    shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim

float_wrapper_section expf
wrapper_func_f1 expf
    table_tail_call SF_TABLE_FEXP

float_wrapper_section logf
wrapper_func_f1 logf
    table_tail_call SF_TABLE_FLN