/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

// Single-precision AEABI helper routines implemented with the DCP
// (double coprocessor) instruction macros.
// Syntax: GNU as (unified Thumb), run through the C preprocessor.
// Arguments arrive in r0/r1 and results are returned in r0 (r0:r1 for
// __aeabi_f2d), or in the APSR flags for the flag-setting comparisons.

#include "pico/asm_helper.S"

#if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile float_aeabi_dcp when there is no DCP
#else

#include "hardware/dcp_instr.inc.S"
#include "hardware/dcp_canned.inc.S"

pico_default_asm_setup

// todo factor out save/restore (there is a copy in double code)

// Switch to the code section for \name; the RAM variant of the section name
// is used when PICO_FLOAT_IN_RAM is set.
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// As float_section, but the section is named after the wrapper symbol of \func.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm

// ============== STATE SAVE AND RESTORE ===============

// Emit the entry sequence of a function that is about to use the DCP.
//   type        - "wrapper" or "regular": selects wrapper_func / regular_func
//                 for the primary symbol
//   func        - primary symbol name
//   opt_label1, - optional extra global names (declared with regular_func) for
//   opt_label2    the same entry point; '-' means "not supplied"
// The function body follows the macro invocation and normally ends with
// saving_func_return.
.macro saving_func type func, opt_label1='-', opt_label2='-'
  // Note we are usually 32-bit aligned already at this point, as most of the
  // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
  // We want the PCMP word-aligned.
.p2align 2
  // When the engaged flag is set, branch back here to invoke save routine and
  // hook lr with the restore routine, then fall back through to the entry
  // point. The engaged flag will be clear when checked a second time.
1:
  push {lr}              // 16-bit instruction
  bl generic_save_state  // 32-bit instruction
  b 1f                   // 16-bit instruction
  .ifnc \opt_label1,'-'
  regular_func \opt_label1
  .endif
  .ifnc \opt_label2,'-'
  regular_func \opt_label2
  .endif
  // This is the actual entry point:
\type\()_func \func
  PCMP apsr_nzcv
  bmi 1b
1:
.endm

// Return from a function declared with saving_func. When the state was saved
// on entry, lr points at generic_restore_state, so this also triggers the
// restore.
.macro saving_func_return
  bx lr
.endm

float_section __rp2350_dcp_engaged_state_save_restore
// Reached by "bl" from the saving_func preamble, after the caller lr has been
// pushed. Stores the three register pairs read by PXMD, PYMD and REFD in a
// 24-byte stack frame, then re-enters the wrapped function with lr pointing at
// generic_restore_state. r0 and r1 (the function arguments) are preserved.
.thumb_func
generic_save_state:
  sub sp, #24                   // room for three 64-bit values
  push {r0, r1}                 // keep the arguments while r0/r1 are used as scratch
  // do save here
  PXMD r0, r1
  strd r0, r1, [sp, #8 + 0]     // the "#8 +" skips the r0/r1 pushed above
  PYMD r0, r1
  strd r0, r1, [sp, #8 + 8]
  REFD r0, r1
  strd r0, r1, [sp, #8 + 16]
  pop {r0, r1}
  blx lr                        // back into the preamble; lr now points just below
  // <- wrapped function returns here
  // fall through into restore:
// Writes the saved state back with WXMD, WYMD and WEFD, then returns to the
// original caller through the lr pushed by the saving_func preamble.
// Only r12 and r14 are used as scratch, so results in r0-r3 and the flags survive.
.thumb_func
generic_restore_state:
  // do restore here
  pop {r12, r14}
  WXMD r12, r14
  pop {r12, r14}
  WYMD r12, r14
  pop {r12, r14}
  WEFD r12, r14
  pop {pc}

// ============== ARITHMETIC FUNCTIONS ===============

float_wrapper_section __aeabi_fadd
saving_func wrapper __aeabi_fadd
  dcp_fadd_m r0,r0,r1
  saving_func_return

float_wrapper_section __aeabi_fsub
saving_func wrapper __aeabi_fsub
  dcp_fsub_m r0,r0,r1
  saving_func_return

// reverse subtract: same as fsub with the operands swapped
float_wrapper_section __aeabi_frsub
saving_func wrapper __aeabi_frsub
  dcp_fsub_m r0,r1,r0
  saving_func_return

float_wrapper_section __aeabi_fmul
saving_func wrapper __aeabi_fmul
  dcp_fmul_m r0,r0,r1,r0,r1
  saving_func_return

float_section fdiv_fast
saving_func regular fdiv_fast
  dcp_fdiv_fast_m r0,r0,r1,r0,r1,r2
  saving_func_return

float_wrapper_section __aeabi_fdiv
saving_func wrapper __aeabi_fdiv
@ with correct rounding
  dcp_fdiv_m r0,r0,r1,r0,r1,r2,r3
  saving_func_return

float_section sqrtf_fast
saving_func regular sqrtf_fast
  dcp_fsqrt_fast_m r0,r0,r0,r1,r2,r3
  saving_func_return

float_wrapper_section sqrtf
saving_func wrapper sqrtf
@ with correct rounding
  dcp_fsqrt_m r0,r0,r0,r1,r2,r3
  saving_func_return

// result is delivered in the APSR flags
float_section fclassify
saving_func regular fclassify
  dcp_fclassify_m apsr_nzcv,r0
  saving_func_return

// ============== CONVERSION FUNCTIONS ===============

float_wrapper_section __aeabi_f2d
saving_func wrapper __aeabi_f2d float2double
  dcp_float2double_m r0,r1,r0
  saving_func_return

float_wrapper_section __aeabi_i2f
saving_func wrapper __aeabi_i2f int2float
@ with rounding
  dcp_int2float_m r0,r0
  saving_func_return

float_wrapper_section __aeabi_ui2f
saving_func wrapper __aeabi_ui2f uint2float
@ with rounding
  dcp_uint2float_m r0,r0
  saving_func_return

// float2fix_z: r0 = float, r1 = amount added to the biased exponent before the
// value is converted by the float2int_z code. Returns the signed result in r0.
// Declared with saving_func (like float2fix) because the tail branches to
// float2int_z_entry, which lies after the engaged check of __aeabi_f2iz.
float_section float2fix_z
saving_func regular float2fix_z
    ubfx r2, r0, #23, #8    // r2 = biased exponent
    cbz r2, 2f // input is zero or denormal
    cmp r2, #0xff
    beq 3f // input infinite or nan
    adds r2, r1
    ble 2f // modified input is denormal so zero
    cmp r2, #0xff
    // Unsigned compare: also taken when the sum exceeds 0xff (bfi below would
    // keep only its low 8 bits) or wrapped past INT_MAX.
    bhs 3f // modified input is infinite
    bfi r0, r2, #23, #8     // write the adjusted exponent back
    b float2int_z_entry
2:
    movs r0, #0
    bx lr
3:
    mvn r1, #0x80000000
    add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
    bx lr

float_wrapper_section __aeabi_f2iz
saving_func wrapper __aeabi_f2iz float2int_z
@ with truncation towards 0
// entry for code that has already performed the engaged check
float2int_z_entry:
  dcp_float2int_m r0,r0
  saving_func_return

// float2ufix / float2ufix_z: unsigned counterpart of float2fix_z; r0 = float,
// r1 = amount added to the biased exponent. Returns the result in r0.
// Declared with saving_func because the tail branches to float2uint_z_entry,
// which lies after the engaged check of __aeabi_f2uiz.
float_section __aeabi_f2ufix
saving_func regular float2ufix float2ufix_z
    ubfx r2, r0, #23, #8    // r2 = biased exponent
    cbz r2, 2f // input is zero or denormal
    cmp r2, #0xff
    beq 3f // input infinite or nan
    adds r2, r1
    ble 2f // modified input is denormal so zero
    cmp r2, #0xff
    // Unsigned compare: also taken when the sum exceeds 0xff (bfi below would
    // keep only its low 8 bits) or wrapped past INT_MAX.
    bhs 3f // modified input is infinite
    bfi r0, r2, #23, #8     // write the adjusted exponent back
    b float2uint_z_entry
2:
    movs r0, #0
    bx lr
3:
    mvn r0, r0, asr #31     // sign clear -> 0xffffffff, sign set -> 0
    bx lr

float_wrapper_section __aeabi_f2uiz
saving_func wrapper __aeabi_f2uiz float2uint_z float2uint
@ with truncation towards 0
// entry for code that has already performed the engaged check
float2uint_z_entry:
  dcp_float2uint_m r0,r0
  saving_func_return

// float2fix: as float2fix_z, but the tail goes through float2int_entry, which
// subtracts one for negative non-integer values.
float_section conv_f2fix
saving_func regular float2fix
    ubfx r2, r0, #23, #8    // r2 = biased exponent
    cbz r2, 2f // input is zero or denormal
    cmp r2, #0xff
    beq 3f // input infinite or nan
    adds r2, r1
    ble 2f // modified input is denormal so zero
    cmp r2, #0xff
    // Unsigned compare: also taken when the sum exceeds 0xff (bfi below would
    // keep only its low 8 bits) or wrapped past INT_MAX.
    bhs 3f // modified input is infinite
    bfi r0, r2, #23, #8     // write the adjusted exponent back
    b float2int_entry
2:
    // NOTE(review): a negative normal input whose adjusted exponent underflows
    // also returns 0 here, whereas float2int_entry would give -1 for a small
    // negative value - confirm this is intended.
    movs r0, #0
    bx lr
3:
    mvn r1, #0x80000000
    add r0, r1, r0, lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
    bx lr

float_section float2int
saving_func regular float2int
// entry for code that has already performed the engaged check
float2int_entry:
  lsls r1, r0, #1
  // r0 = abs(zero)     => r1 = 0x00000000
  // r0 = abs(denormal) => r1 = 0x00xxxxxx
  // r0 = abs(1.0f)     => r1 = 0x7f000000
  // r0 = abs(inf/nan)  => r1 = 0xffxxxxxx
  bls float2int_z_entry // input positive or zero or -zero are ok for float2int_z
  lsrs r1, #24
  beq float2int_z_entry // input denormal is flushed to zero anyway
  subs r1, #0x7f
  bcc 1f // input < 1.0f means we need to subtract 1 after conversion
  // mask off all but fractional bits
  lsls r2, r0, r1
  lsls r2, #9
  beq float2int_z_entry // input is integer
1:
  WXFC r0, r0
  ADD0
  ADD1
  NTDC
  RDIC r0
  subs r0, #1
  saving_func_return

#if 0 // not sure these are super useful; if they are we should give them names
float_wrapper_section __aeabi_f2i_r
// (not a real thing - kept because we use wrapper in saving_func)
saving_func wrapper __aeabi_f2i_r
@ with rounding
  dcp_float2int_r_m r0,r0
  saving_func_return

float_wrapper_section __aeabi_f2ui_r
// (not a real thing - kept because we use wrapper in saving_func)
saving_func wrapper __aeabi_f2ui_r
@ with rounding
  dcp_float2uint_r_m r0,r0
  saving_func_return
#endif

// ============== COMPARISON FUNCTIONS ===============

// returns the unordered bit of the comparison in r0
float_wrapper_section __aeabi_fcmpun
saving_func wrapper __aeabi_fcmpun
  dcp_fcmp_m r0,r0,r1
  // extract unordered bit
  ubfx r0, r0, #28, #1
  saving_func_return

// Flag-setting comparisons: the result is returned in the APSR flags.
float_wrapper_section __aeabi_fcmp
saving_func wrapper __aeabi_cfrcmple
  dcp_fcmp_m apsr_nzcv,r1,r0 // with arguments reversed
  bvs cmp_nan
  saving_func_return

// these next two can be the same function in the absence of exceptions
saving_func wrapper __aeabi_cfcmple
  dcp_fcmp_m apsr_nzcv,r0,r1
  bvs cmp_nan
  saving_func_return

// It is not clear from the ABI documentation whether cfcmpeq must set the C flag
// in the same way as cfcmple. If not, we could save the "bvs" below; but we
// err on the side of caution.
saving_func wrapper __aeabi_cfcmpeq
  dcp_fcmp_m apsr_nzcv,r0,r1
  bvs cmp_nan
  saving_func_return

// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
cmp_nan:
  movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
  lsrs r12, #1 // set C, clear Z
  saving_func_return

// Value-returning comparisons: r0 = 1 when the relation holds, otherwise 0.
float_wrapper_section __aeabi_fcmpeq
saving_func wrapper __aeabi_fcmpeq
  dcp_fcmp_m r0,r0,r1
  // extract Z
  ubfx r0, r0, #30, #1
  saving_func_return

float_wrapper_section __aeabi_fcmplt
saving_func wrapper __aeabi_fcmplt
  dcp_fcmp_m apsr_nzcv,r1,r0 // operands reversed: a < b is tested as b > a
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return

float_wrapper_section __aeabi_fcmple
saving_func wrapper __aeabi_fcmple
  dcp_fcmp_m apsr_nzcv,r1,r0 // operands reversed: a <= b is tested as b >= a
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return

float_wrapper_section __aeabi_fcmpge
saving_func wrapper __aeabi_fcmpge
  dcp_fcmp_m apsr_nzcv,r0,r1
  ite hs
  movhs r0,#1
  movlo r0,#0
  saving_func_return

float_wrapper_section __aeabi_fcmpgt
saving_func wrapper __aeabi_fcmpgt
  dcp_fcmp_m apsr_nzcv,r0,r1
  ite hi
  movhi r0,#1
  movls r0,#0
  saving_func_return

#endif