/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

// Double-precision AEABI helpers and conversion routines implemented on top
// of the double coprocessor (DCP) macros pulled in from the two hardware
// include files below. Syntax: GNU as, ARM unified/Thumb, preprocessed with
// the C preprocessor (hence both `//` and `@` comments appear).

#include "pico/asm_helper.S"

#if !HAS_DOUBLE_COPROCESSOR
#error attempt to compile double_aeabi_dcp when there is no DCP
#else

#include "hardware/dcp_instr.inc.S"
#include "hardware/dcp_canned.inc.S"

pico_default_asm_setup

// Open the code section for `name`; placed in RAM when PICO_DOUBLE_IN_RAM is
// set, otherwise in the default section for that name.
.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// As double_section, but the section is named after the wrapper symbol that
// WRAPPER_FUNC_NAME produces for `func`.
.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm

// ============== STATE SAVE AND RESTORE ===============

// saving_func type func [, opt_label1 [, opt_label2]]
//
// Emits the entry sequence of a DCP-using function:
//   type        - `wrapper` or `regular`; selects wrapper_func / regular_func
//                 for the primary symbol `func`.
//   opt_label1/2 - optional extra symbols declared with regular_func just
//                 before the entry point ('-' means "not supplied").
//
// Layout produced (the numeric labels are visible to the function body, so a
// body that defines its own `1:` must only reference it with `1f`):
//
//   1:  push {lr} / bl generic_save_state / b 1f     <- slow path
//       [opt labels]
//   func:
//       PCMP apsr_nzcv
//       bmi 1b                                        <- taken when engaged
//   1:  ... function body follows the macro ...
.macro saving_func type func, opt_label1='-', opt_label2='-'
    // Note we are usually 32-bit aligned already at this point, as most of the
    // function bodies contain exactly two 16-bit instructions: bmi and bx lr.
    // We want the PCMP word-aligned.
.p2align 2
    // When the engaged flag is set, branch back here to invoke save routine and
    // hook lr with the restore routine, then fall back through to the entry
    // point. The engaged flag will be clear when checked a second time.
1:
    push {lr}              // 16-bit instruction
    bl generic_save_state  // 32-bit instruction
    b 1f                   // 16-bit instruction
.ifnc \opt_label1,'-'
regular_func \opt_label1
.endif
.ifnc \opt_label2,'-'
regular_func \opt_label2
.endif
    // This is the actual entry point:
\type\()_func \func
    PCMP apsr_nzcv
    bmi 1b
1:
.endm

// Return from a saving_func body. Works for both the fast path (lr is the
// real caller) and the slow path (lr points at generic_restore_state).
.macro saving_func_return
    bx lr
.endm

double_section __rp2350_dcp_engaged_state_save_restore

// generic_save_state
//
// Entered via `bl` from the saving_func slow path, immediately after that
// path has pushed the caller's lr. Preserves r0/r1 (the first double
// argument). Stack frame while the wrapped function runs (derived from the
// instructions below):
//
//   [sp, #0]   pair read by PXMD
//   [sp, #8]   pair read by PYMD
//   [sp, #16]  pair read by REFD
//   [sp, #24]  caller's lr (pushed by saving_func)
//
// NOTE(review): PXMD/PYMD/REFD are assumed to read out the coprocessor state
// that WXMD/WYMD/WEFD write back below; their exact semantics live in
// hardware/dcp_instr.inc.S - confirm there.
.thumb_func
generic_save_state:
    sub sp, #24                     // room for three 64-bit state words
    push {r0, r1}                   // keep the argument; r0/r1 are scratch below
    // do save here
    PXMD r0, r1
    strd r0, r1, [sp, #8 + 0]       // +8 skips the r0/r1 just pushed
    PYMD r0, r1
    strd r0, r1, [sp, #8 + 8]
    REFD r0, r1
    strd r0, r1, [sp, #8 + 16]
    pop {r0, r1}
    // Jump back into the saving_func stub (lr from its `bl`) and at the same
    // time set lr to the instruction after this one, so that the wrapped
    // function's `bx lr` lands in generic_restore_state.
    blx lr
    // <- wrapped function returns here
    // fall through into restore:

// generic_restore_state
//
// Pops the three saved pairs in the order they were stored and writes them
// back, then returns to the original caller. Only r12 and r14 are used as
// scratch, so the wrapped function's result in r0-r3 and the flags set by
// it are not modified by the ARM instructions here.
.thumb_func
generic_restore_state:
    // do restore here
    pop {r12, r14}
    WXMD r12, r14
    pop {r12, r14}
    WYMD r12, r14
    pop {r12, r14}
    WEFD r12, r14
    pop {pc}                        // caller's lr pushed by saving_func

// ============== ARITHMETIC FUNCTIONS ===============
//
// Register convention used throughout: first double in r0 (low word) / r1
// (high word), second double in r2/r3, double result in r0/r1.

double_wrapper_section __aeabi_dadd
saving_func wrapper __aeabi_dadd
    dcp_dadd_m r0,r1,r0,r1,r2,r3
    saving_func_return

double_wrapper_section __aeabi_dsub
saving_func wrapper __aeabi_dsub
    dcp_dsub_m r0,r1,r0,r1,r2,r3
    saving_func_return

// Reverse subtract: same macro as __aeabi_dsub with the operands swapped.
double_wrapper_section __aeabi_drsub
saving_func wrapper __aeabi_drsub
    dcp_dsub_m r0,r1,r2,r3,r0,r1
    saving_func_return

// The multiply macro is handed r4 and r14 as extra temporaries, so both are
// saved around it; lr must be restored before saving_func_return uses it.
double_wrapper_section __aeabi_dmul
saving_func wrapper __aeabi_dmul
    // todo optimize this based on final decision on saving_func_entry
    push {r4,r14}
    dcp_dmul_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r4,r12,r14
    // todo optimize this based on final decision on saving_func_entry
    pop {r4,lr}
    saving_func_return

double_section ddiv_fast
saving_func regular ddiv_fast
    dcp_ddiv_fast_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
    saving_func_return

double_wrapper_section __aeabi_ddiv
saving_func wrapper __aeabi_ddiv
@ with correct rounding
    dcp_ddiv_m r0,r1,r0,r1,r2,r3,r0,r1,r2,r3,r12
    saving_func_return

double_section sqrt_fast
saving_func regular sqrt_fast
    dcp_dsqrt_fast_m r0,r1,r0,r1,r0,r1,r2,r3,r12
    saving_func_return

double_wrapper_section sqrt
saving_func wrapper sqrt
@ with correct rounding
    dcp_dsqrt_m r0,r1,r0,r1,r0,r1,r2,r3,r12
    saving_func_return

// Result is delivered in the APSR flags rather than in a register.
double_section dclassify
saving_func regular dclassify
    dcp_dclassify_m apsr_nzcv,r0,r1
    saving_func_return

// ============== CONVERSION FUNCTIONS ===============

double_wrapper_section __aeabi_d2f
saving_func wrapper __aeabi_d2f double2float
@ with rounding
    dcp_double2float_m r0,r0,r1
    saving_func_return

double_wrapper_section __aeabi_i2d
saving_func wrapper __aeabi_i2d int2double
    dcp_int2double_m r0,r1,r0
    saving_func_return

double_wrapper_section __aeabi_ui2d
saving_func wrapper __aeabi_ui2d uint2double
    dcp_uint2double_m r0,r1,r0
    saving_func_return

// The three *fix* routines below share one pre-scaling step: r2 is added to
// the 11-bit biased exponent of the double in r0/r1 (i.e. the input is
// multiplied by 2^r2) and the adjusted value is then passed to the matching
// integer conversion. An adjusted exponent outside 0..0x7ff is clamped:
// overflow becomes 0x7ff, underflow becomes 0.
// NOTE(review): r2 is presumably the number of fractional bits of the
// fixed-point result - confirm against the C prototypes.

double_section double2fix_z
saving_func regular double2fix_z
    ubfx r3, r1, #20, #11           // r3 = biased exponent
    adds r3, r2                     // apply scale
    beq 1f // very small; we don't care that we might make a denormal
    asrs ip, r3, #11                // zero iff 0 <= r3 <= 0x7ff
    beq 1f
    ite pl
    movpl r3, #0x7ff                // too large: saturate exponent
    movsmi r3, #0                   // too small: flush exponent to zero
1:
    bfi r1, r3, #20, #11            // write adjusted exponent back
    b double2int_z_entry

double_section double2ufix
saving_func regular double2ufix_z double2ufix
double2ufix_z_entry:
    ubfx r3, r1, #20, #11           // r3 = biased exponent
    adds r3, r2                     // apply scale
    beq 1f // very small; we don't care that we might make a denormal
    asrs ip, r3, #11                // zero iff 0 <= r3 <= 0x7ff
    beq 1f
    ite pl
    // Saturate the exponent on overflow, as double2fix_z and double2fix do.
    // (A shift of r1 by 20 here would merely reload the unscaled exponent,
    // so an overflowing input would be converted as if r2 were zero.)
    movpl r3, #0x7ff
    movsmi r3, #0                   // too small: flush exponent to zero
1:
    bfi r1, r3, #20, #11            // write adjusted exponent back
    b double2uint_z_entry

double_section double2fix
saving_func regular double2fix
    ubfx r3, r1, #20, #11           // r3 = biased exponent
    cbz r3, 2f // 0 or denormal
    adds r3, r2                     // apply scale
    beq 1f // very small; we don't care that we might make a denormal
    asrs ip, r3, #11                // zero iff 0 <= r3 <= 0x7ff
    beq 1f
    ite pl
    movpl r3, #0x7ff                // too large: saturate exponent
    movsmi r3, #0                   // too small: flush exponent to zero
1:
    bfi r1, r3, #20, #11            // write adjusted exponent back
    b double2int_entry
2:
    movs r0, #0                     // zero/denormal input converts to 0
    saving_func_return

// double2int: converts the double in r0/r1 to a 32-bit integer in r0.
// Inputs for which truncation already gives the wanted answer are handed to
// double2int_z_entry; the remaining case (negative with a non-zero fraction)
// is truncated and then decremented by one.
double_section double2int
saving_func regular double2int
double2int_entry:
    lsls r2, r1, #1                 // C = sign bit, r2 = exponent:mantissa_hi << 1
    bcc double2int_z_entry // positive is ok for int64_z
    lsrs r3, r2, #21
    beq double2int_z_entry // 0 or -0 or denormal is ok for int_z
    lsrs r2, #21                    // r2 = biased exponent
    adds r2, #1
    subs r2, r2, #0x400             // r2 = exponent - 0x3ff
    bcc 1f // <1 means subtract 1
    cmp r2, #31
    bge double2int_z_entry // must be an integer or maxed out
    lsls r3, r1, #12
    adds r3, r3, r0, lsr #20
    // r3 now has highest 32 mantissa bits
    lsls r3, r2                     // discard the integer-part bits
    orrs r3, r3, r0, lsl #12
    // these bits are all guaranteed to be in the fraction
    beq double2int_z_entry // integer
1:
    dcp_double2int_m r0,r0,r1
    subs r0, #1
    saving_func_return

double_wrapper_section __aeabi_d2iz
saving_func wrapper __aeabi_d2iz double2int_z
double2int_z_entry:
@ with truncation towards 0
    dcp_double2int_m r0,r0,r1
    // note: this works with either saved or not saved call as it is just a `bx lr`
    saving_func_return

double_wrapper_section __aeabi_d2uiz
saving_func wrapper __aeabi_d2uiz double2uint double2uint_z
double2uint_z_entry:
@ with truncation towards 0
    dcp_double2uint_m r0,r0,r1
    saving_func_return

double_section double2int_r
saving_func regular double2int_r
@ with rounding
    dcp_double2int_r_m r0,r0,r1
    saving_func_return

double_section double2uint_r
saving_func regular double2uint_r
@ with rounding
    dcp_double2uint_r_m r0,r0,r1
    saving_func_return

// ============== COMPARISON FUNCTIONS ===============
//
// dcp_dcmp_m delivers its result either into the APSR flags or into a
// register holding the same bits in APSR layout (bit 30 = Z, bit 28 = V),
// which is what the ubfx extractions below rely on.

double_wrapper_section __aeabi_dcmpun
saving_func wrapper __aeabi_dcmpun
    dcp_dcmp_m r0,r0,r1,r2,r3
    // extract unordered bit
    ubfx r0, r0, #28, #1
    saving_func_return

// The three flag-returning comparisons share one section so that they can
// all reach cmp_nan with a short branch.
double_wrapper_section __aeabi_dcmp

saving_func wrapper __aeabi_cdrcmple
    dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1 // with arguments reversed
    bvs cmp_nan
    saving_func_return

// these next two can be the same function in the absence of exceptions
saving_func wrapper __aeabi_cdcmple
//wrapper_func __aeabi_dcmp
    dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
    bvs cmp_nan
    saving_func_return

// It is not clear from the ABI documentation whether cdcmpeq must set the C flag
// in the same way as cdcmple. If not, we could save the "bvs" below; but we
// err on the side of caution.
saving_func wrapper __aeabi_cdcmpeq
//wrapper_func __aeabi_dcmp
    dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
    bvs cmp_nan
    saving_func_return

// If the result of a flag-setting comparison is "unordered" then we need to set C and clear Z.
// We could conceivably just do lsrs r12,r14,#1, or even cmp r14,r14,lsr#1 as (a) r14 here is a
// return address and r14b0=1 for Thumb mode; (b) we are unlikely to be returning to address 0.
cmp_nan:
    movs r12, #3 // r12 does not need to be preserved by the flag-setting comparisons
    lsrs r12, #1 // set C, clear Z
    saving_func_return

// int FUNC_NAME(__aeabi_dcmpeq)(double, double) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
double_wrapper_section __aeabi_dcmpeq
saving_func wrapper __aeabi_dcmpeq
    dcp_dcmp_m r0,r0,r1,r2,r3
    // extract Z
    ubfx r0, r0, #30, #1
    saving_func_return

// int FUNC_NAME(__aeabi_dcmplt)(double, double) result (1, 0) denotes (<, ?>=) [2], use for C <
// Implemented as "second > first" by comparing with the operands swapped.
double_wrapper_section __aeabi_dcmplt
saving_func wrapper __aeabi_dcmplt
    dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
    ite hi
    movhi r0,#1
    movls r0,#0
    saving_func_return

// int FUNC_NAME(__aeabi_dcmple)(double, double) result (1, 0) denotes (<=, ?>) [2], use for C <=
// Implemented as "second >= first" by comparing with the operands swapped.
double_wrapper_section __aeabi_dcmple
saving_func wrapper __aeabi_dcmple
    dcp_dcmp_m apsr_nzcv,r2,r3,r0,r1
    ite hs
    movhs r0,#1
    movlo r0,#0
    saving_func_return

// int FUNC_NAME(__aeabi_dcmpge)(double, double) result (1, 0) denotes (>=, ?<) [2], use for C >=
double_wrapper_section __aeabi_dcmpge
saving_func wrapper __aeabi_dcmpge
    dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
    ite hs
    movhs r0,#1
    movlo r0,#0
    saving_func_return

// int FUNC_NAME(__aeabi_dcmpgt)(double, double) result (1, 0) denotes (>, ?<=) [2], use for C >
double_wrapper_section __aeabi_dcmpgt
saving_func wrapper __aeabi_dcmpgt
    dcp_dcmp_m apsr_nzcv,r0,r1,r2,r3
    ite hi
    movhi r0,#1
    movls r0,#0
    saving_func_return

#endif