/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include "pico/asm_helper.S"
#if HAS_DOUBLE_COPROCESSOR

pico_default_asm_setup

.macro double_section name
#if PICO_DOUBLE_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

.macro double_wrapper_section func
double_section WRAPPER_FUNC_NAME(\func)
.endm

double_wrapper_section conv_tod

@ convert int64 to double, rounding
wrapper_func __aeabi_l2d
regular_func int642double
 movs r2,#0       @ fall through
@ convert unsigned 64-bit fix to double, rounding; number of r0:r1 bits after point in r2
regular_func fix642double
 cmp r1,#0
 bge 10f @ positive? can use unsigned code
 rsbs r0,#0
 sbc r1,r1,r1,lsl#1 @ make positive
 cbz r1,7f @ high word is zero?
 clz r3,r1
 subs r3,#11
 bmi 2f
 rsbs r12,r3,#32
 lsrs r12,r0,r12
 lsls r0,r3
 lsls r1,r3
 orrs r1,r1,r12
 add r2,r2,r3
 rsbs r2,#0
 add r2,#0x3ff+19+32
 add r1,r1,r2,lsl#20 @ insert exponent
 orr r1,#0x80000000
 mov r3,0x7fe
 cmp r2,r3
 it lo @ over/underflow?
 bxlo r14
 b 3f
7:
 mov r1,r2
 b fix2double_neg
2:
 add r3,#33
 lsls r12,r0,r3 @ rounding bit in carry, sticky bits in Z
 sub r3,#1
 lsl r12,r1,r3
 rsb r3,#32
 lsr r0,r3
 lsr r1,r3
 orr r0,r0,r12
@ push {r14}
@ bl dumpreg
@ pop {r14}
 sub r2,r3,r2
 add r2,#0x3ff+19+32
 beq 4f @ potential rounding tie?
 adcs r0,r0,#0
5:
 adc r1,r1,r2,lsl#20 @ insert exponent, add rounding
 orr r1,#0x80000000
 mov r3,0x7fe
 cmp r2,r3
 it lo
 bxlo r14
@ over/underflow?
3:
 mov r1,#0
 it ge
 movtge r1,#0x7ff0 @ overflow
 mov r0,#0
 bx r14
1:
 movs r1,#0
 bx r14
4:
 bcc 5b @ not a rounding tie after all
 adcs r0,r0,#0
 bic r0,r0,#1 @ force to even
 b 5b

@ convert uint64 to double, rounding
wrapper_func __aeabi_ul2d
regular_func uint642double
 movs r2,#0       @ fall through
@ convert unsigned 64-bit fix to double, rounding; number of r0:r1 bits after point in r2
regular_func ufix642double
10:
 cbz r1,7f @ high word zero?
 clz r3,r1
 subs r3,#11
 bmi 2f
 rsbs r12,r3,#32
 lsrs r12,r0,r12
 lsls r0,r3
 lsls r1,r3
 orrs r1,r1,r12
 add r2,r2,r3
 rsbs r2,#0
 add r2,#0x3ff+19+32
 add r1,r1,r2,lsl#20 @ insert exponent
 mov r3,0x7fe
 cmp r2,r3
 it lo @ over/underflow?
 bxlo r14
 b 3f
7:
 mov r1,r2
 b ufix2double
2:
 add r3,#33
 lsls r12,r0,r3 @ rounding bit in carry, sticky bits in Z
 sub r3,#1
 lsl r12,r1,r3
 rsb r3,#32
 lsr r0,r3
 lsr r1,r3
 orr r0,r0,r12
@ push {r14}
@ bl dumpreg
@ pop {r14}
 sub r2,r3,r2
 add r2,#0x3ff+19+32
 beq 4f @ potential rounding tie?
 adcs r0,r0,#0
5:
 adc r1,r1,r2,lsl#20 @ insert exponent, add rounding
 mov r3,0x7fe
 cmp r2,r3
 it lo
 bxlo r14
@ over/underflow?
3:
 mov r1,#0
 it ge
 movtge r1,#0x7ff0 @ overflow
 mov r0,#0
 bx r14
1:
 movs r1,#0
 bx r14
4:
 bcc 5b @ not a rounding tie after all
 adcs r0,r0,#0
 bic r0,r0,#1 @ force to even
 b 5b

regular_func fix2double
 cmp r0,#0
 bge ufix2double @ positive? can use unsigned code
 rsbs r0,#0 @ make positive
fix2double_neg:
 clz r3,r0
 subs r3,#11
 bmi 2f
 lsls r0,r3
 add r2,r1,r3
 rsbs r2,#0
 add r2,#0x3ff+19
 add r1,r0,r2,lsl#20 @ insert exponent
 orr r1,#0x80000000
 mov r0,#0
 mov r3,0x7fe
 cmp r2,r3
 it lo @ over/underflow?
 bxlo r14
 b 3f
2:
 rsb r3,#0
 lsrs r12,r0,r3
 rsb r2,r3,#32
 lsls r0,r0,r2
@ push {r14}
@ bl dumpreg
@ pop {r14}
 sub r2,r3,r1
 add r2,#0x3ff+19
 add r1,r12,r2,lsl#20 @ insert exponent
 orr r1,#0x80000000
 mov r3,0x7fe
 cmp r2,r3
 it lo
 bxlo r14
@ over/underflow?
3:
 mov r1,#0x80000000
 it ge
 movtge r1,#0xfff0 @ overflow
 mov r0,#0
 bx r14
1:
 movs r1,#0
 bx r14

regular_func ufix2double
 cbz r0,1f @ zero? return it
 clz r3,r0
 subs r3,#11
 bmi 2f
 lsls r0,r3
 add r2,r1,r3
 rsbs r2,#0
 add r2,#0x3ff+19
 add r1,r0,r2,lsl#20 @ insert exponent
 mov r0,#0
 mov r3,0x7fe
 cmp r2,r3
 it lo @ over/underflow?
 bxlo r14
 b 3f
2:
 rsbs r3,#0
 lsrs r12,r0,r3
 rsb r2,r3,#32
 lsls r0,r0,r2
@ push {r14}
@ bl dumpreg
@ pop {r14}
 sub r2,r3,r1
 add r2,#0x3ff+19
 add r1,r12,r2,lsl#20 @ insert exponent
 mov r3,0x7fe
 cmp r2,r3
 it lo
 bxlo r14
@ over/underflow?
3:
 mov r1,#0
 it ge
 movtge r1,#0x7ff0 @ overflow
 mov r0,#0
 bx r14
1:
 movs r1,#0
 bx r14

double_section conv_dtoi64
regular_func double2int64
  lsls r3, r1, #1
  bcc double2int64_z // input positive is ok for int64_z
  cmp r3, #0xffe00000
  bcs double2int64_z // input is infinite
  lsrs r3, #21
  beq 2f // input zero or denormal, means answer remains zero
  sub r3, #0x3ff
  cmp r3, #0
  blt 1f // input is less than 1.0
  cmp r3, #52
  bge double2int64_z // modified input must be an integer or infinite
  adds r3, #12
  lsls r2, r1, r3    // r2 has remaining fractional mantissa bits of r1
  bne 1f             // not integer as non zero fractional bits remain
  subs r3, #32
  bics r3, r3, r3, asr #31 // map negative shift to zero
  lsls r3, r0, r3
  beq double2int64_z   // remaining fractional bits are 0, so argument was an integer
1:
  push {lr}
  bl double2int64_z
  subs r0, #1
  sbcs r1, r1, #0
  pop {pc}
2:
  movs r0, #0
  movs r1, #0
  bx lr

double_section conv_dtofix64
regular_func double2fix64
  lsls r3, r1, #1
  bcc double2fix64_z // input positive is ok for fix64_z
  cmp r3, #0xffe00000
  bcs double2fix64_z // input is infinite
  lsrs r3, #21
  beq 2f // input zero or denormal, means answer remains zero
  sub r3, #0x3ff
  adds r3, r2
  blt 1f // modified input zero or denormal, or less than 1.0
  cmp r3, #52
  bge double2fix64_z // modified input must be an integer or infinite
  adds r3, #12
  lsls ip, r1, r3    // ip has remaining fractional mantissa bits of r1
  bne 1f             // not integer as non zero fractional bits remain
  subs r3, #32
  bics r3, r3, r3, asr #31 // map negative shift to zero
  lsls r3, r0, r3
  beq double2fix64_z   // remaining fractional bits are 0, so argument was an integer
1:
  push {lr}
  bl double2fix64_z
  subs r0, #1
  sbcs r1, r1, #0
  pop {pc}
2:
  movs r0, #0
  movs r1, #0
  bx lr

double_wrapper_section conv_dtoi64_z

@ convert double to signed int64, rounding towards 0, clamping
wrapper_func __aeabi_d2lz
regular_func double2int64_z
 movs r2,#0      @ fall through
@ convert double in r0:r1 to signed fixed point in r0:r1, clamping
regular_func double2fix64_z
 sub r2,#0x3ff+52-1 @ remove exponent bias, compensate for mantissa length
 asrs r12,r1,#20 @ sign and exponent
 sub r3,r12,#1
 sub r1,r1,r3,lsl#20 @ install implied 1, clear exponent
 lsls r3,#21
@ push {r14}
@ bl dumpreg
@ pop {r14}
 cmp r3,#0xffc00000
 bhs 1f @ 0, ∞/NaN?
 adds r2,r2,r3,lsr#21 @ offset exponent by fix precision; r1 is now required left shift
 bmi 4f @ actually a right shift?
 cmp r2,#11 @ overflow?
 bge 5f
 lsls r1,r2
 rsbs r3,r2,#32
 lsrs r3,r0,r3
 orrs r1,r1,r3
 lsls r0,r2
 cmp r12,#0
 it ge
 bxge r14
 rsbs r0,#0
 sbc r1,r1,r1,lsl#1
 bx r14
4:
 adds r2,#32
 ble 6f @ result fits in low word?
 lsl r3,r1,r2
 rsbs r2,#32
 lsrs r1,r2
 lsrs r0,r2
 orrs r0,r0,r3
 cmp r12,#0
 it ge
 bxge r14
 rsbs r0,#0
 sbc r1,r1,r1,lsl#1
 bx r14
6:
 rsbs r2,#0
 usat r2,#5,r2 @ underflow to 0
 lsrs r0,r1,r2
 movs r1,#0
 cmp r12,#0
 it ge
 bxge r14
 rsbs r0,#0
 sbc r1,r1,r1,lsl#1
 bx r14
1:
 beq 3f @ ±∞/±NaN?
2:
 movs r0,#0 @ ±0: return 0
 movs r1,#0
 bx r14
3:
 orrs r1,r0,r1,lsl#12 @ mantissa field
 it ne @ NaN?
 movne r12,#0 @ treat NaNs as +∞
@ here original argument was ±Inf or we have under/overflow
5:
 mvn r1,#0x80000000
 add r1,r1,r12,lsr#31 @ so -Inf → 0x80000000, +Inf → 0x7fffffff
 mvn r0,r12,asr#31
 bx r14

double_wrapper_section conv_dtoui64

@ convert double to unsigned int64, rounding towards -Inf, clamping
wrapper_func __aeabi_d2ulz
regular_func double2uint64
regular_func double2uint64_z
 movs r2,#0      @ fall through
@ convert double in r0:r1 to unsigned fixed point in r0:r1, clamping
regular_func double2ufix64
regular_func double2ufix64_z
 subw r2,r2,#0x3ff+52-1 @ remove exponent bias, compensate for mantissa length
 asrs r3,r1,#20 @ sign and exponent
 sub r3,#1
 sub r1,r1,r3,lsl#20 @ install implied 1, clear exponent and sign
 bmi 7f @ argument negative?
 movw r12,#0x7fe
 cmp r3,r12
 bhs 1f @ 0, ∞/NaN?
 adds r2,r3 @ offset exponent by fix precision; r2 is now required left shift
 bmi 2f @ actually a right shift?
 cmp r2,#12 @ overflow?
 bge 4f
 lsls r1,r2
 rsbs r3,r2,#32
 lsrs r3,r0,r3
 lsls r0,r2
 orrs r1,r1,r3
 bx r14
2:
 adds r2,#32
 ble 5f @ result fits in low word?
 lsl r3,r1,r2
 rsbs r2,#32
 lsrs r1,r2
 lsrs r0,r2
 orrs r0,r0,r3
 bx r14
5:
 rsbs r2,#0
 usat r2,#5,r2 @ underflow to 0
 lsrs r0,r1,r2
 movs r1,#0
 bx r14
1:
 bhi 3f @ 0? return 0
4:
@ here overflow has occurred
 mvn r0,#0
 mvn r1,#0
 bx r14
7:
 cmp r3,#0xfffffffe
 bne 3f @ -0? return 0
 orrs r2,r0,r1,lsl#12 @ mantissa field
 bne 4b
3:
 movs r0,#0
 movs r1,#0
 bx r14

#endif