This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

rint/nearbyint for x86-64


Regarding rint and nearbyint for x86_64:
> Take absolute value, ADD then SUBtract the power of 2 whose LSB in f.p.
> representation has the place value 1 (namely: 2**23 or 2**52), copy the
> sign of the original argument.

This implementation uses a data dependency (indexing by the sign of the
input) to avoid any branching.
-----
// nearbyint, rint, nearbyintf, rintf for x86_64 using SSE instructions.
// Convert to rounded integer value in floating-point represenation.
// rint temporarily enables Precision exception (SIGFPE if result != input),
// nearbyint uses existing PrecisionMask (usually 1, thus SIGFPE inhibited.)

// Copyright 2008 BitWagon Software LLC.  All rights reserved.
// Licensed under GNU Lesser General Public License (LGPL) version 2.1.
// Written by John Reiser, March 2008.

//#include <machine/asm.h>
#define ENTRY(sym) sym: .globl sym; .type sym,@function
#define END(sym) .size sym, . - sym
#define weak_alias(a, b) .globl b; b = a

P_PRECISION_MASK= 12 // bit number of PrecisonMask bit in MXCSR

	.section .rodata
	.align 16  // SSE memory operands demand 16-byte alignment
two52p:
	.long 0,0x43300000  // +(2**52);  lsb has place value +1
	.long 0,0
	.long 0,0xc3300000  // -(2**52);  must be 16 bytes from two52p

	.text
ENTRY(__nearbyint)
	movmskpd %xmm0,%eax  // extract packed signs to low 2 bits of eax
	lea two52p(%rip),%rdx  // no double index with %rip as base
	and $1,%eax  // sign of scalar in xmm0
	add %eax,%eax  // 2*
	addpd (%rdx,%rax,8),%xmm0  // align binary point and integer point
	subpd (%rdx,%rax,8),%xmm0  // go back near original value
	ret
END(__nearbyint)
weak_alias(__nearbyint, nearbyint)

ENTRY(__rint)
	movmskpd %xmm0,%eax  // extract packed signs to low 2 bits of eax
	stmxcsr -4(%rsp); movl -4(%rsp),%ecx  // original mxcsr
	lea two52p(%rip),%rdx  // no double index with %rip as base
	andl $~(1<<P_PRECISION_MASK),-4(%rsp)
	ldmxcsr -4(%rsp)  // enable Precision exception
	add %eax,%eax  // 2*
	movl %ecx,-4(%rsp)  // mxcsr at entry
	addpd (%rdx,%rax,8),%xmm0  // align binary point and integer point
	subpd (%rdx,%rax,8),%xmm0  // go back near original value
	ldmxcsr -4(%rsp)  // restore original mxcsr
	ret
END(__rint)
weak_alias(__rint,rint)

	.section .rodata
	.align 16  // SSE memory operands demand 16-byte alignment
two23p:
	.long 0x4b000000  // +(2**23);  lsb has place value +1
	.long 0,0,0
	.long 0xcb000000  // -(2**23);  must be 16 bytes from two23p

	.text
ENTRY(__nearbyintf)
	movmskps %xmm0,%eax  // extract packed signs to low 4 bits of eax
	lea two23p(%rip),%rdx  // no double index with %rip as base
	and $1,%eax  // sign of scalar in xmm0
	add %eax,%eax  // 2*
	addps (%rdx,%rax,8),%xmm0  // align binary point and integer point
	subps (%rdx,%rax,8),%xmm0  // go back near original value
	ret
END(__nearbyintf)
weak_alias(__nearbyintf,nearbyintf)

ENTRY(__rintf)
	movmskps %xmm0,%eax  // extract packed signs to low 4 bits of eax
	stmxcsr -4(%rsp); movl -4(%rsp),%ecx  // original mxcsr
	lea two23p(%rip),%rdx  // no double index with %rip as base
	andl $~(1<<P_PRECISION_MASK),-4(%rsp)
	and $1,%eax  // sign of scalar in xmm0
	ldmxcsr -4(%rsp)  // enable Precision exception
	add %eax,%eax  // 2*
	movl %ecx,-4(%rsp)  // mxcsr at entry
	addps (%rdx,%rax,8),%xmm0  // align binary point and integer point
	subps (%rdx,%rax,8),%xmm0  // go back near original value
	ldmxcsr -4(%rsp)  // restore original mxcsr
	ret
END(__rintf)
weak_alias(__rintf,rintf)

-- 
John Reiser, jreiser@BitWagon.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]