This is the mail archive of the
ecos-discuss@sources.redhat.com
mailing list for the eCos project.
Re: TCP/IP performance under eCos & other compilers than gnu
- To: Richard Wicks <rich at accetnetwork dot com>,ecos-discuss at sourceware dot cygnus dot com
- Subject: Re: [ECOS] TCP/IP performance under eCos & other compilers than gnu
- From: Grant Edwards <grante at visi dot com>
- Date: Mon, 1 Oct 2001 09:15:03 -0500
- References: <01092816224500.26300@localhost.localdomain> <20011001093920.N9700@biferten.ma.tech.ascom.ch>
On Mon, Oct 01, 2001 at 09:39:20AM +0200, Andrew Lunn wrote:
> Quite a while back there was a post to the list about gcc producing
> slow CRC calculation code when using -O3. I don't remember all the
> details, so find the post in the archive. I think if you compiled the
> CRC code -O you got faster code.
[I assume you're talking about the IP checksum routine, since I
don't remember any CRC stuff in the TCP/IP network stack.] With
-O3 optimization arm-gcc 2.95.2 starts "register thrashing" and
generates code that's about 50% slower than -O0. I switched to
an assembly language checksum, which improved throughput
considerably on my platform. I've attached the changed files in
case you want to try it.
--
Grant Edwards
grante@visi.com
//==========================================================================
//
// sys/netinet/in_cksum.c
//
//
//
//==========================================================================
//####COPYRIGHTBEGIN####
//
// -------------------------------------------
// The contents of this file are subject to the Red Hat eCos Public License
// Version 1.1 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://www.redhat.com/
//
// Software distributed under the License is distributed on an "AS IS"
// basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
// License for the specific language governing rights and limitations under
// the License.
//
// The Original Code is eCos - Embedded Configurable Operating System,
// released September 30, 1998.
//
// The Initial Developer of the Original Code is Red Hat.
// Portions created by Red Hat are
// Copyright (C) 1998, 1999, 2000 Red Hat, Inc.
// All Rights Reserved.
// -------------------------------------------
//
//####COPYRIGHTEND####
//####BSDCOPYRIGHTBEGIN####
//
// -------------------------------------------
//
// Portions of this software may have been derived from OpenBSD or other sources,
// and are covered by the appropriate copyright disclaimers included herein.
//
// -------------------------------------------
//
//####BSDCOPYRIGHTEND####
//==========================================================================
//#####DESCRIPTIONBEGIN####
//
// Author(s): gthomas
// Contributors: gthomas
// Date: 2000-01-10
// Purpose:
// Description:
//
//
//####DESCRIPTIONEND####
//
//==========================================================================
/* $OpenBSD: in_cksum.c,v 1.3 1997/02/24 14:06:35 niklas Exp $ */
/* $NetBSD: in_cksum.c,v 1.11 1996/04/08 19:55:37 jonathan Exp $ */
/*
* Copyright (c) 1988, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
*/
#include <sys/param.h>
#include <sys/mbuf.h>
#ifndef __ECOS
#include <sys/systm.h>
#endif
#include <netinet/in.h>
/*
* This routine is very heavily used in the network
* code and should be modified for each CPU to be as fast as possible.
*/
/*
 * ipChecksum is the assembly-language summing routine that accompanies
 * this file.  Its own header comment gives the register interface:
 * data pointer in r0, byte count in r1, initial sum in r2, result in r0,
 * and it destroys r0-r9.  Because of that clobber set, in_cksum() below
 * reaches it through inline asm with an explicit clobber list instead of
 * an ordinary C call through this prototype.
 */
extern unsigned ipChecksum(unsigned char *p, int len, unsigned currentSum);
/* Statistics record handed to FINISH_STATS() at the end of in_cksum(). */
struct net_stats stats_in_cksum;
int in_cksum(register struct mbuf *m, register int len)
{
register unsigned sum = 0;
register int mlen;
START_STATS();
while (m && len>0)
{
if (len < m->m_len)
mlen = len;
else
mlen = m->m_len;
asm(" ldr r0,%0" : : "m" (m->m_data) : "r0");
asm(" mov r1,%0" : : "r" (mlen) : "r1");
asm(" mov r2,%0" : : "r" (sum) : "r2");
asm(" bl ipChecksum" : : : "r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","cc","lr");
asm(" mov %0,r0" : "=r" (sum));
len -= mlen;
m = m->m_next;
}
if (len)
diag_printf("nin_cksum: out of data\n");
FINISH_STATS(stats_in_cksum);
return (~sum & 0xffff);
}
.section .text,"ax"
/* ipChecksum - one's-complement sum of a byte buffer, folded to 16 bits.
*
* In:  r0 = data pointer, r1 = byte count, r2 = initial sum
* Out: r0 = (upper half of sum + lower half of sum + carry), i.e. the
*      32-bit running sum folded to 16 bits
* Destroys registers r0-r9 (r4-r9 are NOT saved or restored here, so the
* caller must treat them as clobbered).  Returns with "mov pc,lr".
*
* The sum is accumulated in r2 as a 32-bit value with end-around carry:
* every carry out of bit 31 is added back into bit 0.
*
* NOTE(review): all loads are word loads from r0 with no alignment
* handling -- presumably r0 must be word aligned; confirm with callers.
*/
.global ipChecksum
ipChecksum:
/* do large blocks in first loop. It might be slightly
* advantageous to make the block size match either TCP or IP
* header size.
*
* Each pass consumes 56 bytes: two load-multiples of 7 words each.
* "adds" starts the carry chain and each "adcs" adds the next word plus
* the carry from the previous add.  The ldmia in the middle does not
* alter the flags, so the chain continues across it.
*/
loop1:
cmp r1,#56
blt loop1done
ldmia r0!,{r3,r4,r5,r6,r7,r8,r9}
adds r2,r2,r3
adcs r2,r2,r4
adcs r2,r2,r5
adcs r2,r2,r6
adcs r2,r2,r7
adcs r2,r2,r8
adcs r2,r2,r9
ldmia r0!,{r3,r4,r5,r6,r7,r8,r9}
adcs r2,r2,r3
adcs r2,r2,r4
adcs r2,r2,r5
adcs r2,r2,r6
adcs r2,r2,r7
adcs r2,r2,r8
adcs r2,r2,r9
adc r2,r2,#0 @ add in pending carry
subs r1,r1,#56
@ count reached exactly zero: nothing left, go straight to the fold
beq fold
b loop1
loop1done:
@ do 4-byte (one word) blocks
@ same end-around-carry add as above, one word per pass
loop2:
cmp r1,#4
blt loop2done
ldr r4,[r0],#4
adds r2,r2,r4
adc r2,r2,#0
subs r1,r1,#4
@ count reached exactly zero: skip the partial-word code
beq fold
b loop2
loop2done:
mvn r3,#0 @ r3 <= 0xffffffff
@ finish up with partial word (byte count <= 3)
@ A whole word is loaded even though fewer than 4 bytes remain, so bytes
@ beyond the counted data are read and then masked off below.
ldr r4,[r0] @ load last byte[s]
@ mask r4 using byte count
@ r3 >> (8 * count) has ones in the bit positions to discard; bic clears
@ them, leaving the most-significant `count` bytes of r4.  With count == 0
@ the shift is by zero, the mask is all ones and r4 becomes 0.
@ NOTE(review): keeping the most-significant bytes matches the first bytes
@ in memory only for big-endian word loads -- confirm target byte order.
mov r5,r1,lsl #3 @ byteCount * 8
bic r4,r4,r3,lsr r5 @ clear unused bits
adds r2,r2,r4
adc r2,r2,#0
@ Fold the 32-bit sum in r2 to 16 bits.
@ After the adds, the upper half of r2 holds (upper + lower) mod 0x10000
@ and the carry flag holds the carry out of that 16-bit addition.
@ rsc computes  (r2 >> 16) - r3 - NOT(carry); with r3 = 0xffffffff (-1)
@ that is  (r2 >> 16) + 1 - (1 - carry)  =  (r2 >> 16) + carry.
fold:
mvn r3,#0 @ r3 <= 0xffffffff
adds r2,r2,r2,lsl #16 @ add lower half into upper half
rsc r0,r3,r2,lsr #16 @ r0 <= (r2>>16) + Carry. Honest.
@ if the fold above was not confusing, then you
@ were not paying attention.
mov pc,lr