149 lines
2.3 KiB
ArmAsm
149 lines
2.3 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Barely optimized memmove routine for sw64.
 *
 * This is hand-massaged output from the original memcpy.c. We defer to
 * memcpy whenever possible; the backwards copy loops are not unrolled.
 *
 * C equivalent: void *memmove(void *dest, const void *src, size_t n)
 *
 * In:    $16 = dest, $17 = src, $18 = n (byte count)
 * Out:   $0  = dest (always returned unchanged, per the C contract)
 * Temps: $1, $2, $3, $6 scratch; $4/$5 are the working dest/src cursors.
 */
#include <asm/export.h>

	.set noat
	.set noreorder
	.text

	.align 4
	.globl memmove
	.ent memmove
memmove:
	ldgp $29, 0($27)		# set up GP (needed for the memcpy tail call)
	unop
	.prologue 1

	addl $16, $18, $4		# $4 = dest + n (one past end of dest)
	addl $17, $18, $5		# $5 = src + n  (one past end of src)
	cmpule $4, $17, $1		# dest + n <= src
	cmpule $5, $16, $2		# dest >= src + n

	bis $1, $2, $1			# $1 != 0 iff the regions do NOT overlap
	mov $16, $0			# return value is the original dest
	xor $16, $17, $2		# low 3 bits zero iff src/dest co-aligned
	bne $1, memcpy			# no overlap: defer to memcpy (samegp)

	and $2, 7, $2			# Test for src/dest co-alignment.
	and $16, 7, $1			# $1 = dest & 7 (consumed by the up path)
	cmpule $16, $17, $3
	bne $3, $memmove_up		# dest < src: copy forwards

	/*
	 * dest > src and the regions overlap: copy backwards, starting
	 * from the one-past-end pointers computed above.
	 */
	and $4, 7, $1			# $1 = (dest + n) & 7: end alignment
	bne $2, $misaligned_dn		# not co-aligned: bytewise copy only
	unop
	beq $1, $skip_aligned_byte_loop_head_dn

	/* Copy bytes downward until the end pointers are 8-byte aligned. */
$aligned_byte_loop_head_dn:
	ldi $4, -1($4)
	ldi $5, -1($5)
	unop
	ble $18, $egress		# count exhausted before alignment reached

	ldbu $1, 0($5)
	ldi $18, -1($18)
	stb $1, 0($4)

	and $4, 7, $6
	bne $6, $aligned_byte_loop_head_dn

$skip_aligned_byte_loop_head_dn:
	ldi $18, -8($18)		# bias count by -8 for the word-loop test
	blt $18, $skip_aligned_word_loop_dn

	/* Both cursors now 8-byte aligned: copy whole words downward. */
$aligned_word_loop_dn:
	ldl $1, -8($5)
	ldi $5, -8($5)
	ldi $18, -8($18)

	stl $1, -8($4)
	ldi $4, -8($4)
	bge $18, $aligned_word_loop_dn

$skip_aligned_word_loop_dn:
	ldi $18, 8($18)			# undo bias: $18 = leftover bytes (0..7)
	bgt $18, $byte_loop_tail_dn
	unop
	ret $31, ($26), 1

	.align 4
$misaligned_dn:
	fnop
	unop
	beq $18, $egress		# nothing to copy

	/* Leftover (or misaligned) bytes, copied one at a time, downward. */
$byte_loop_tail_dn:
	ldbu $1, -1($5)
	ldi $5, -1($5)
	ldi $4, -1($4)

	ldi $18, -1($18)
	stb $1, 0($4)

	bgt $18, $byte_loop_tail_dn
	br $egress

	/* dest <= src: a forward copy is safe despite the overlap. */
$memmove_up:
	mov $16, $4			# $4 = working dest cursor
	mov $17, $5			# $5 = working src cursor
	bne $2, $misaligned_up		# not co-aligned: bytewise copy only
	beq $1, $skip_aligned_byte_loop_head_up	# dest already 8-byte aligned

	/* Copy bytes upward until dest is 8-byte aligned. */
$aligned_byte_loop_head_up:
	unop
	ble $18, $egress		# count exhausted before alignment reached
	ldbu $1, 0($5)

	ldi $18, -1($18)

	ldi $5, 1($5)
	stb $1, 0($4)
	ldi $4, 1($4)

	and $4, 7, $6
	bne $6, $aligned_byte_loop_head_up

$skip_aligned_byte_loop_head_up:
	ldi $18, -8($18)		# bias count by -8 for the word-loop test
	blt $18, $skip_aligned_word_loop_up

	/* Both cursors now 8-byte aligned: copy whole words upward. */
$aligned_word_loop_up:
	ldl $1, 0($5)
	ldi $5, 8($5)
	ldi $18, -8($18)

	stl $1, 0($4)
	ldi $4, 8($4)
	bge $18, $aligned_word_loop_up

$skip_aligned_word_loop_up:
	ldi $18, 8($18)			# undo bias: $18 = leftover bytes (0..7)
	bgt $18, $byte_loop_tail_up
	unop
	ret $31, ($26), 1

	.align 4
$misaligned_up:
	fnop
	unop
	beq $18, $egress		# nothing to copy

	/* Leftover (or misaligned) bytes, copied one at a time, upward. */
$byte_loop_tail_up:
	ldbu $1, 0($5)
	ldi $18, -1($18)

	stb $1, 0($4)

	ldi $5, 1($5)
	ldi $4, 1($4)
	bgt $18, $byte_loop_tail_up

$egress:
	ret $31, ($26), 1

	.end memmove
EXPORT_SYMBOL(memmove)