/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Optimized strcpy() for SW64
 *
 * Copyright (C) Mao Minkai
 * Author: Mao Minkai
 *
 * Copy a null-terminated string from SRC to DST.
 *
 * Input:
 *	$16:	DST, clobbered
 *	$17:	SRC, clobbered
 *
 * Output:
 *	$0:	DST
 *
 * Temporaries:
 *	$1:	unaligned parts of addr (0 means aligned addr)
 *	$4:	current data to copy (could have 1 byte or 8 bytes)
 *	$5:	parts of current data, compare result
 *	$6:	number of bytes left to copy
 *
 * Tag naming:
 *	co:	SRC and DST are co-aligned
 *	mis:	SRC and DST are not co-aligned
 *	a:	SRC or DST has aligned address
 *	una:	SRC or DST has unaligned address
 *
 * Strategy:
 *	Whole quadwords are copied while they contain no null byte; as soon
 *	as a quadword holding the terminator is seen, the remaining bytes
 *	(terminator included) are copied one at a time by $tail_loop.
 *	"cmpgeb $31, x, f" sets bit i of f when byte i of x is zero.
 *
 *	Every quadword load is 8-byte aligned and is only issued once the
 *	string is known to reach into that quadword, so nothing is read
 *	beyond the aligned quadword that holds the terminator.
 */

/*
 * NOTE(review): the header name on this #include was missing from the text
 * under review; <asm/export.h> is presumed because EXPORT_SYMBOL() is used
 * at the end of the file -- confirm against the tree.
 */
#include <asm/export.h>

	.text
	.align 4
	.globl strcpy
	.ent strcpy
strcpy:
	.frame $30, 0, $26
	.prologue 0

	bis	$31, $16, $0		# set return value

	xor	$16, $17, $1
	and	$1, 7, $1		# do the low 3 address bits differ?
	bne	$1, $mis_aligned

/* src and dst are co-aligned */
	and	$16, 7, $1
	bne	$1, $co_una_head

/* do the copy in loop, for (co)-aligned src and dst with (a)ligned addr */
$co_a_loop:
	ldl	$4, 0($17)
	cmpgeb	$31, $4, $5
	bne	$5, $tail_loop		# we found null
	stl	$4, 0($16)
	addl	$17, 8, $17
	addl	$16, 8, $16
	br	$31, $co_a_loop

/*
 * src and dst are co-aligned but have unaligned address
 *
 * The null check is done on the raw quadword and the flags are shifted
 * right by the offset, so only bytes at or above SRC are considered.
 * (Checking after extll would always hit the zero bytes extll shifts in.)
 */
$co_una_head:
	ldl_u	$4, 0($17)		# aligned quadword containing SRC
	cmpgeb	$31, $4, $5
	srl	$5, $1, $5		# drop flags of bytes below SRC
	bne	$5, $tail_loop		# we found null
	extll	$4, $16, $4		# move the head bytes down to byte 0
	ldi	$6, 8($31)
	subl	$6, $1, $6		# head length = 8 - offset
	addl	$17, $6, $17		# prepare addr of middle part

/* copy the unaligned part in loop */
$co_una_head_loop:
	stb	$4, 0($16)
	addl	$16, 1, $16
	subl	$6, 1, $6
	beq	$6, $co_a_loop		# head done, both addrs are aligned
	srl	$4, 8, $4		# bring the next byte to the low position
	br	$31, $co_una_head_loop

/* src and dst are not co-aligned */
$mis_aligned:
	and	$16, 7, $1
	beq	$1, $mis_a_dst
	ldi	$6, 8($31)
	subl	$6, $1, $6		# bytes needed to make dst aligned

/* copy the first few bytes to make dst aligned */
$mis_una_head_loop:
	ldbu	$4, 0($17)
	stb	$4, 0($16)
	beq	$4, $out		# we have reached null, return
	addl	$17, 1, $17
	addl	$16, 1, $16
	subl	$6, 1, $6
	bne	$6, $mis_una_head_loop

/*
 * dst has aligned addr
 *
 * src is not co-aligned with dst, so its offset in $1 is non-zero here and
 * stays the same while both pointers advance by 8.
 */
$mis_a_dst:
	and	$17, 7, $1

$mis_a_dst_loop:
	ldl_u	$4, 0($17)		# quadword holding the low part
	cmpgeb	$31, $4, $5
	srl	$5, $1, $5		# drop flags of bytes below SRC
	bne	$5, $tail_loop		# null before the next quadword
	ldl_u	$5, 7($17)		# string continues, safe to load
	extll	$4, $1, $4
	exthl	$5, $1, $5
	bis	$4, $5, $4		# 8 source bytes starting at SRC
	cmpgeb	$31, $4, $5
	bne	$5, $tail_loop		# we found null
	stl	$4, 0($16)
	addl	$17, 8, $17
	addl	$16, 8, $16
	br	$31, $mis_a_dst_loop

/* we have found null in the last few bytes, copy one byte each time */
$tail_loop:
	ldbu	$4, 0($17)
	stb	$4, 0($16)
	beq	$4, $out		# we have reached null, return
	addl	$17, 1, $17
	addl	$16, 1, $16
	br	$31, $tail_loop

/* copy is done, return */
$out:
	ret

	.end strcpy
	EXPORT_SYMBOL(strcpy)