Skip to content

Commit

Permalink
ARCv2: lib: memcpy: use local symbols
Browse files Browse the repository at this point in the history
Otherwise perf profiles don't charge time to memcpy

Signed-off-by: Vineet Gupta <[email protected]>
  • Loading branch information
vineetgarc committed Nov 3, 2015
1 parent 5a364c2 commit ac506b7
Showing 1 changed file with 26 additions and 26 deletions.
52 changes: 26 additions & 26 deletions arch/arc/lib/memcpy-archs.S
Original file line number Diff line number Diff line change
Expand Up @@ -50,26 +50,26 @@ ENTRY(memcpy)

;;; if size <= 8
cmp r2, 8
bls.d @smallchunk
bls.d @.Lsmallchunk
mov.f lp_count, r2

and.f r4, r0, 0x03
rsub lp_count, r4, 4
lpnz @aligndestination
lpnz @.Laligndestination
;; LOOP BEGIN
ldb.ab r5, [r1,1]
sub r2, r2, 1
stb.ab r5, [r3,1]
aligndestination:
.Laligndestination:

;;; Check the alignment of the source
and.f r4, r1, 0x03
bnz.d @sourceunaligned
bnz.d @.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
lsr.f lp_count, r2, ZOLSHFT
lpnz @copy32_64bytes
lpnz @.Lcopy32_64bytes
;; LOOP START
LOADX (r6, r1)
PREFETCH_READ (r1)
Expand All @@ -81,25 +81,25 @@ aligndestination:
STOREX (r8, r3)
STOREX (r10, r3)
STOREX (r4, r3)
copy32_64bytes:
.Lcopy32_64bytes:

and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
smallchunk:
lpnz @copyremainingbytes
.Lsmallchunk:
lpnz @.Lcopyremainingbytes
;; LOOP START
ldb.ab r5, [r1,1]
stb.ab r5, [r3,1]
copyremainingbytes:
.Lcopyremainingbytes:

j [blink]
;;; END CASE 0

sourceunaligned:
.Lsourceunaligned:
cmp r4, 2
beq.d @unalignedOffby2
beq.d @.LunalignedOffby2
sub r2, r2, 1

bhi.d @unalignedOffby3
bhi.d @.LunalignedOffby3
ldb.ab r5, [r1, 1]

;;; CASE 1: The source is unaligned, off by 1
Expand All @@ -114,7 +114,7 @@ sourceunaligned:
or r5, r5, r6

;; Both src and dst are aligned
lpnz @copy8bytes_1
lpnz @.Lcopy8bytes_1
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
Expand All @@ -131,7 +131,7 @@ sourceunaligned:

st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
copy8bytes_1:
.Lcopy8bytes_1:

;; Write back the remaining 16bits
EXTRACT_1 (r6, r5, 16)
Expand All @@ -141,14 +141,14 @@ copy8bytes_1:
stb.ab r5, [r3, 1]

and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @copybytewise_1
lpnz @.Lcopybytewise_1
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
copybytewise_1:
.Lcopybytewise_1:
j [blink]

unalignedOffby2:
.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
ldh.ab r5, [r1, 2]
sub r2, r2, 1
Expand All @@ -159,7 +159,7 @@ unalignedOffby2:
#ifdef __BIG_ENDIAN__
asl.nz r5, r5, 16
#endif
lpnz @copy8bytes_2
lpnz @.Lcopy8bytes_2
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
Expand All @@ -176,22 +176,22 @@ unalignedOffby2:

st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
copy8bytes_2:
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
lsr.nz r5, r5, 16
#endif
sth.ab r5, [r3, 2]

and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @copybytewise_2
lpnz @.Lcopybytewise_2
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
copybytewise_2:
.Lcopybytewise_2:
j [blink]

unalignedOffby3:
.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1 byte to achieve the 32-bit alignment

Expand All @@ -201,7 +201,7 @@ unalignedOffby3:
#ifdef __BIG_ENDIAN__
asl.ne r5, r5, 24
#endif
lpnz @copy8bytes_3
lpnz @.Lcopy8bytes_3
;; LOOP START
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
Expand All @@ -218,19 +218,19 @@ unalignedOffby3:

st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
copy8bytes_3:
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
lsr.nz r5, r5, 24
#endif
stb.ab r5, [r3, 1]

and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @copybytewise_3
lpnz @.Lcopybytewise_3
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
copybytewise_3:
.Lcopybytewise_3:
j [blink]

END(memcpy)

0 comments on commit ac506b7

Please sign in to comment.