arm64 kernels: add accelerated crc32 routines
Incorporate changes from Linux 4.20 to accelerate the kernel's
crc32_le and __crc32c_le helpers.

Incorporates:

9784d82db ("lib/crc32: make core crc32() routines weak so they can be
overridden")
7481cddf2 ("arm64/lib: add accelerated crc32 routines")
ff98e20ef ("lib/crc32.c: mark crc32_le_base/__crc32c_le_base aliases as
__pure")

But omits the runtime CPU-feature selection (the ARM64_HAS_CRC32
alternatives), since that machinery differs significantly in Linux 4.1.
We assume the CRC extension is always available.
kjbracey2 committed Jan 12, 2022
1 parent 74d4ea0 commit 4fbd5f3
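
A note on the mechanism being backported: marking the generic routines
__weak lets the arm64 assembly definitions replace them at link time,
while the _base aliases keep the generic bodies reachable. A minimal
userspace sketch of the same pattern (illustrative only, not kernel code;
assumes a GCC/Clang toolchain):

#include <stdint.h>
#include <stddef.h>

/* Generic bit-at-a-time CRC32 (reflected polynomial 0xEDB88320),
 * declared weak so a stronger arch-specific definition wins at
 * link time. */
__attribute__((weak))
uint32_t crc32_le(uint32_t crc, const unsigned char *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1u));
	}
	return crc;
}

/* The generic body stays reachable under a _base name, mirroring
 * crc32_le_base in the patched lib/crc32.c below. The alias binds to
 * the local definition, so it still points at the generic code even
 * after the weak symbol is overridden. */
uint32_t crc32_le_base(uint32_t, const unsigned char *, size_t)
	__attribute__((alias("crc32_le")));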
Showing 12 changed files with 210 additions and 12 deletions.
1 change: 1 addition & 0 deletions release/src-rt-5.02axhnd/kernel/linux-4.1/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
select CRC32
select DCACHE_WORD_ACCESS
select GENERIC_ALLOCATOR
select GENERIC_CLOCKEVENTS
2 changes: 2 additions & 0 deletions release/src-rt-5.02axhnd/kernel/linux-4.1/arch/arm64/lib/Makefile
@@ -3,3 +3,5 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o

obj-$(CONFIG_CRC32) += crc32.o
60 changes: 60 additions & 0 deletions release/src-rt-5.02axhnd/kernel/linux-4.1/arch/arm64/lib/crc32.S
@@ -0,0 +1,60 @@
/*
* Accelerated CRC32(C) using AArch64 CRC instructions
*
* Copyright (C) 2016 - 2018 Linaro Ltd <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <linux/linkage.h>
//#include <asm/alternative.h>
#include <asm/assembler.h>

.arch armv8-a+crc

.macro __crc32, c
0: subs x2, x2, #16
b.mi 8f
ldp x3, x4, [x1], #16
CPU_BE( rev x3, x3 )
CPU_BE( rev x4, x4 )
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
b.ne 0b
ret

8: tbz x2, #3, 4f
ldr x3, [x1], #8
CPU_BE( rev x3, x3 )
crc32\c\()x w0, w0, x3
4: tbz x2, #2, 2f
ldr w3, [x1], #4
CPU_BE( rev w3, w3 )
crc32\c\()w w0, w0, w3
2: tbz x2, #1, 1f
ldrh w3, [x1], #2
CPU_BE( rev16 w3, w3 )
crc32\c\()h w0, w0, w3
1: tbz x2, #0, 0f
ldrb w3, [x1]
crc32\c\()b w0, w0, w3
0: ret
.endm

.align 5
ENTRY(crc32_le)
//alternative_if_not ARM64_HAS_CRC32
// b crc32_le_base
//alternative_else_nop_endif
__crc32
ENDPROC(crc32_le)

.align 5
ENTRY(__crc32c_le)
//alternative_if_not ARM64_HAS_CRC32
// b __crc32c_le_base
//alternative_else_nop_endif
__crc32 c
ENDPROC(__crc32c_le)
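
For reference, a rough C rendering of the CRC32C flavour of the macro
above (__crc32 c), using the ACLE intrinsics from <arm_acle.h>. This is a
sketch, not part of the commit: the loop is simplified to 8 bytes per
iteration where the assembly consumes 16 via ldp, and it assumes a
little-endian build compiled with -march=armv8-a+crc.

#include <arm_acle.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

uint32_t crc32c_le_hw(uint32_t crc, const unsigned char *p, size_t len)
{
	uint64_t d;
	uint32_t w;
	uint16_t h;

	while (len >= 8) {		/* asm: ldp + two crc32cx per 16 bytes */
		memcpy(&d, p, 8);	/* unaligned-safe load */
		crc = __crc32cd(crc, d);
		p += 8;
		len -= 8;
	}
	if (len & 4) {			/* asm: tbz x2, #2, 2f */
		memcpy(&w, p, 4);
		crc = __crc32cw(crc, w);
		p += 4;
	}
	if (len & 2) {			/* asm: tbz x2, #1, 1f */
		memcpy(&h, p, 2);
		crc = __crc32ch(crc, h);
		p += 2;
	}
	if (len & 1)			/* asm: tbz x2, #0, 0f */
		crc = __crc32cb(crc, *p);
	return crc;
}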
11 changes: 7 additions & 4 deletions release/src-rt-5.02axhnd/kernel/linux-4.1/lib/crc32.c
@@ -182,21 +182,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
}

#if CRC_LE_BITS == 1
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32table_le, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
@@ -205,6 +205,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);

u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);

/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
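
A quick way to sanity-check the accelerated path against the generic one
(userspace sketch; assumes an AArch64 machine with the CRC extension and
<arm_acle.h>): both follow the kernel's __crc32c_le convention, with the
seed supplied by the caller and no inversion applied, so they must agree.

#include <arm_acle.h>
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

/* Bitwise CRC32C, same convention as the kernel's __crc32c_le
 * (reflected polynomial 0x82F63B78). */
static uint32_t crc32c_sw(uint32_t crc, const unsigned char *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1u));
	}
	return crc;
}

int main(void)
{
	const unsigned char msg[] = "123456789";
	uint32_t sw = crc32c_sw(0, msg, 9);
	uint32_t hw = 0;

	for (size_t i = 0; i < 9; i++)	/* byte-at-a-time hardware CRC */
		hw = __crc32cb(hw, msg[i]);
	assert(sw == hw);
	return 0;
}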
1 change: 1 addition & 0 deletions release/src-rt-5.02hnd/kernel/linux-4.1/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
select CRC32
select DCACHE_WORD_ACCESS
select GENERIC_ALLOCATOR
select GENERIC_CLOCKEVENTS
2 changes: 2 additions & 0 deletions release/src-rt-5.02hnd/kernel/linux-4.1/arch/arm64/lib/Makefile
@@ -3,3 +3,5 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o

obj-$(CONFIG_CRC32) += crc32.o
60 changes: 60 additions & 0 deletions release/src-rt-5.02hnd/kernel/linux-4.1/arch/arm64/lib/crc32.S
@@ -0,0 +1,60 @@
/*
* Accelerated CRC32(C) using AArch64 CRC instructions
*
* Copyright (C) 2016 - 2018 Linaro Ltd <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <linux/linkage.h>
//#include <asm/alternative.h>
#include <asm/assembler.h>

.arch armv8-a+crc

.macro __crc32, c
0: subs x2, x2, #16
b.mi 8f
ldp x3, x4, [x1], #16
CPU_BE( rev x3, x3 )
CPU_BE( rev x4, x4 )
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
b.ne 0b
ret

8: tbz x2, #3, 4f
ldr x3, [x1], #8
CPU_BE( rev x3, x3 )
crc32\c\()x w0, w0, x3
4: tbz x2, #2, 2f
ldr w3, [x1], #4
CPU_BE( rev w3, w3 )
crc32\c\()w w0, w0, w3
2: tbz x2, #1, 1f
ldrh w3, [x1], #2
CPU_BE( rev16 w3, w3 )
crc32\c\()h w0, w0, w3
1: tbz x2, #0, 0f
ldrb w3, [x1]
crc32\c\()b w0, w0, w3
0: ret
.endm

.align 5
ENTRY(crc32_le)
//alternative_if_not ARM64_HAS_CRC32
// b crc32_le_base
//alternative_else_nop_endif
__crc32
ENDPROC(crc32_le)

.align 5
ENTRY(__crc32c_le)
//alternative_if_not ARM64_HAS_CRC32
// b __crc32c_le_base
//alternative_else_nop_endif
__crc32 c
ENDPROC(__crc32c_le)
11 changes: 7 additions & 4 deletions release/src-rt-5.02hnd/kernel/linux-4.1/lib/crc32.c
@@ -182,21 +182,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
}

#if CRC_LE_BITS == 1
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32table_le, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
@@ -205,6 +205,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);

u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);

/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
1 change: 1 addition & 0 deletions release/src-rt-5.02p1axhnd.675x/kernel/linux-4.1/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
select CLONE_BACKWARDS
select COMMON_CLK
select CPU_PM if (SUSPEND || CPU_IDLE)
select CRC32
select DCACHE_WORD_ACCESS
select GENERIC_ALLOCATOR
select GENERIC_CLOCKEVENTS
2 changes: 2 additions & 0 deletions release/src-rt-5.02p1axhnd.675x/kernel/linux-4.1/arch/arm64/lib/Makefile
@@ -3,3 +3,5 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
clear_page.o memchr.o memcpy.o memmove.o memset.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o

obj-$(CONFIG_CRC32) += crc32.o
60 changes: 60 additions & 0 deletions release/src-rt-5.02p1axhnd.675x/kernel/linux-4.1/arch/arm64/lib/crc32.S
@@ -0,0 +1,60 @@
/*
* Accelerated CRC32(C) using AArch64 CRC instructions
*
* Copyright (C) 2016 - 2018 Linaro Ltd <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <linux/linkage.h>
//#include <asm/alternative.h>
#include <asm/assembler.h>

.arch armv8-a+crc

.macro __crc32, c
0: subs x2, x2, #16
b.mi 8f
ldp x3, x4, [x1], #16
CPU_BE( rev x3, x3 )
CPU_BE( rev x4, x4 )
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
b.ne 0b
ret

8: tbz x2, #3, 4f
ldr x3, [x1], #8
CPU_BE( rev x3, x3 )
crc32\c\()x w0, w0, x3
4: tbz x2, #2, 2f
ldr w3, [x1], #4
CPU_BE( rev w3, w3 )
crc32\c\()w w0, w0, w3
2: tbz x2, #1, 1f
ldrh w3, [x1], #2
CPU_BE( rev16 w3, w3 )
crc32\c\()h w0, w0, w3
1: tbz x2, #0, 0f
ldrb w3, [x1]
crc32\c\()b w0, w0, w3
0: ret
.endm

.align 5
ENTRY(crc32_le)
//alternative_if_not ARM64_HAS_CRC32
// b crc32_le_base
//alternative_else_nop_endif
__crc32
ENDPROC(crc32_le)

.align 5
ENTRY(__crc32c_le)
//alternative_if_not ARM64_HAS_CRC32
// b __crc32c_le_base
//alternative_else_nop_endif
__crc32 c
ENDPROC(__crc32c_le)
11 changes: 7 additions & 4 deletions release/src-rt-5.02p1axhnd.675x/kernel/linux-4.1/lib/crc32.c
@@ -182,21 +182,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
}

#if CRC_LE_BITS == 1
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32table_le, CRCPOLY_LE);
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
@@ -205,6 +205,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);

u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);

/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
