[Libre-soc-isa] [Bug 560] big-endian little-endian SV regfile layout idea
bugzilla-daemon at libre-soc.org
bugzilla-daemon at libre-soc.org
Tue Jan 5 18:30:56 GMT 2021
https://bugs.libre-soc.org/show_bug.cgi?id=560
Jacob Lifshay <programmerjake at gmail.com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Resolution|INVALID |---
Status|RESOLVED |IN_PROGRESS
--- Comment #87 from Jacob Lifshay <programmerjake at gmail.com> ---
Reopening since this needs more evaluation:
If we copy what VSX does, then we'll have to implement the byteswapping in the
ALUs, since that's what VSX does:
Notice how the big-endian and little-endian Power code is identical -- implying
that the registers themselves switch between big-endian and little-endian. The
code does change on AArch64 (64-bit Arm), however, since AArch64 defines the
registers to be little-endian only and so needs explicit byteswapping
instructions inserted in big-endian mode.
https://godbolt.org/z/nbM5qq
C Source:
#include <stdint.h>
/* 16-byte vector types declared with the vector_size attribute. All four are
 * the same total width and differ only in how the 16 bytes are split into
 * lanes (16 x 8-bit, 8 x 16-bit, 4 x 32-bit, 2 x 64-bit). */
typedef uint8_t u8x16 __attribute__((vector_size(16)));
typedef uint16_t u16x8 __attribute__((vector_size(16)));
typedef uint32_t u32x4 __attribute__((vector_size(16)));
typedef uint64_t u64x2 __attribute__((vector_size(16)));
/* Memory-to-memory test case: adds *a and *b lane-wise as 16 bytes, views the
 * byte-lane sum as 8 halfword lanes, adds *c lane-wise as 8 halfwords and
 * stores the result to *r. Every operand is read from or written to memory. */
void ld_st(u8x16 *a, u8x16 *b, u16x8 *c, u16x8 *r) {
/* Cast between two 16-byte vector types: the same bits re-viewed as u16
 * lanes, not a per-element widening. */
u16x8 temp = (u16x8)(*a + *b);
*r = temp + *c;
}
/* Same arithmetic as ld_st, but the three vectors are passed by value and the
 * result is returned by value, so no explicit pointer loads or stores appear
 * in the source. */
u16x8 by_value(u8x16 a, u8x16 b, u16x8 c) {
/* Byte-lane sum re-viewed as 8 halfword lanes (same bits, no widening). */
u16x8 temp = (u16x8)(a + b);
return temp + c;
}
/* Builds a u16x8 from the eight consecutive uint16_t values starting at a,
 * copying a[i] into lane i for i = 0..7, and returns it by value. */
u16x8 load_array(uint16_t *a) {
/* Tell the compiler it may assume a is 16-byte aligned. */
a = (uint16_t *)__builtin_assume_aligned(a, 16);
/* Left uninitialised here; every one of the 8 lanes is written by the loop. */
u16x8 retval;
for(int i = 0; i < 8; i++)
retval[i] = a[i];
return retval;
}
Generated big-endian powerpc64:
# ld_st, big-endian powerpc64: *r = (u16x8)(*a + *b) + *c
# The ld_st label names three doublewords of data (code entry address, TOC
# base, zero) rather than code; this looks like an ELFv1-style function
# descriptor. The instructions start at .Lfunc_begin0.
# NOTE(review): r3..r6 presumably hold a, b, c, r in C parameter order.
ld_st: # @ld_st
.quad .Lfunc_begin0
.quad .TOC.@tocbase
.quad 0
.Lfunc_begin0:
# lxv/stxv number the VSX registers; 34 and 35 here are the registers the
# vector adds below call 2 and 3 (Power ISA: VSR 32+n is VR n).
lxv 34, 0(3) # load 16 bytes from 0(r3)
lxv 35, 0(4) # load 16 bytes from 0(r4)
vaddubm 2, 3, 2 # byte-lane add
lxv 35, 0(5) # load 16 bytes from 0(r5), reusing register 35/3
# No byte-reordering instruction separates the byte-lane add from the
# halfword-lane add; this sequence matches the little-endian listing.
vadduhm 2, 3, 2 # halfword-lane add
stxv 34, 0(6) # store the 16-byte result to 0(r6)
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
# by_value, big-endian powerpc64: return (u16x8)(a + b) + c
# The by_value label names three doublewords of data (code entry address, TOC
# base, zero) rather than code; this looks like an ELFv1-style function
# descriptor. The instructions start at .Lfunc_begin1.
# NOTE(review): a, b, c presumably arrive in vector registers 2, 3, 4 and the
# result is returned in 2 -- confirm against the ABI.
by_value: # @by_value
.quad .Lfunc_begin1
.quad .TOC.@tocbase
.quad 0
.Lfunc_begin1:
vaddubm 2, 3, 2 # byte-lane add of registers 3 and 2
# No byte-reordering instruction separates the two adds; this sequence
# matches the little-endian listing.
vadduhm 2, 2, 4 # halfword-lane add of that sum and register 4
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
# load_array, big-endian powerpc64: return a[0..7] as one u16x8
# The load_array label names three doublewords of data (code entry address,
# TOC base, zero) rather than code; this looks like an ELFv1-style function
# descriptor. The instructions start at .Lfunc_begin2.
# NOTE(review): r3 presumably holds a; VSX register 34 is presumably the
# vector return register -- confirm against the ABI.
load_array: # @load_array
.quad .Lfunc_begin2
.quad .TOC.@tocbase
.quad 0
.Lfunc_begin2:
# The 8-iteration element-copy loop in the C source has become this single
# 16-byte load.
lxv 34, 0(3)
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
Generated little-endian powerpc64le:
# ld_st, little-endian powerpc64le: *r = (u16x8)(*a + *b) + *c
# The six instructions below are the same as in the big-endian listing.
# NOTE(review): r3..r6 presumably hold a, b, c, r in C parameter order.
ld_st: # @ld_st
# lxv/stxv number the VSX registers; 34 and 35 here are the registers the
# vector adds below call 2 and 3 (Power ISA: VSR 32+n is VR n).
lxv 34, 0(3) # load 16 bytes from 0(r3)
lxv 35, 0(4) # load 16 bytes from 0(r4)
vaddubm 2, 3, 2 # byte-lane add
lxv 35, 0(5) # load 16 bytes from 0(r5), reusing register 35/3
vadduhm 2, 3, 2 # halfword-lane add
stxv 34, 0(6) # store the 16-byte result to 0(r6)
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
# by_value, little-endian powerpc64le: return (u16x8)(a + b) + c
# Same two adds as in the big-endian listing.
# NOTE(review): a, b, c presumably arrive in vector registers 2, 3, 4 and the
# result is returned in 2 -- confirm against the ABI.
by_value: # @by_value
vaddubm 2, 3, 2 # byte-lane add of registers 3 and 2
vadduhm 2, 2, 4 # halfword-lane add of that sum and register 4
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
# load_array, little-endian powerpc64le: return a[0..7] as one u16x8
# NOTE(review): r3 presumably holds a; VSX register 34 is presumably the
# vector return register -- confirm against the ABI.
load_array: # @load_array
# The 8-iteration element-copy loop in the C source has become this single
# 16-byte load, the same instruction as in the big-endian listing.
lxv 34, 0(3)
blr
# Data emitted after the return; never reached by execution.
# NOTE(review): presumably a compiler-generated trailer/padding record.
.long 0
.quad 0
Generated big-endian AArch64:
// ld_st, big-endian AArch64: *r = (u16x8)(*a + *b) + *c
// x0..x3 hold a, b, c, r (first four integer/pointer argument registers).
ld_st: // @ld_st
ld1 { v0.16b }, [x0] // v0 = *a, loaded as 16 byte lanes
ld1 { v1.16b }, [x1] // v1 = *b, loaded as 16 byte lanes
ld1 { v2.8h }, [x2] // v2 = *c, loaded as 8 halfword lanes
add v0.16b, v1.16b, v0.16b // byte-lane add: a + b
// Explicit byteswap: reverses the two bytes inside every halfword so the
// byte-lane sum can be used as halfword lanes. The little-endian listing has
// no counterpart to this instruction.
rev16 v0.16b, v0.16b
add v0.8h, v2.8h, v0.8h // halfword-lane add with c
st1 { v0.8h }, [x3] // store the 8 halfword lanes to *r
ret
// by_value, big-endian AArch64: return (u16x8)(a + b) + c
// v0, v1, v2 hold a, b, c on entry; the result is returned in v0.
// Only the two add instructions and ret also appear in the little-endian
// listing; every rev64/ext/rev16 here is extra lane reordering.
// Each "rev64 + ext #8" pair reverses the lane order across the whole 128-bit
// register: rev64 reverses lanes inside each 64-bit half, and ext #8 then
// swaps the two halves.
// NOTE(review): presumably these pairs convert between the layout vectors are
// passed/returned in and the lane layout the adds use -- confirm.
by_value: // @by_value
rev64 v0.16b, v0.16b // a: reverse bytes within each 64-bit half
rev64 v1.16b, v1.16b // b: reverse bytes within each 64-bit half
ext v0.16b, v0.16b, v0.16b, #8 // a: swap halves (16 byte lanes now reversed)
ext v1.16b, v1.16b, v1.16b, #8 // b: swap halves (16 byte lanes now reversed)
rev64 v2.8h, v2.8h // c: reverse halfwords within each 64-bit half
add v0.16b, v1.16b, v0.16b // byte-lane add: a + b
ext v2.16b, v2.16b, v2.16b, #8 // c: swap halves (8 halfword lanes now reversed)
rev16 v0.16b, v0.16b // byteswap each halfword of the sum for the u16x8 view
add v0.8h, v0.8h, v2.8h // halfword-lane add with c
rev64 v0.8h, v0.8h // result: reverse halfwords within each 64-bit half
ext v0.16b, v0.16b, v0.16b, #8 // result: swap halves before returning in v0
ret
// load_array, big-endian AArch64: return a[0..7] as one u16x8
// x0 holds a; the result is returned in v0 (q0 is its 128-bit view).
load_array: // @load_array
// The 8-iteration element-copy loop in the C source has become this single
// 128-bit load, the same instruction as in the little-endian listing.
ldr q0, [x0]
ret
Generated little-endian AArch64:
// ld_st, little-endian AArch64: *r = (u16x8)(*a + *b) + *c
// x0..x3 hold a, b, c, r (first four integer/pointer argument registers).
ld_st: // @ld_st
ldr q0, [x0] // q0 = *a (whole 128-bit register load)
ldr q1, [x1] // q1 = *b
ldr q2, [x2] // q2 = *c
add v0.16b, v1.16b, v0.16b // byte-lane add: a + b
// The halfword-lane add follows directly; no rev16 is needed here, unlike in
// the big-endian listing.
add v0.8h, v2.8h, v0.8h
str q0, [x3] // store the 128-bit result to *r
ret
// by_value, little-endian AArch64: return (u16x8)(a + b) + c
// v0, v1, v2 hold a, b, c on entry; the result is returned in v0.
// None of the rev64/ext/rev16 reordering from the big-endian listing appears.
by_value: // @by_value
add v0.16b, v1.16b, v0.16b // byte-lane add: a + b
add v0.8h, v0.8h, v2.8h // halfword-lane add with c
ret
// load_array, little-endian AArch64: return a[0..7] as one u16x8
// x0 holds a; the result is returned in v0 (q0 is its 128-bit view).
load_array: // @load_array
// The 8-iteration element-copy loop in the C source has become this single
// 128-bit load.
ldr q0, [x0]
ret
Generated x86_64:
# ld_st, x86_64 (Intel operand order: destination first):
# *r = (u16x8)(*a + *b) + *c
# rdi, rsi, rdx, rcx hold a, b, c, r (System V AMD64 argument order).
ld_st:
# The aligned-only load/store forms (movdqa/movaps) are used throughout.
# NOTE(review): presumably justified by the 16-byte vector types' alignment.
movdqa xmm0, XMMWORD PTR [rdi] # xmm0 = *a
paddb xmm0, XMMWORD PTR [rsi] # byte-lane add of *b, read straight from memory
# The halfword-lane add follows directly, with no byte-reordering instruction.
paddw xmm0, XMMWORD PTR [rdx] # halfword-lane add of *c
movaps XMMWORD PTR [rcx], xmm0 # store the result to *r
ret
# by_value, x86_64 (Intel operand order: destination first):
# return (u16x8)(a + b) + c
# xmm0, xmm1, xmm2 hold a, b, c on entry; the result is returned in xmm0.
by_value:
paddb xmm0, xmm1 # byte-lane add: a + b
paddw xmm0, xmm2 # halfword-lane add with c; no reordering in between
ret
# load_array, x86_64 (Intel operand order: destination first):
# return a[0..7] as one u16x8
# rdi holds a; the result is returned in xmm0.
load_array:
# The 8-iteration element-copy loop in the C source has become this single
# 16-byte load. The aligned form is permitted by the
# __builtin_assume_aligned(a, 16) promise in the C source.
movdqa xmm0, XMMWORD PTR [rdi]
ret
--
You are receiving this mail because:
You are on the CC list for the bug.
More information about the Libre-SOC-ISA
mailing list