GCC with patches for OS216
Revision | 770ebe99fe48aca10f3553c4195deba1757d328a (tree) |
---|---|
Time | 2018-03-21 02:13:16 |
Author | Kyrylo Tkachov <ktkachov@gcc....> |
Committer | Kyrylo Tkachov
This PR shows that we get the load/store_lanes logic wrong for arm big-endian.
It is tricky to get right. AArch64 does it by adding the appropriate lane-swapping
operations during expansion.
I'd like to do the same on arm eventually, but we'd need to port and validate the VTBL-generating
code and add it to all the right places. I'm not comfortable enough doing that for GCC 8, but I am keen
on getting the wrong-code fixed.
As I say in the PR, vectorisation on armeb is already severely restricted (we disable many patterns on BYTES_BIG_ENDIAN)
and the load/store_lanes patterns really were not working properly at all, so disabling them is not
a radical approach.
The way to do that is to return false in ARRAY_MODE_SUPPORTED_P for BYTES_BIG_ENDIAN.
Bootstrapped and tested on arm-none-linux-gnueabihf.
Also tested on armeb-none-eabi.
From-SVN: r258687
@@ -27162,7 +27162,10 @@ static bool | ||
27162 | 27162 | arm_array_mode_supported_p (machine_mode mode, |
27163 | 27163 | unsigned HOST_WIDE_INT nelems) |
27164 | 27164 | { |
27165 | - if (TARGET_NEON | |
27165 | + /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN | |
27166 | + for now, as the lane-swapping logic needs to be extended in the expanders. | |
27167 | + See PR target/82518. */ | |
27168 | + if (TARGET_NEON && !BYTES_BIG_ENDIAN | |
27166 | 27169 | && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) |
27167 | 27170 | && (nelems >= 2 && nelems <= 4)) |
27168 | 27171 | return true; |
@@ -0,0 +1,29 @@ | ||
1 | +/* { dg-do run } */ | |
2 | +/* { dg-require-effective-target arm_neon_ok } */ | |
3 | +/* { dg-additional-options "-O3 -fno-inline -std=gnu99" } */ | |
4 | +/* { dg-add-options arm_neon } */ | |
5 | + | |
6 | +typedef struct { int x, y; } X; | |
7 | + | |
8 | +void f4(X *p, int n) | |
9 | +{ | |
10 | + for (int i = 0; i < n; i++) | |
11 | + { p[i].x = i; | |
12 | + p[i].y = i + 1; | |
13 | + } | |
14 | +} | |
15 | + | |
16 | +__attribute ((aligned (16))) X arr[100]; | |
17 | + | |
18 | +int main(void) | |
19 | +{ | |
20 | + volatile int fail = 0; | |
21 | + f4 (arr, 100); | |
22 | + for (int i = 0; i < 100; i++) | |
23 | + if (arr[i].y != i+1 || arr[i].x != i) | |
24 | + fail = 1; | |
25 | + if (fail) | |
26 | + __builtin_abort (); | |
27 | + | |
28 | + return 0; | |
29 | +} |
@@ -6611,7 +6611,8 @@ proc check_effective_target_vect_load_lanes { } { | ||
6611 | 6611 | verbose "check_effective_target_vect_load_lanes: using cached result" 2 |
6612 | 6612 | } else { |
6613 | 6613 | set et_vect_load_lanes 0 |
6614 | - if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) | |
6614 | + # We don't support load_lanes correctly on big-endian arm. | |
6615 | + if { ([istarget arm-*-*] && [check_effective_target_arm_neon_ok]) | |
6615 | 6616 | || [istarget aarch64*-*-*] } { |
6616 | 6617 | set et_vect_load_lanes 1 |
6617 | 6618 | } |