Revision | adb196cbd5cff26547bc32a208074f03f4c4a627 (tree) |
---|---|
Time | 2018-03-16 01:55:04 |
Author | Richard Henderson <richard.henderson@linaro.org> |
Committer | Richard Henderson |
tcg: Add choose_vector_size
This unifies 5 copies of checks for supported vector size,
and in the process fixes a missing check in tcg_gen_gvec_2s.
This led to an assertion failure for 64-bit vector multiply,
which is not available in the AVX instruction set.
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
@@ -351,6 +351,42 @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in) | ||
351 | 351 | } |
352 | 352 | } |
353 | 353 | |
354 | +/* Select a supported vector type for implementing an operation on SIZE | |
355 | + * bytes. If OP is 0, assume that the real operation to be performed is | |
356 | + * required by all backends. Otherwise, make sure that OP can be performed | |
357 | + * on elements of size VECE in the selected type. Do not select V64 if | |
358 | + * PREFER_I64 is true. Return 0 if no vector type is selected. | |
359 | + */ | |
360 | +static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size, | |
361 | + bool prefer_i64) | |
362 | +{ | |
363 | + if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { | |
364 | + if (op == 0) { | |
365 | + return TCG_TYPE_V256; | |
366 | + } | |
367 | + /* Recall that ARM SVE allows vector sizes that are not a | |
368 | + * power of 2, but always a multiple of 16. The intent is | |
369 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
370 | + * It is hard to imagine a case in which v256 is supported | |
371 | + * but v128 is not, but check anyway. | |
372 | + */ | |
373 | + if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece) | |
374 | + && (size % 32 == 0 | |
375 | + || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { | |
376 | + return TCG_TYPE_V256; | |
377 | + } | |
378 | + } | |
379 | + if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) | |
380 | + && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { | |
381 | + return TCG_TYPE_V128; | |
382 | + } | |
383 | + if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) | |
384 | + && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) { | |
385 | + return TCG_TYPE_V64; | |
386 | + } | |
387 | + return 0; | |
388 | +} | |
389 | + | |
354 | 390 | /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. |
355 | 391 | * Only one of IN_32 or IN_64 may be set; |
356 | 392 | * IN_C is used if IN_32 and IN_64 are unset. |
@@ -376,19 +412,12 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
376 | 412 | } |
377 | 413 | } |
378 | 414 | |
379 | - type = 0; | |
380 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) { | |
381 | - type = TCG_TYPE_V256; | |
382 | - } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) { | |
383 | - type = TCG_TYPE_V128; | |
384 | - } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8) | |
385 | - /* Prefer integer when 64-bit host and no variable dup. */ | |
386 | - && !(TCG_TARGET_REG_BITS == 64 && in_32 == NULL | |
387 | - && (in_64 == NULL || vece == MO_64))) { | |
388 | - type = TCG_TYPE_V64; | |
389 | - } | |
390 | - | |
391 | - /* Implement inline with a vector type, if possible. */ | |
415 | + /* Implement inline with a vector type, if possible. | |
416 | + * Prefer integer when 64-bit host and no variable dup. | |
417 | + */ | |
418 | + type = choose_vector_type(0, vece, oprsz, | |
419 | + (TCG_TARGET_REG_BITS == 64 && in_32 == NULL | |
420 | + && (in_64 == NULL || vece == MO_64))); | |
392 | 421 | if (type != 0) { |
393 | 422 | TCGv_vec t_vec = tcg_temp_new_vec(type); |
394 | 423 |
@@ -414,21 +443,30 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
414 | 443 | } |
415 | 444 | |
416 | 445 | i = 0; |
417 | - if (TCG_TARGET_HAS_v256) { | |
446 | + switch (type) { | |
447 | + case TCG_TYPE_V256: | |
448 | + /* Recall that ARM SVE allows vector sizes that are not a | |
449 | + * power of 2, but always a multiple of 16. The intent is | |
450 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
451 | + */ | |
418 | 452 | for (; i + 32 <= oprsz; i += 32) { |
419 | 453 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); |
420 | 454 | } |
421 | - } | |
422 | - if (TCG_TARGET_HAS_v128) { | |
455 | + /* fallthru */ | |
456 | + case TCG_TYPE_V128: | |
423 | 457 | for (; i + 16 <= oprsz; i += 16) { |
424 | 458 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); |
425 | 459 | } |
426 | - } | |
427 | - if (TCG_TARGET_HAS_v64) { | |
460 | + break; | |
461 | + case TCG_TYPE_V64: | |
428 | 462 | for (; i < oprsz; i += 8) { |
429 | 463 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); |
430 | 464 | } |
465 | + break; | |
466 | + default: | |
467 | + g_assert_not_reached(); | |
431 | 468 | } |
469 | + | |
432 | 470 | tcg_temp_free_vec(t_vec); |
433 | 471 | goto done; |
434 | 472 | } |
@@ -484,7 +522,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
484 | 522 | } |
485 | 523 | tcg_temp_free_i64(t_64); |
486 | 524 | goto done; |
487 | - } | |
525 | + } | |
488 | 526 | } |
489 | 527 | |
490 | 528 | /* Otherwise implement out of line. */ |
@@ -866,49 +904,55 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
866 | 904 | void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, |
867 | 905 | uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) |
868 | 906 | { |
907 | + TCGType type; | |
908 | + uint32_t some; | |
909 | + | |
869 | 910 | check_size_align(oprsz, maxsz, dofs | aofs); |
870 | 911 | check_overlap_2(dofs, aofs, maxsz); |
871 | 912 | |
872 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
873 | - Expand with successively smaller host vector sizes. The intent is | |
874 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
875 | - /* ??? For maxsz > oprsz, the host may be able to use an opr-sized | |
876 | - operation, zeroing the balance of the register. We can then | |
877 | - use a max-sized store to implement the clearing without an extra | |
878 | - store operation. This is true for aarch64 and x86_64 hosts. */ | |
879 | - | |
880 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
881 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
882 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
913 | + type = 0; | |
914 | + if (g->fniv) { | |
915 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
916 | + } | |
917 | + switch (type) { | |
918 | + case TCG_TYPE_V256: | |
919 | + /* Recall that ARM SVE allows vector sizes that are not a | |
920 | + * power of 2, but always a multiple of 16. The intent is | |
921 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
922 | + */ | |
923 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
883 | 924 | expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); |
884 | 925 | if (some == oprsz) { |
885 | - goto done; | |
926 | + break; | |
886 | 927 | } |
887 | 928 | dofs += some; |
888 | 929 | aofs += some; |
889 | 930 | oprsz -= some; |
890 | 931 | maxsz -= some; |
891 | - } | |
892 | - | |
893 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
894 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
932 | + /* fallthru */ | |
933 | + case TCG_TYPE_V128: | |
895 | 934 | expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); |
896 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
897 | - && g->fniv && check_size_impl(oprsz, 8) | |
898 | - && (!g->opc | |
899 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
935 | + break; | |
936 | + case TCG_TYPE_V64: | |
900 | 937 | expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); |
901 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
902 | - expand_2_i64(dofs, aofs, oprsz, g->fni8); | |
903 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
904 | - expand_2_i32(dofs, aofs, oprsz, g->fni4); | |
905 | - } else { | |
906 | - assert(g->fno != NULL); | |
907 | - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); | |
908 | - return; | |
938 | + break; | |
939 | + | |
940 | + case 0: | |
941 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
942 | + expand_2_i64(dofs, aofs, oprsz, g->fni8); | |
943 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
944 | + expand_2_i32(dofs, aofs, oprsz, g->fni4); | |
945 | + } else { | |
946 | + assert(g->fno != NULL); | |
947 | + tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); | |
948 | + return; | |
949 | + } | |
950 | + break; | |
951 | + | |
952 | + default: | |
953 | + g_assert_not_reached(); | |
909 | 954 | } |
910 | 955 | |
911 | - done: | |
912 | 956 | if (oprsz < maxsz) { |
913 | 957 | expand_clr(dofs + oprsz, maxsz - oprsz); |
914 | 958 | } |
@@ -918,53 +962,64 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, | ||
918 | 962 | void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, |
919 | 963 | uint32_t maxsz, int64_t c, const GVecGen2i *g) |
920 | 964 | { |
965 | + TCGType type; | |
966 | + uint32_t some; | |
967 | + | |
921 | 968 | check_size_align(oprsz, maxsz, dofs | aofs); |
922 | 969 | check_overlap_2(dofs, aofs, maxsz); |
923 | 970 | |
924 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
925 | - Expand with successively smaller host vector sizes. The intent is | |
926 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
927 | - | |
928 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
929 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
930 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
971 | + type = 0; | |
972 | + if (g->fniv) { | |
973 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
974 | + } | |
975 | + switch (type) { | |
976 | + case TCG_TYPE_V256: | |
977 | + /* Recall that ARM SVE allows vector sizes that are not a | |
978 | + * power of 2, but always a multiple of 16. The intent is | |
979 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
980 | + */ | |
981 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
931 | 982 | expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, |
932 | 983 | c, g->load_dest, g->fniv); |
933 | 984 | if (some == oprsz) { |
934 | - goto done; | |
985 | + break; | |
935 | 986 | } |
936 | 987 | dofs += some; |
937 | 988 | aofs += some; |
938 | 989 | oprsz -= some; |
939 | 990 | maxsz -= some; |
940 | - } | |
941 | - | |
942 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
943 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
991 | + /* fallthru */ | |
992 | + case TCG_TYPE_V128: | |
944 | 993 | expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, |
945 | 994 | c, g->load_dest, g->fniv); |
946 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
947 | - && g->fniv && check_size_impl(oprsz, 8) | |
948 | - && (!g->opc | |
949 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
995 | + break; | |
996 | + case TCG_TYPE_V64: | |
950 | 997 | expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, |
951 | 998 | c, g->load_dest, g->fniv); |
952 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
953 | - expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8); | |
954 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
955 | - expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4); | |
956 | - } else { | |
957 | - if (g->fno) { | |
958 | - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno); | |
999 | + break; | |
1000 | + | |
1001 | + case 0: | |
1002 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1003 | + expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8); | |
1004 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1005 | + expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4); | |
959 | 1006 | } else { |
960 | - TCGv_i64 tcg_c = tcg_const_i64(c); | |
961 | - tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, maxsz, c, g->fnoi); | |
962 | - tcg_temp_free_i64(tcg_c); | |
1007 | + if (g->fno) { | |
1008 | + tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno); | |
1009 | + } else { | |
1010 | + TCGv_i64 tcg_c = tcg_const_i64(c); | |
1011 | + tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, | |
1012 | + maxsz, c, g->fnoi); | |
1013 | + tcg_temp_free_i64(tcg_c); | |
1014 | + } | |
1015 | + return; | |
963 | 1016 | } |
964 | - return; | |
1017 | + break; | |
1018 | + | |
1019 | + default: | |
1020 | + g_assert_not_reached(); | |
965 | 1021 | } |
966 | 1022 | |
967 | - done: | |
968 | 1023 | if (oprsz < maxsz) { |
969 | 1024 | expand_clr(dofs + oprsz, maxsz - oprsz); |
970 | 1025 | } |
@@ -981,37 +1036,30 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
981 | 1036 | |
982 | 1037 | type = 0; |
983 | 1038 | if (g->fniv) { |
984 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) { | |
985 | - type = TCG_TYPE_V256; | |
986 | - } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) { | |
987 | - type = TCG_TYPE_V128; | |
988 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
989 | - && check_size_impl(oprsz, 8)) { | |
990 | - type = TCG_TYPE_V64; | |
991 | - } | |
1039 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
992 | 1040 | } |
993 | 1041 | if (type != 0) { |
994 | 1042 | TCGv_vec t_vec = tcg_temp_new_vec(type); |
1043 | + uint32_t some; | |
995 | 1044 | |
996 | 1045 | tcg_gen_dup_i64_vec(g->vece, t_vec, c); |
997 | 1046 | |
998 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
999 | - Expand with successively smaller host vector sizes. The intent is | |
1000 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1001 | 1047 | switch (type) { |
1002 | 1048 | case TCG_TYPE_V256: |
1003 | - { | |
1004 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1005 | - expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, | |
1006 | - t_vec, g->scalar_first, g->fniv); | |
1007 | - if (some == oprsz) { | |
1008 | - break; | |
1009 | - } | |
1010 | - dofs += some; | |
1011 | - aofs += some; | |
1012 | - oprsz -= some; | |
1013 | - maxsz -= some; | |
1049 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1050 | + * power of 2, but always a multiple of 16. The intent is | |
1051 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1052 | + */ | |
1053 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1054 | + expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, | |
1055 | + t_vec, g->scalar_first, g->fniv); | |
1056 | + if (some == oprsz) { | |
1057 | + break; | |
1014 | 1058 | } |
1059 | + dofs += some; | |
1060 | + aofs += some; | |
1061 | + oprsz -= some; | |
1062 | + maxsz -= some; | |
1015 | 1063 | /* fallthru */ |
1016 | 1064 | |
1017 | 1065 | case TCG_TYPE_V128: |
@@ -1055,48 +1103,60 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
1055 | 1103 | void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, |
1056 | 1104 | uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) |
1057 | 1105 | { |
1106 | + TCGType type; | |
1107 | + uint32_t some; | |
1108 | + | |
1058 | 1109 | check_size_align(oprsz, maxsz, dofs | aofs | bofs); |
1059 | 1110 | check_overlap_3(dofs, aofs, bofs, maxsz); |
1060 | 1111 | |
1061 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
1062 | - Expand with successively smaller host vector sizes. The intent is | |
1063 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1064 | - | |
1065 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
1066 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
1067 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1112 | + type = 0; | |
1113 | + if (g->fniv) { | |
1114 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
1115 | + } | |
1116 | + switch (type) { | |
1117 | + case TCG_TYPE_V256: | |
1118 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1119 | + * power of 2, but always a multiple of 16. The intent is | |
1120 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1121 | + */ | |
1122 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1068 | 1123 | expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, |
1069 | 1124 | g->load_dest, g->fniv); |
1070 | 1125 | if (some == oprsz) { |
1071 | - goto done; | |
1126 | + break; | |
1072 | 1127 | } |
1073 | 1128 | dofs += some; |
1074 | 1129 | aofs += some; |
1075 | 1130 | bofs += some; |
1076 | 1131 | oprsz -= some; |
1077 | 1132 | maxsz -= some; |
1078 | - } | |
1079 | - | |
1080 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
1081 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
1133 | + /* fallthru */ | |
1134 | + case TCG_TYPE_V128: | |
1082 | 1135 | expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, |
1083 | 1136 | g->load_dest, g->fniv); |
1084 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
1085 | - && g->fniv && check_size_impl(oprsz, 8) | |
1086 | - && (!g->opc | |
1087 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
1137 | + break; | |
1138 | + case TCG_TYPE_V64: | |
1088 | 1139 | expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, |
1089 | 1140 | g->load_dest, g->fniv); |
1090 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1091 | - expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8); | |
1092 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1093 | - expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4); | |
1094 | - } else { | |
1095 | - assert(g->fno != NULL); | |
1096 | - tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno); | |
1141 | + break; | |
1142 | + | |
1143 | + case 0: | |
1144 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1145 | + expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8); | |
1146 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1147 | + expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4); | |
1148 | + } else { | |
1149 | + assert(g->fno != NULL); | |
1150 | + tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, | |
1151 | + maxsz, g->data, g->fno); | |
1152 | + return; | |
1153 | + } | |
1154 | + break; | |
1155 | + | |
1156 | + default: | |
1157 | + g_assert_not_reached(); | |
1097 | 1158 | } |
1098 | 1159 | |
1099 | - done: | |
1100 | 1160 | if (oprsz < maxsz) { |
1101 | 1161 | expand_clr(dofs + oprsz, maxsz - oprsz); |
1102 | 1162 | } |
@@ -1106,20 +1166,27 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
1106 | 1166 | void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, |
1107 | 1167 | uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g) |
1108 | 1168 | { |
1169 | + TCGType type; | |
1170 | + uint32_t some; | |
1171 | + | |
1109 | 1172 | check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs); |
1110 | 1173 | check_overlap_4(dofs, aofs, bofs, cofs, maxsz); |
1111 | 1174 | |
1112 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
1113 | - Expand with successively smaller host vector sizes. The intent is | |
1114 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1115 | - | |
1116 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
1117 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
1118 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1175 | + type = 0; | |
1176 | + if (g->fniv) { | |
1177 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
1178 | + } | |
1179 | + switch (type) { | |
1180 | + case TCG_TYPE_V256: | |
1181 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1182 | + * power of 2, but always a multiple of 16. The intent is | |
1183 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1184 | + */ | |
1185 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1119 | 1186 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some, |
1120 | 1187 | 32, TCG_TYPE_V256, g->fniv); |
1121 | 1188 | if (some == oprsz) { |
1122 | - goto done; | |
1189 | + break; | |
1123 | 1190 | } |
1124 | 1191 | dofs += some; |
1125 | 1192 | aofs += some; |
@@ -1127,30 +1194,33 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, | ||
1127 | 1194 | cofs += some; |
1128 | 1195 | oprsz -= some; |
1129 | 1196 | maxsz -= some; |
1130 | - } | |
1131 | - | |
1132 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
1133 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
1197 | + /* fallthru */ | |
1198 | + case TCG_TYPE_V128: | |
1134 | 1199 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, |
1135 | 1200 | 16, TCG_TYPE_V128, g->fniv); |
1136 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
1137 | - && g->fniv && check_size_impl(oprsz, 8) | |
1138 | - && (!g->opc | |
1139 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
1201 | + break; | |
1202 | + case TCG_TYPE_V64: | |
1140 | 1203 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, |
1141 | 1204 | 8, TCG_TYPE_V64, g->fniv); |
1142 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1143 | - expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8); | |
1144 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1145 | - expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4); | |
1146 | - } else { | |
1147 | - assert(g->fno != NULL); | |
1148 | - tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, | |
1149 | - oprsz, maxsz, g->data, g->fno); | |
1150 | - return; | |
1205 | + break; | |
1206 | + | |
1207 | + case 0: | |
1208 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1209 | + expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8); | |
1210 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1211 | + expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4); | |
1212 | + } else { | |
1213 | + assert(g->fno != NULL); | |
1214 | + tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, | |
1215 | + oprsz, maxsz, g->data, g->fno); | |
1216 | + return; | |
1217 | + } | |
1218 | + break; | |
1219 | + | |
1220 | + default: | |
1221 | + g_assert_not_reached(); | |
1151 | 1222 | } |
1152 | 1223 | |
1153 | - done: | |
1154 | 1224 | if (oprsz < maxsz) { |
1155 | 1225 | expand_clr(dofs + oprsz, maxsz - oprsz); |
1156 | 1226 | } |
@@ -2155,6 +2225,8 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, | ||
2155 | 2225 | [TCG_COND_LTU] = ltu_fn, |
2156 | 2226 | [TCG_COND_LEU] = leu_fn, |
2157 | 2227 | }; |
2228 | + TCGType type; | |
2229 | + uint32_t some; | |
2158 | 2230 | |
2159 | 2231 | check_size_align(oprsz, maxsz, dofs | aofs | bofs); |
2160 | 2232 | check_overlap_3(dofs, aofs, bofs, maxsz); |
@@ -2165,51 +2237,59 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, | ||
2165 | 2237 | return; |
2166 | 2238 | } |
2167 | 2239 | |
2168 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
2169 | - Expand with successively smaller host vector sizes. The intent is | |
2170 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
2171 | - | |
2172 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32) | |
2173 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V256, vece)) { | |
2174 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
2240 | + /* Implement inline with a vector type, if possible. | |
2241 | + * Prefer integer when 64-bit host and 64-bit comparison. | |
2242 | + */ | |
2243 | + type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz, | |
2244 | + TCG_TARGET_REG_BITS == 64 && vece == MO_64); | |
2245 | + switch (type) { | |
2246 | + case TCG_TYPE_V256: | |
2247 | + /* Recall that ARM SVE allows vector sizes that are not a | |
2248 | + * power of 2, but always a multiple of 16. The intent is | |
2249 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
2250 | + */ | |
2251 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
2175 | 2252 | expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond); |
2176 | 2253 | if (some == oprsz) { |
2177 | - goto done; | |
2254 | + break; | |
2178 | 2255 | } |
2179 | 2256 | dofs += some; |
2180 | 2257 | aofs += some; |
2181 | 2258 | bofs += some; |
2182 | 2259 | oprsz -= some; |
2183 | 2260 | maxsz -= some; |
2184 | - } | |
2185 | - | |
2186 | - if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16) | |
2187 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V128, vece)) { | |
2261 | + /* fallthru */ | |
2262 | + case TCG_TYPE_V128: | |
2188 | 2263 | expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond); |
2189 | - } else if (TCG_TARGET_HAS_v64 | |
2190 | - && check_size_impl(oprsz, 8) | |
2191 | - && (TCG_TARGET_REG_BITS == 32 || vece != MO_64) | |
2192 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V64, vece)) { | |
2264 | + break; | |
2265 | + case TCG_TYPE_V64: | |
2193 | 2266 | expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond); |
2194 | - } else if (vece == MO_64 && check_size_impl(oprsz, 8)) { | |
2195 | - expand_cmp_i64(dofs, aofs, bofs, oprsz, cond); | |
2196 | - } else if (vece == MO_32 && check_size_impl(oprsz, 4)) { | |
2197 | - expand_cmp_i32(dofs, aofs, bofs, oprsz, cond); | |
2198 | - } else { | |
2199 | - gen_helper_gvec_3 * const *fn = fns[cond]; | |
2200 | - | |
2201 | - if (fn == NULL) { | |
2202 | - uint32_t tmp; | |
2203 | - tmp = aofs, aofs = bofs, bofs = tmp; | |
2204 | - cond = tcg_swap_cond(cond); | |
2205 | - fn = fns[cond]; | |
2206 | - assert(fn != NULL); | |
2267 | + break; | |
2268 | + | |
2269 | + case 0: | |
2270 | + if (vece == MO_64 && check_size_impl(oprsz, 8)) { | |
2271 | + expand_cmp_i64(dofs, aofs, bofs, oprsz, cond); | |
2272 | + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) { | |
2273 | + expand_cmp_i32(dofs, aofs, bofs, oprsz, cond); | |
2274 | + } else { | |
2275 | + gen_helper_gvec_3 * const *fn = fns[cond]; | |
2276 | + | |
2277 | + if (fn == NULL) { | |
2278 | + uint32_t tmp; | |
2279 | + tmp = aofs, aofs = bofs, bofs = tmp; | |
2280 | + cond = tcg_swap_cond(cond); | |
2281 | + fn = fns[cond]; | |
2282 | + assert(fn != NULL); | |
2283 | + } | |
2284 | + tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]); | |
2285 | + return; | |
2207 | 2286 | } |
2208 | - tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]); | |
2209 | - return; | |
2287 | + break; | |
2288 | + | |
2289 | + default: | |
2290 | + g_assert_not_reached(); | |
2210 | 2291 | } |
2211 | 2292 | |
2212 | - done: | |
2213 | 2293 | if (oprsz < maxsz) { |
2214 | 2294 | expand_clr(dofs + oprsz, maxsz - oprsz); |
2215 | 2295 | } |