Revision | adb196cbd5cff26547bc32a208074f03f4c4a627 (tree) |
---|---|
Time | 2018-03-16 01:55:04 |
Author | Richard Henderson <richard.henderson@linaro.org> |
Committer | Richard Henderson |
tcg: Add choose_vector_size
This unifies 5 copies of checks for supported vector size,
and in the process fixes a missing check in tcg_gen_gvec_2s.
This led to an assertion failure for 64-bit vector multiply,
which is not available in the AVX instruction set.
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
@@ -351,6 +351,42 @@ static void gen_dup_i64(unsigned vece, TCGv_i64 out, TCGv_i64 in) | ||
351 | 351 | } |
352 | 352 | } |
353 | 353 | |
354 | +/* Select a supported vector type for implementing an operation on SIZE | |
355 | + * bytes. If OP is 0, assume that the real operation to be performed is | |
356 | + * required by all backends. Otherwise, make sure that OP can be performed | |
357 | + * on elements of size VECE in the selected type. Do not select V64 if | |
358 | + * PREFER_I64 is true. Return 0 if no vector type is selected. | |
359 | + */ | |
360 | +static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size, | |
361 | + bool prefer_i64) | |
362 | +{ | |
363 | + if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) { | |
364 | + if (op == 0) { | |
365 | + return TCG_TYPE_V256; | |
366 | + } | |
367 | + /* Recall that ARM SVE allows vector sizes that are not a | |
368 | + * power of 2, but always a multiple of 16. The intent is | |
369 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
370 | + * It is hard to imagine a case in which v256 is supported | |
371 | + * but v128 is not, but check anyway. | |
372 | + */ | |
373 | + if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece) | |
374 | + && (size % 32 == 0 | |
375 | + || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { | |
376 | + return TCG_TYPE_V256; | |
377 | + } | |
378 | + } | |
379 | + if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16) | |
380 | + && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) { | |
381 | + return TCG_TYPE_V128; | |
382 | + } | |
383 | + if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8) | |
384 | + && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) { | |
385 | + return TCG_TYPE_V64; | |
386 | + } | |
387 | + return 0; | |
388 | +} | |
389 | + | |
354 | 390 | /* Set OPRSZ bytes at DOFS to replications of IN_32, IN_64 or IN_C. |
355 | 391 | * Only one of IN_32 or IN_64 may be set; |
356 | 392 | * IN_C is used if IN_32 and IN_64 are unset. |
@@ -376,19 +412,12 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
376 | 412 | } |
377 | 413 | } |
378 | 414 | |
379 | - type = 0; | |
380 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) { | |
381 | - type = TCG_TYPE_V256; | |
382 | - } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) { | |
383 | - type = TCG_TYPE_V128; | |
384 | - } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8) | |
385 | - /* Prefer integer when 64-bit host and no variable dup. */ | |
386 | - && !(TCG_TARGET_REG_BITS == 64 && in_32 == NULL | |
387 | - && (in_64 == NULL || vece == MO_64))) { | |
388 | - type = TCG_TYPE_V64; | |
389 | - } | |
390 | - | |
391 | - /* Implement inline with a vector type, if possible. */ | |
415 | + /* Implement inline with a vector type, if possible. | |
416 | + * Prefer integer when 64-bit host and no variable dup. | |
417 | + */ | |
418 | + type = choose_vector_type(0, vece, oprsz, | |
419 | + (TCG_TARGET_REG_BITS == 64 && in_32 == NULL | |
420 | + && (in_64 == NULL || vece == MO_64))); | |
392 | 421 | if (type != 0) { |
393 | 422 | TCGv_vec t_vec = tcg_temp_new_vec(type); |
394 | 423 |
@@ -414,21 +443,30 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
414 | 443 | } |
415 | 444 | |
416 | 445 | i = 0; |
417 | - if (TCG_TARGET_HAS_v256) { | |
446 | + switch (type) { | |
447 | + case TCG_TYPE_V256: | |
448 | + /* Recall that ARM SVE allows vector sizes that are not a | |
449 | + * power of 2, but always a multiple of 16. The intent is | |
450 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
451 | + */ | |
418 | 452 | for (; i + 32 <= oprsz; i += 32) { |
419 | 453 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256); |
420 | 454 | } |
421 | - } | |
422 | - if (TCG_TARGET_HAS_v128) { | |
455 | + /* fallthru */ | |
456 | + case TCG_TYPE_V128: | |
423 | 457 | for (; i + 16 <= oprsz; i += 16) { |
424 | 458 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128); |
425 | 459 | } |
426 | - } | |
427 | - if (TCG_TARGET_HAS_v64) { | |
460 | + break; | |
461 | + case TCG_TYPE_V64: | |
428 | 462 | for (; i < oprsz; i += 8) { |
429 | 463 | tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64); |
430 | 464 | } |
465 | + break; | |
466 | + default: | |
467 | + g_assert_not_reached(); | |
431 | 468 | } |
469 | + | |
432 | 470 | tcg_temp_free_vec(t_vec); |
433 | 471 | goto done; |
434 | 472 | } |
@@ -484,7 +522,7 @@ static void do_dup(unsigned vece, uint32_t dofs, uint32_t oprsz, | ||
484 | 522 | } |
485 | 523 | tcg_temp_free_i64(t_64); |
486 | 524 | goto done; |
487 | - } | |
525 | + } | |
488 | 526 | } |
489 | 527 | |
490 | 528 | /* Otherwise implement out of line. */ |
@@ -866,49 +904,55 @@ static void expand_4_vec(unsigned vece, uint32_t dofs, uint32_t aofs, | ||
866 | 904 | void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, |
867 | 905 | uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g) |
868 | 906 | { |
907 | + TCGType type; | |
908 | + uint32_t some; | |
909 | + | |
869 | 910 | check_size_align(oprsz, maxsz, dofs | aofs); |
870 | 911 | check_overlap_2(dofs, aofs, maxsz); |
871 | 912 | |
872 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
873 | - Expand with successively smaller host vector sizes. The intent is | |
874 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
875 | - /* ??? For maxsz > oprsz, the host may be able to use an opr-sized | |
876 | - operation, zeroing the balance of the register. We can then | |
877 | - use a max-sized store to implement the clearing without an extra | |
878 | - store operation. This is true for aarch64 and x86_64 hosts. */ | |
879 | - | |
880 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
881 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
882 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
913 | + type = 0; | |
914 | + if (g->fniv) { | |
915 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
916 | + } | |
917 | + switch (type) { | |
918 | + case TCG_TYPE_V256: | |
919 | + /* Recall that ARM SVE allows vector sizes that are not a | |
920 | + * power of 2, but always a multiple of 16. The intent is | |
921 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
922 | + */ | |
923 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
883 | 924 | expand_2_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, g->fniv); |
884 | 925 | if (some == oprsz) { |
885 | - goto done; | |
926 | + break; | |
886 | 927 | } |
887 | 928 | dofs += some; |
888 | 929 | aofs += some; |
889 | 930 | oprsz -= some; |
890 | 931 | maxsz -= some; |
891 | - } | |
892 | - | |
893 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
894 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
932 | + /* fallthru */ | |
933 | + case TCG_TYPE_V128: | |
895 | 934 | expand_2_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, g->fniv); |
896 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
897 | - && g->fniv && check_size_impl(oprsz, 8) | |
898 | - && (!g->opc | |
899 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
935 | + break; | |
936 | + case TCG_TYPE_V64: | |
900 | 937 | expand_2_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, g->fniv); |
901 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
902 | - expand_2_i64(dofs, aofs, oprsz, g->fni8); | |
903 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
904 | - expand_2_i32(dofs, aofs, oprsz, g->fni4); | |
905 | - } else { | |
906 | - assert(g->fno != NULL); | |
907 | - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); | |
908 | - return; | |
938 | + break; | |
939 | + | |
940 | + case 0: | |
941 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
942 | + expand_2_i64(dofs, aofs, oprsz, g->fni8); | |
943 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
944 | + expand_2_i32(dofs, aofs, oprsz, g->fni4); | |
945 | + } else { | |
946 | + assert(g->fno != NULL); | |
947 | + tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, g->data, g->fno); | |
948 | + return; | |
949 | + } | |
950 | + break; | |
951 | + | |
952 | + default: | |
953 | + g_assert_not_reached(); | |
909 | 954 | } |
910 | 955 | |
911 | - done: | |
912 | 956 | if (oprsz < maxsz) { |
913 | 957 | expand_clr(dofs + oprsz, maxsz - oprsz); |
914 | 958 | } |
@@ -918,53 +962,64 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs, | ||
918 | 962 | void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz, |
919 | 963 | uint32_t maxsz, int64_t c, const GVecGen2i *g) |
920 | 964 | { |
965 | + TCGType type; | |
966 | + uint32_t some; | |
967 | + | |
921 | 968 | check_size_align(oprsz, maxsz, dofs | aofs); |
922 | 969 | check_overlap_2(dofs, aofs, maxsz); |
923 | 970 | |
924 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
925 | - Expand with successively smaller host vector sizes. The intent is | |
926 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
927 | - | |
928 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
929 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
930 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
971 | + type = 0; | |
972 | + if (g->fniv) { | |
973 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
974 | + } | |
975 | + switch (type) { | |
976 | + case TCG_TYPE_V256: | |
977 | + /* Recall that ARM SVE allows vector sizes that are not a | |
978 | + * power of 2, but always a multiple of 16. The intent is | |
979 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
980 | + */ | |
981 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
931 | 982 | expand_2i_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, |
932 | 983 | c, g->load_dest, g->fniv); |
933 | 984 | if (some == oprsz) { |
934 | - goto done; | |
985 | + break; | |
935 | 986 | } |
936 | 987 | dofs += some; |
937 | 988 | aofs += some; |
938 | 989 | oprsz -= some; |
939 | 990 | maxsz -= some; |
940 | - } | |
941 | - | |
942 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
943 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
991 | + /* fallthru */ | |
992 | + case TCG_TYPE_V128: | |
944 | 993 | expand_2i_vec(g->vece, dofs, aofs, oprsz, 16, TCG_TYPE_V128, |
945 | 994 | c, g->load_dest, g->fniv); |
946 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
947 | - && g->fniv && check_size_impl(oprsz, 8) | |
948 | - && (!g->opc | |
949 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
995 | + break; | |
996 | + case TCG_TYPE_V64: | |
950 | 997 | expand_2i_vec(g->vece, dofs, aofs, oprsz, 8, TCG_TYPE_V64, |
951 | 998 | c, g->load_dest, g->fniv); |
952 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
953 | - expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8); | |
954 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
955 | - expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4); | |
956 | - } else { | |
957 | - if (g->fno) { | |
958 | - tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno); | |
999 | + break; | |
1000 | + | |
1001 | + case 0: | |
1002 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1003 | + expand_2i_i64(dofs, aofs, oprsz, c, g->load_dest, g->fni8); | |
1004 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1005 | + expand_2i_i32(dofs, aofs, oprsz, c, g->load_dest, g->fni4); | |
959 | 1006 | } else { |
960 | - TCGv_i64 tcg_c = tcg_const_i64(c); | |
961 | - tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, maxsz, c, g->fnoi); | |
962 | - tcg_temp_free_i64(tcg_c); | |
1007 | + if (g->fno) { | |
1008 | + tcg_gen_gvec_2_ool(dofs, aofs, oprsz, maxsz, c, g->fno); | |
1009 | + } else { | |
1010 | + TCGv_i64 tcg_c = tcg_const_i64(c); | |
1011 | + tcg_gen_gvec_2i_ool(dofs, aofs, tcg_c, oprsz, | |
1012 | + maxsz, c, g->fnoi); | |
1013 | + tcg_temp_free_i64(tcg_c); | |
1014 | + } | |
1015 | + return; | |
963 | 1016 | } |
964 | - return; | |
1017 | + break; | |
1018 | + | |
1019 | + default: | |
1020 | + g_assert_not_reached(); | |
965 | 1021 | } |
966 | 1022 | |
967 | - done: | |
968 | 1023 | if (oprsz < maxsz) { |
969 | 1024 | expand_clr(dofs + oprsz, maxsz - oprsz); |
970 | 1025 | } |
@@ -981,37 +1036,30 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
981 | 1036 | |
982 | 1037 | type = 0; |
983 | 1038 | if (g->fniv) { |
984 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) { | |
985 | - type = TCG_TYPE_V256; | |
986 | - } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) { | |
987 | - type = TCG_TYPE_V128; | |
988 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
989 | - && check_size_impl(oprsz, 8)) { | |
990 | - type = TCG_TYPE_V64; | |
991 | - } | |
1039 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
992 | 1040 | } |
993 | 1041 | if (type != 0) { |
994 | 1042 | TCGv_vec t_vec = tcg_temp_new_vec(type); |
1043 | + uint32_t some; | |
995 | 1044 | |
996 | 1045 | tcg_gen_dup_i64_vec(g->vece, t_vec, c); |
997 | 1046 | |
998 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
999 | - Expand with successively smaller host vector sizes. The intent is | |
1000 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1001 | 1047 | switch (type) { |
1002 | 1048 | case TCG_TYPE_V256: |
1003 | - { | |
1004 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1005 | - expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, | |
1006 | - t_vec, g->scalar_first, g->fniv); | |
1007 | - if (some == oprsz) { | |
1008 | - break; | |
1009 | - } | |
1010 | - dofs += some; | |
1011 | - aofs += some; | |
1012 | - oprsz -= some; | |
1013 | - maxsz -= some; | |
1049 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1050 | + * power of 2, but always a multiple of 16. The intent is | |
1051 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1052 | + */ | |
1053 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1054 | + expand_2s_vec(g->vece, dofs, aofs, some, 32, TCG_TYPE_V256, | |
1055 | + t_vec, g->scalar_first, g->fniv); | |
1056 | + if (some == oprsz) { | |
1057 | + break; | |
1014 | 1058 | } |
1059 | + dofs += some; | |
1060 | + aofs += some; | |
1061 | + oprsz -= some; | |
1062 | + maxsz -= some; | |
1015 | 1063 | /* fallthru */ |
1016 | 1064 | |
1017 | 1065 | case TCG_TYPE_V128: |
@@ -1055,48 +1103,60 @@ void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz, | ||
1055 | 1103 | void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, |
1056 | 1104 | uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g) |
1057 | 1105 | { |
1106 | + TCGType type; | |
1107 | + uint32_t some; | |
1108 | + | |
1058 | 1109 | check_size_align(oprsz, maxsz, dofs | aofs | bofs); |
1059 | 1110 | check_overlap_3(dofs, aofs, bofs, maxsz); |
1060 | 1111 | |
1061 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
1062 | - Expand with successively smaller host vector sizes. The intent is | |
1063 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1064 | - | |
1065 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
1066 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
1067 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1112 | + type = 0; | |
1113 | + if (g->fniv) { | |
1114 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
1115 | + } | |
1116 | + switch (type) { | |
1117 | + case TCG_TYPE_V256: | |
1118 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1119 | + * power of 2, but always a multiple of 16. The intent is | |
1120 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1121 | + */ | |
1122 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1068 | 1123 | expand_3_vec(g->vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, |
1069 | 1124 | g->load_dest, g->fniv); |
1070 | 1125 | if (some == oprsz) { |
1071 | - goto done; | |
1126 | + break; | |
1072 | 1127 | } |
1073 | 1128 | dofs += some; |
1074 | 1129 | aofs += some; |
1075 | 1130 | bofs += some; |
1076 | 1131 | oprsz -= some; |
1077 | 1132 | maxsz -= some; |
1078 | - } | |
1079 | - | |
1080 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
1081 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
1133 | + /* fallthru */ | |
1134 | + case TCG_TYPE_V128: | |
1082 | 1135 | expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, |
1083 | 1136 | g->load_dest, g->fniv); |
1084 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
1085 | - && g->fniv && check_size_impl(oprsz, 8) | |
1086 | - && (!g->opc | |
1087 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
1137 | + break; | |
1138 | + case TCG_TYPE_V64: | |
1088 | 1139 | expand_3_vec(g->vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, |
1089 | 1140 | g->load_dest, g->fniv); |
1090 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1091 | - expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8); | |
1092 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1093 | - expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4); | |
1094 | - } else { | |
1095 | - assert(g->fno != NULL); | |
1096 | - tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, g->data, g->fno); | |
1141 | + break; | |
1142 | + | |
1143 | + case 0: | |
1144 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1145 | + expand_3_i64(dofs, aofs, bofs, oprsz, g->load_dest, g->fni8); | |
1146 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1147 | + expand_3_i32(dofs, aofs, bofs, oprsz, g->load_dest, g->fni4); | |
1148 | + } else { | |
1149 | + assert(g->fno != NULL); | |
1150 | + tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, | |
1151 | + maxsz, g->data, g->fno); | |
1152 | + return; | |
1153 | + } | |
1154 | + break; | |
1155 | + | |
1156 | + default: | |
1157 | + g_assert_not_reached(); | |
1097 | 1158 | } |
1098 | 1159 | |
1099 | - done: | |
1100 | 1160 | if (oprsz < maxsz) { |
1101 | 1161 | expand_clr(dofs + oprsz, maxsz - oprsz); |
1102 | 1162 | } |
@@ -1106,20 +1166,27 @@ void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs, | ||
1106 | 1166 | void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, |
1107 | 1167 | uint32_t oprsz, uint32_t maxsz, const GVecGen4 *g) |
1108 | 1168 | { |
1169 | + TCGType type; | |
1170 | + uint32_t some; | |
1171 | + | |
1109 | 1172 | check_size_align(oprsz, maxsz, dofs | aofs | bofs | cofs); |
1110 | 1173 | check_overlap_4(dofs, aofs, bofs, cofs, maxsz); |
1111 | 1174 | |
1112 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
1113 | - Expand with successively smaller host vector sizes. The intent is | |
1114 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
1115 | - | |
1116 | - if (TCG_TARGET_HAS_v256 && g->fniv && check_size_impl(oprsz, 32) | |
1117 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V256, g->vece))) { | |
1118 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1175 | + type = 0; | |
1176 | + if (g->fniv) { | |
1177 | + type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64); | |
1178 | + } | |
1179 | + switch (type) { | |
1180 | + case TCG_TYPE_V256: | |
1181 | + /* Recall that ARM SVE allows vector sizes that are not a | |
1182 | + * power of 2, but always a multiple of 16. The intent is | |
1183 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
1184 | + */ | |
1185 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
1119 | 1186 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, some, |
1120 | 1187 | 32, TCG_TYPE_V256, g->fniv); |
1121 | 1188 | if (some == oprsz) { |
1122 | - goto done; | |
1189 | + break; | |
1123 | 1190 | } |
1124 | 1191 | dofs += some; |
1125 | 1192 | aofs += some; |
@@ -1127,30 +1194,33 @@ void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs, | ||
1127 | 1194 | cofs += some; |
1128 | 1195 | oprsz -= some; |
1129 | 1196 | maxsz -= some; |
1130 | - } | |
1131 | - | |
1132 | - if (TCG_TARGET_HAS_v128 && g->fniv && check_size_impl(oprsz, 16) | |
1133 | - && (!g->opc || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V128, g->vece))) { | |
1197 | + /* fallthru */ | |
1198 | + case TCG_TYPE_V128: | |
1134 | 1199 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, |
1135 | 1200 | 16, TCG_TYPE_V128, g->fniv); |
1136 | - } else if (TCG_TARGET_HAS_v64 && !g->prefer_i64 | |
1137 | - && g->fniv && check_size_impl(oprsz, 8) | |
1138 | - && (!g->opc | |
1139 | - || tcg_can_emit_vec_op(g->opc, TCG_TYPE_V64, g->vece))) { | |
1201 | + break; | |
1202 | + case TCG_TYPE_V64: | |
1140 | 1203 | expand_4_vec(g->vece, dofs, aofs, bofs, cofs, oprsz, |
1141 | 1204 | 8, TCG_TYPE_V64, g->fniv); |
1142 | - } else if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1143 | - expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8); | |
1144 | - } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1145 | - expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4); | |
1146 | - } else { | |
1147 | - assert(g->fno != NULL); | |
1148 | - tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, | |
1149 | - oprsz, maxsz, g->data, g->fno); | |
1150 | - return; | |
1205 | + break; | |
1206 | + | |
1207 | + case 0: | |
1208 | + if (g->fni8 && check_size_impl(oprsz, 8)) { | |
1209 | + expand_4_i64(dofs, aofs, bofs, cofs, oprsz, g->fni8); | |
1210 | + } else if (g->fni4 && check_size_impl(oprsz, 4)) { | |
1211 | + expand_4_i32(dofs, aofs, bofs, cofs, oprsz, g->fni4); | |
1212 | + } else { | |
1213 | + assert(g->fno != NULL); | |
1214 | + tcg_gen_gvec_4_ool(dofs, aofs, bofs, cofs, | |
1215 | + oprsz, maxsz, g->data, g->fno); | |
1216 | + return; | |
1217 | + } | |
1218 | + break; | |
1219 | + | |
1220 | + default: | |
1221 | + g_assert_not_reached(); | |
1151 | 1222 | } |
1152 | 1223 | |
1153 | - done: | |
1154 | 1224 | if (oprsz < maxsz) { |
1155 | 1225 | expand_clr(dofs + oprsz, maxsz - oprsz); |
1156 | 1226 | } |
@@ -2155,6 +2225,8 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, | ||
2155 | 2225 | [TCG_COND_LTU] = ltu_fn, |
2156 | 2226 | [TCG_COND_LEU] = leu_fn, |
2157 | 2227 | }; |
2228 | + TCGType type; | |
2229 | + uint32_t some; | |
2158 | 2230 | |
2159 | 2231 | check_size_align(oprsz, maxsz, dofs | aofs | bofs); |
2160 | 2232 | check_overlap_3(dofs, aofs, bofs, maxsz); |
@@ -2165,51 +2237,59 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs, | ||
2165 | 2237 | return; |
2166 | 2238 | } |
2167 | 2239 | |
2168 | - /* Recall that ARM SVE allows vector sizes that are not a power of 2. | |
2169 | - Expand with successively smaller host vector sizes. The intent is | |
2170 | - that e.g. oprsz == 80 would be expanded with 2x32 + 1x16. */ | |
2171 | - | |
2172 | - if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32) | |
2173 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V256, vece)) { | |
2174 | - uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32); | |
2240 | + /* Implement inline with a vector type, if possible. | |
2241 | + * Prefer integer when 64-bit host and 64-bit comparison. | |
2242 | + */ | |
2243 | + type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz, | |
2244 | + TCG_TARGET_REG_BITS == 64 && vece == MO_64); | |
2245 | + switch (type) { | |
2246 | + case TCG_TYPE_V256: | |
2247 | + /* Recall that ARM SVE allows vector sizes that are not a | |
2248 | + * power of 2, but always a multiple of 16. The intent is | |
2249 | + * that e.g. size == 80 would be expanded with 2x32 + 1x16. | |
2250 | + */ | |
2251 | + some = QEMU_ALIGN_DOWN(oprsz, 32); | |
2175 | 2252 | expand_cmp_vec(vece, dofs, aofs, bofs, some, 32, TCG_TYPE_V256, cond); |
2176 | 2253 | if (some == oprsz) { |
2177 | - goto done; | |
2254 | + break; | |
2178 | 2255 | } |
2179 | 2256 | dofs += some; |
2180 | 2257 | aofs += some; |
2181 | 2258 | bofs += some; |
2182 | 2259 | oprsz -= some; |
2183 | 2260 | maxsz -= some; |
2184 | - } | |
2185 | - | |
2186 | - if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16) | |
2187 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V128, vece)) { | |
2261 | + /* fallthru */ | |
2262 | + case TCG_TYPE_V128: | |
2188 | 2263 | expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 16, TCG_TYPE_V128, cond); |
2189 | - } else if (TCG_TARGET_HAS_v64 | |
2190 | - && check_size_impl(oprsz, 8) | |
2191 | - && (TCG_TARGET_REG_BITS == 32 || vece != MO_64) | |
2192 | - && tcg_can_emit_vec_op(INDEX_op_cmp_vec, TCG_TYPE_V64, vece)) { | |
2264 | + break; | |
2265 | + case TCG_TYPE_V64: | |
2193 | 2266 | expand_cmp_vec(vece, dofs, aofs, bofs, oprsz, 8, TCG_TYPE_V64, cond); |
2194 | - } else if (vece == MO_64 && check_size_impl(oprsz, 8)) { | |
2195 | - expand_cmp_i64(dofs, aofs, bofs, oprsz, cond); | |
2196 | - } else if (vece == MO_32 && check_size_impl(oprsz, 4)) { | |
2197 | - expand_cmp_i32(dofs, aofs, bofs, oprsz, cond); | |
2198 | - } else { | |
2199 | - gen_helper_gvec_3 * const *fn = fns[cond]; | |
2200 | - | |
2201 | - if (fn == NULL) { | |
2202 | - uint32_t tmp; | |
2203 | - tmp = aofs, aofs = bofs, bofs = tmp; | |
2204 | - cond = tcg_swap_cond(cond); | |
2205 | - fn = fns[cond]; | |
2206 | - assert(fn != NULL); | |
2267 | + break; | |
2268 | + | |
2269 | + case 0: | |
2270 | + if (vece == MO_64 && check_size_impl(oprsz, 8)) { | |
2271 | + expand_cmp_i64(dofs, aofs, bofs, oprsz, cond); | |
2272 | + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) { | |
2273 | + expand_cmp_i32(dofs, aofs, bofs, oprsz, cond); | |
2274 | + } else { | |
2275 | + gen_helper_gvec_3 * const *fn = fns[cond]; | |
2276 | + | |
2277 | + if (fn == NULL) { | |
2278 | + uint32_t tmp; | |
2279 | + tmp = aofs, aofs = bofs, bofs = tmp; | |
2280 | + cond = tcg_swap_cond(cond); | |
2281 | + fn = fns[cond]; | |
2282 | + assert(fn != NULL); | |
2283 | + } | |
2284 | + tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]); | |
2285 | + return; | |
2207 | 2286 | } |
2208 | - tcg_gen_gvec_3_ool(dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]); | |
2209 | - return; | |
2287 | + break; | |
2288 | + | |
2289 | + default: | |
2290 | + g_assert_not_reached(); | |
2210 | 2291 | } |
2211 | 2292 | |
2212 | - done: | |
2213 | 2293 | if (oprsz < maxsz) { |
2214 | 2294 | expand_clr(dofs + oprsz, maxsz - oprsz); |
2215 | 2295 | } |