GCC with patches for Dreamcast
Revision | a324c13210c321057b238bac94605fa1f8278d09 (tree) |
---|---|
Time | 2021-05-06 21:01:37 |
Author | Roman Zhuykov <zhroma@ispr...> |
Committer | Roman Zhuykov |
modulo-sched: skip loops with strange register defs [PR100225]
The PR84878 fix added an assertion which can fail, e.g. when the stack
pointer is adjusted inside the loop. We have to prevent that by searching
earlier for any 'strange' instruction. The solution is to skip the whole
loop if, using 'note_stores', we find an instruction that writes one of the
hard registers in the 'df->regular_block_artificial_uses' set.
Also, the patch properly prohibits non-single-set instructions in the loop body
(they are allowed only right before the basic block tail).
gcc/ChangeLog:
PR rtl-optimization/100225
PR rtl-optimization/84878
* modulo-sched.c (sms_schedule): Use note_stores to skip loops
where we have an instruction which touches (writes) any hard
register from df->regular_block_artificial_uses set.
Allow not-single-set instruction only right before basic block
tail.
gcc/testsuite/ChangeLog:
PR rtl-optimization/100225
PR rtl-optimization/84878
* gcc.dg/pr100225.c: New test.
libgomp/ChangeLog:
* testsuite/libgomp.oacc-c-c++-common/atomic_capture-3.c: New test.
(cherry picked from commit 4cf3b10f27b1994cf4a9eb12079d85412ebc7cad)
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. If not see | ||
44 | 44 | #include "tree-pass.h" |
45 | 45 | #include "dbgcnt.h" |
46 | 46 | #include "loop-unroll.h" |
47 | +#include "hard-reg-set.h" | |
47 | 48 | |
48 | 49 | #ifdef INSN_SCHEDULING |
49 | 50 |
@@ -1358,6 +1359,7 @@ sms_schedule (void) | ||
1358 | 1359 | basic_block condition_bb = NULL; |
1359 | 1360 | edge latch_edge; |
1360 | 1361 | HOST_WIDE_INT trip_count, max_trip_count; |
1362 | + HARD_REG_SET prohibited_regs; | |
1361 | 1363 | |
1362 | 1364 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
1363 | 1365 | | LOOPS_HAVE_RECORDED_EXITS); |
@@ -1387,6 +1389,8 @@ sms_schedule (void) | ||
1387 | 1389 | We use loop->num as index into this array. */ |
1388 | 1390 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun)); |
1389 | 1391 | |
1392 | + REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses); | |
1393 | + | |
1390 | 1394 | if (dump_file) |
1391 | 1395 | { |
1392 | 1396 | fprintf (dump_file, "\n\nSMS analysis phase\n"); |
@@ -1475,23 +1479,31 @@ sms_schedule (void) | ||
1475 | 1479 | } |
1476 | 1480 | |
1477 | 1481 | /* Don't handle BBs with calls or barriers |
1478 | - or !single_set with the exception of instructions that include | |
1479 | - count_reg---these instructions are part of the control part | |
1480 | - that do-loop recognizes. | |
1482 | + or !single_set with the exception of do-loop control part insns. | |
1481 | 1483 | ??? Should handle insns defining subregs. */ |
1482 | - for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | |
1483 | - { | |
1484 | - rtx set; | |
1485 | - | |
1486 | - if (CALL_P (insn) | |
1487 | - || BARRIER_P (insn) | |
1488 | - || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | |
1489 | - && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE | |
1490 | - && !reg_mentioned_p (count_reg, insn)) | |
1491 | - || (INSN_P (insn) && (set = single_set (insn)) | |
1492 | - && GET_CODE (SET_DEST (set)) == SUBREG)) | |
1493 | - break; | |
1494 | - } | |
1484 | + for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) | |
1485 | + { | |
1486 | + if (INSN_P (insn)) | |
1487 | + { | |
1488 | + HARD_REG_SET regs; | |
1489 | + CLEAR_HARD_REG_SET (regs); | |
1490 | + note_stores (PATTERN (insn), record_hard_reg_sets, &regs); | |
1491 | + if (hard_reg_set_intersect_p (regs, prohibited_regs)) | |
1492 | + break; | |
1493 | + } | |
1494 | + | |
1495 | + if (CALL_P (insn) | |
1496 | + || BARRIER_P (insn) | |
1497 | + || (INSN_P (insn) && single_set (insn) | |
1498 | + && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) | |
1499 | + /* Not a single set. */ | |
1500 | + || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | |
1501 | + && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE | |
1502 | + /* But non-single-set allowed in one special case. */ | |
1503 | + && (insn != prev_nondebug_insn (tail) | |
1504 | + || !reg_mentioned_p (count_reg, insn)))) | |
1505 | + break; | |
1506 | + } | |
1495 | 1507 | |
1496 | 1508 | if (insn != NEXT_INSN (tail)) |
1497 | 1509 | { |
@@ -1501,11 +1513,13 @@ sms_schedule (void) | ||
1501 | 1513 | fprintf (dump_file, "SMS loop-with-call\n"); |
1502 | 1514 | else if (BARRIER_P (insn)) |
1503 | 1515 | fprintf (dump_file, "SMS loop-with-barrier\n"); |
1504 | - else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn) | |
1505 | - && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)) | |
1506 | - fprintf (dump_file, "SMS loop-with-not-single-set\n"); | |
1507 | - else | |
1508 | - fprintf (dump_file, "SMS loop with subreg in lhs\n"); | |
1516 | + else if (INSN_P (insn) && single_set (insn) | |
1517 | + && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) | |
1518 | + fprintf (dump_file, "SMS loop with subreg in lhs\n"); | |
1519 | + else | |
1520 | + fprintf (dump_file, | |
1521 | + "SMS loop-with-not-single-set-or-prohibited-reg\n"); | |
1522 | + | |
1509 | 1523 | print_rtl_single (dump_file, insn); |
1510 | 1524 | } |
1511 | 1525 |
@@ -0,0 +1,15 @@ | ||
1 | +/* PR rtl-optimization/100225 */ | |
2 | +/* { dg-do compile } */ | |
3 | +/* { dg-options "-O1 -fmodulo-sched" } */ | |
4 | + | |
5 | +void | |
6 | +vorbis_synthesis_lapout (void); | |
7 | + | |
8 | +void | |
9 | +ov_info (int **lappcm, int ov_info_i) | |
10 | +{ | |
11 | + while (ov_info_i < 1) | |
12 | + lappcm[ov_info_i++] = __builtin_alloca (1); | |
13 | + | |
14 | + vorbis_synthesis_lapout (); | |
15 | +} |
@@ -0,0 +1,1627 @@ | ||
1 | +/* { dg-do run } */ | |
2 | +/* { dg-additional-options "-fmodulo-sched -fmodulo-sched-allow-regmoves" } */ | |
3 | + | |
4 | +#include <stdlib.h> | |
5 | + | |
6 | +int | |
7 | +main(int argc, char **argv) | |
8 | +{ | |
9 | + int iexp, igot, imax, imin; | |
10 | + long long lexp, lgot; | |
11 | + int N = 32; | |
12 | + int i; | |
13 | + int idata[N]; | |
14 | + long long ldata[N]; | |
15 | + float fexp, fgot; | |
16 | + float fdata[N]; | |
17 | + | |
18 | + igot = 1234; | |
19 | + iexp = 31; | |
20 | + | |
21 | + for (i = 0; i < N; i++) | |
22 | + idata[i] = i; | |
23 | + | |
24 | +#pragma acc data copy (igot, idata[0:N]) | |
25 | + { | |
26 | +#pragma acc parallel loop | |
27 | + for (i = 0; i < N; i++) | |
28 | +#pragma acc atomic capture | |
29 | + { idata[i] = igot; igot = i; } | |
30 | + } | |
31 | + | |
32 | + imax = 0; | |
33 | + imin = N; | |
34 | + | |
35 | + for (i = 0; i < N; i++) | |
36 | + { | |
37 | + imax = idata[i] > imax ? idata[i] : imax; | |
38 | + imin = idata[i] < imin ? idata[i] : imin; | |
39 | + } | |
40 | + | |
41 | + if (imax != 1234 || imin != 0) | |
42 | + abort (); | |
43 | + | |
44 | + return 0; | |
45 | + | |
46 | + igot = 0; | |
47 | + iexp = 32; | |
48 | + | |
49 | +#pragma acc data copy (igot, idata[0:N]) | |
50 | + { | |
51 | +#pragma acc parallel loop | |
52 | + for (i = 0; i < N; i++) | |
53 | +#pragma acc atomic capture | |
54 | + { idata[i] = igot; igot++; } | |
55 | + } | |
56 | + | |
57 | + if (iexp != igot) | |
58 | + abort (); | |
59 | + | |
60 | + igot = 0; | |
61 | + iexp = 32; | |
62 | + | |
63 | +#pragma acc data copy (igot, idata[0:N]) | |
64 | + { | |
65 | +#pragma acc parallel loop | |
66 | + for (i = 0; i < N; i++) | |
67 | +#pragma acc atomic capture | |
68 | + { idata[i] = igot; ++igot; } | |
69 | + } | |
70 | + | |
71 | + if (iexp != igot) | |
72 | + abort (); | |
73 | + | |
74 | + igot = 0; | |
75 | + iexp = 32; | |
76 | + | |
77 | +#pragma acc data copy (igot, idata[0:N]) | |
78 | + { | |
79 | +#pragma acc parallel loop | |
80 | + for (i = 0; i < N; i++) | |
81 | +#pragma acc atomic capture | |
82 | + { ++igot; idata[i] = igot; } | |
83 | + } | |
84 | + | |
85 | + if (iexp != igot) | |
86 | + abort (); | |
87 | + | |
88 | + igot = 0; | |
89 | + iexp = 32; | |
90 | + | |
91 | +#pragma acc data copy (igot, idata[0:N]) | |
92 | + { | |
93 | +#pragma acc parallel loop | |
94 | + for (i = 0; i < N; i++) | |
95 | +#pragma acc atomic capture | |
96 | + { igot++; idata[i] = igot; } | |
97 | + } | |
98 | + | |
99 | + if (iexp != igot) | |
100 | + abort (); | |
101 | + | |
102 | + igot = 32; | |
103 | + iexp = 0; | |
104 | + | |
105 | +#pragma acc data copy (igot, idata[0:N]) | |
106 | + { | |
107 | +#pragma acc parallel loop | |
108 | + for (i = 0; i < N; i++) | |
109 | +#pragma acc atomic capture | |
110 | + { idata[i] = igot; igot--; } | |
111 | + } | |
112 | + | |
113 | + if (iexp != igot) | |
114 | + abort (); | |
115 | + | |
116 | + igot = 32; | |
117 | + iexp = 0; | |
118 | + | |
119 | +#pragma acc data copy (igot, idata[0:N]) | |
120 | + { | |
121 | +#pragma acc parallel loop | |
122 | + for (i = 0; i < N; i++) | |
123 | +#pragma acc atomic capture | |
124 | + { idata[i] = igot; --igot; } | |
125 | + } | |
126 | + | |
127 | + if (iexp != igot) | |
128 | + abort (); | |
129 | + | |
130 | + igot = 32; | |
131 | + iexp = 0; | |
132 | + | |
133 | +#pragma acc data copy (igot, idata[0:N]) | |
134 | + { | |
135 | +#pragma acc parallel loop | |
136 | + for (i = 0; i < N; i++) | |
137 | +#pragma acc atomic capture | |
138 | + { --igot; idata[i] = igot; } | |
139 | + } | |
140 | + | |
141 | + if (iexp != igot) | |
142 | + abort (); | |
143 | + | |
144 | + igot = 32; | |
145 | + iexp = 0; | |
146 | + | |
147 | +#pragma acc data copy (igot, idata[0:N]) | |
148 | + { | |
149 | +#pragma acc parallel loop | |
150 | + for (i = 0; i < N; i++) | |
151 | +#pragma acc atomic capture | |
152 | + { igot--; idata[i] = igot; } | |
153 | + } | |
154 | + | |
155 | + if (iexp != igot) | |
156 | + abort (); | |
157 | + | |
158 | + /* BINOP = + */ | |
159 | + igot = 0; | |
160 | + iexp = 32; | |
161 | + | |
162 | +#pragma acc data copy (igot, idata[0:N]) | |
163 | + { | |
164 | +#pragma acc parallel loop | |
165 | + for (i = 0; i < N; i++) | |
166 | + { | |
167 | + int expr = 1; | |
168 | + | |
169 | +#pragma acc atomic capture | |
170 | + { idata[i] = igot; igot += expr; } | |
171 | + } | |
172 | + } | |
173 | + | |
174 | + if (iexp != igot) | |
175 | + abort (); | |
176 | + | |
177 | + igot = 0; | |
178 | + iexp = 32; | |
179 | + | |
180 | +#pragma acc data copy (igot, idata[0:N]) | |
181 | + { | |
182 | +#pragma acc parallel loop | |
183 | + for (i = 0; i < N; i++) | |
184 | + { | |
185 | + int expr = 1; | |
186 | + | |
187 | +#pragma acc atomic capture | |
188 | + { igot += expr; idata[i] = igot; } | |
189 | + } | |
190 | + } | |
191 | + | |
192 | + if (iexp != igot) | |
193 | + abort (); | |
194 | + | |
195 | + igot = 0; | |
196 | + iexp = 32; | |
197 | + | |
198 | +#pragma acc data copy (igot, idata[0:N]) | |
199 | + { | |
200 | +#pragma acc parallel loop | |
201 | + for (i = 0; i < N; i++) | |
202 | + { | |
203 | + int expr = 1; | |
204 | + | |
205 | +#pragma acc atomic capture | |
206 | + { idata[i] = igot; igot = igot + expr; } | |
207 | + } | |
208 | + } | |
209 | + | |
210 | + if (iexp != igot) | |
211 | + abort (); | |
212 | + | |
213 | + igot = 0; | |
214 | + iexp = 32; | |
215 | + | |
216 | +#pragma acc data copy (igot, idata[0:N]) | |
217 | + { | |
218 | +#pragma acc parallel loop | |
219 | + for (i = 0; i < N; i++) | |
220 | + { | |
221 | + int expr = 1; | |
222 | + | |
223 | +#pragma acc atomic capture | |
224 | + { idata[i] = igot; igot = expr + igot; } | |
225 | + } | |
226 | + } | |
227 | + | |
228 | + if (iexp != igot) | |
229 | + abort (); | |
230 | + | |
231 | + igot = 0; | |
232 | + iexp = 32; | |
233 | + | |
234 | +#pragma acc data copy (igot, idata[0:N]) | |
235 | + { | |
236 | +#pragma acc parallel loop | |
237 | + for (i = 0; i < N; i++) | |
238 | + { | |
239 | + int expr = 1; | |
240 | + | |
241 | +#pragma acc atomic capture | |
242 | + { igot = igot + expr; idata[i] = igot; } | |
243 | + } | |
244 | + } | |
245 | + | |
246 | + if (iexp != igot) | |
247 | + abort (); | |
248 | + | |
249 | + | |
250 | + igot = 0; | |
251 | + iexp = 32; | |
252 | + | |
253 | +#pragma acc data copy (igot, idata[0:N]) | |
254 | + { | |
255 | +#pragma acc parallel loop | |
256 | + for (i = 0; i < N; i++) | |
257 | + { | |
258 | + int expr = 1; | |
259 | + | |
260 | +#pragma acc atomic capture | |
261 | + { igot = expr + igot; idata[i] = igot; } | |
262 | + } | |
263 | + } | |
264 | + | |
265 | + if (iexp != igot) | |
266 | + abort (); | |
267 | + | |
268 | + /* BINOP = * */ | |
269 | + lgot = 1LL; | |
270 | + lexp = 1LL << 32; | |
271 | + | |
272 | +#pragma acc data copy (lgot, ldata[0:N]) | |
273 | + { | |
274 | +#pragma acc parallel loop | |
275 | + for (i = 0; i < N; i++) | |
276 | + { | |
277 | + long long expr = 2LL; | |
278 | + | |
279 | +#pragma acc atomic capture | |
280 | + { ldata[i] = lgot; lgot *= expr; } | |
281 | + } | |
282 | + } | |
283 | + | |
284 | + if (lexp != lgot) | |
285 | + abort (); | |
286 | + | |
287 | + lgot = 1LL; | |
288 | + lexp = 1LL << 32; | |
289 | + | |
290 | +#pragma acc data copy (lgot, ldata[0:N]) | |
291 | + { | |
292 | +#pragma acc parallel loop | |
293 | + for (i = 0; i < N; i++) | |
294 | + { | |
295 | + long long expr = 2LL; | |
296 | + | |
297 | +#pragma acc atomic capture | |
298 | + { lgot *= expr; ldata[i] = lgot; } | |
299 | + } | |
300 | + } | |
301 | + | |
302 | + if (lexp != lgot) | |
303 | + abort (); | |
304 | + | |
305 | + lgot = 1LL; | |
306 | + lexp = 1LL << 32; | |
307 | + | |
308 | +#pragma acc data copy (lgot, ldata[0:N]) | |
309 | + { | |
310 | +#pragma acc parallel loop | |
311 | + for (i = 0; i < N; i++) | |
312 | + { | |
313 | + long long expr = 2LL; | |
314 | + | |
315 | +#pragma acc atomic capture | |
316 | + { ldata[i] = lgot; lgot = lgot * expr; } | |
317 | + } | |
318 | + } | |
319 | + | |
320 | + if (lexp != lgot) | |
321 | + abort (); | |
322 | + | |
323 | + lgot = 1LL; | |
324 | + lexp = 1LL << 32; | |
325 | + | |
326 | +#pragma acc data copy (lgot, ldata[0:N]) | |
327 | + { | |
328 | +#pragma acc parallel loop | |
329 | + for (i = 0; i < N; i++) | |
330 | + { | |
331 | + long long expr = 2LL; | |
332 | + | |
333 | +#pragma acc atomic capture | |
334 | + { ldata[i] = lgot; lgot = expr * lgot; } | |
335 | + } | |
336 | + } | |
337 | + | |
338 | + if (lexp != lgot) | |
339 | + abort (); | |
340 | + | |
341 | + lgot = 1LL; | |
342 | + lexp = 1LL << 32; | |
343 | + | |
344 | +#pragma acc data copy (lgot, ldata[0:N]) | |
345 | + { | |
346 | +#pragma acc parallel loop | |
347 | + for (i = 0; i < N; i++) | |
348 | + { | |
349 | + long long expr = 2LL; | |
350 | + | |
351 | +#pragma acc atomic capture | |
352 | + { lgot = lgot * expr; ldata[i] = lgot; } | |
353 | + } | |
354 | + } | |
355 | + | |
356 | + if (lexp != lgot) | |
357 | + abort (); | |
358 | + | |
359 | + lgot = 1LL; | |
360 | + lexp = 1LL << 32; | |
361 | + | |
362 | +#pragma acc data copy (lgot, ldata[0:N]) | |
363 | + { | |
364 | +#pragma acc parallel loop | |
365 | + for (i = 0; i < N; i++) | |
366 | + { | |
367 | + long long expr = 2; | |
368 | + | |
369 | +#pragma acc atomic capture | |
370 | + { lgot = expr * lgot; ldata[i] = lgot; } | |
371 | + } | |
372 | + } | |
373 | + | |
374 | + if (lexp != lgot) | |
375 | + abort (); | |
376 | + | |
377 | + /* BINOP = - */ | |
378 | + igot = 32; | |
379 | + iexp = 0; | |
380 | + | |
381 | +#pragma acc data copy (igot, idata[0:N]) | |
382 | + { | |
383 | +#pragma acc parallel loop | |
384 | + for (i = 0; i < N; i++) | |
385 | + { | |
386 | + int expr = 1; | |
387 | + | |
388 | +#pragma acc atomic capture | |
389 | + { idata[i] = igot; igot -= expr; } | |
390 | + } | |
391 | + } | |
392 | + | |
393 | + if (iexp != igot) | |
394 | + abort (); | |
395 | + | |
396 | + igot = 32; | |
397 | + iexp = 0; | |
398 | + | |
399 | +#pragma acc data copy (igot, idata[0:N]) | |
400 | + { | |
401 | +#pragma acc parallel loop | |
402 | + for (i = 0; i < N; i++) | |
403 | + { | |
404 | + int expr = 1; | |
405 | + | |
406 | +#pragma acc atomic capture | |
407 | + { igot -= expr; idata[i] = igot; } | |
408 | + } | |
409 | + } | |
410 | + | |
411 | + if (iexp != igot) | |
412 | + abort (); | |
413 | + | |
414 | + igot = 32; | |
415 | + iexp = 0; | |
416 | + | |
417 | +#pragma acc data copy (igot, idata[0:N]) | |
418 | + { | |
419 | +#pragma acc parallel loop | |
420 | + for (i = 0; i < N; i++) | |
421 | + { | |
422 | + int expr = 1; | |
423 | + | |
424 | +#pragma acc atomic capture | |
425 | + { idata[i] = igot; igot = igot - expr; } | |
426 | + } | |
427 | + } | |
428 | + | |
429 | + if (iexp != igot) | |
430 | + abort (); | |
431 | + | |
432 | + igot = 1; | |
433 | + iexp = 1; | |
434 | + | |
435 | +#pragma acc data copy (igot, idata[0:N]) | |
436 | + { | |
437 | +#pragma acc parallel loop | |
438 | + for (i = 0; i < N; i++) | |
439 | + { | |
440 | + int expr = 1; | |
441 | + | |
442 | +#pragma acc atomic capture | |
443 | + { idata[i] = igot; igot = expr - igot; } | |
444 | + } | |
445 | + } | |
446 | + | |
447 | + for (i = 0; i < N; i++) | |
448 | + if (i % 2 == 0) | |
449 | + { | |
450 | + if (idata[i] != 1) | |
451 | + abort (); | |
452 | + } | |
453 | + else | |
454 | + { | |
455 | + if (idata[i] != 0) | |
456 | + abort (); | |
457 | + } | |
458 | + | |
459 | + if (iexp != igot) | |
460 | + abort (); | |
461 | + | |
462 | + igot = 1; | |
463 | + iexp = -31; | |
464 | + | |
465 | +#pragma acc data copy (igot, idata[0:N]) | |
466 | + { | |
467 | +#pragma acc parallel loop | |
468 | + for (i = 0; i < N; i++) | |
469 | + { | |
470 | + int expr = 1; | |
471 | + | |
472 | +#pragma acc atomic capture | |
473 | + { igot = igot - expr; idata[i] = igot; } | |
474 | + } | |
475 | + } | |
476 | + | |
477 | + if (iexp != igot) | |
478 | + abort (); | |
479 | + | |
480 | + igot = 1; | |
481 | + iexp = 1; | |
482 | + | |
483 | +#pragma acc data copy (igot, idata[0:N]) | |
484 | + { | |
485 | +#pragma acc parallel loop | |
486 | + for (i = 0; i < N; i++) | |
487 | + { | |
488 | + int expr = 1; | |
489 | + | |
490 | +#pragma acc atomic capture | |
491 | + { igot = expr - igot; idata[i] = igot; } | |
492 | + } | |
493 | + } | |
494 | + | |
495 | + for (i = 0; i < N; i++) | |
496 | + if (i % 2 == 0) | |
497 | + { | |
498 | + if (idata[i] != 0) | |
499 | + abort (); | |
500 | + } | |
501 | + else | |
502 | + { | |
503 | + if (idata[i] != 1) | |
504 | + abort (); | |
505 | + } | |
506 | + | |
507 | + if (iexp != igot) | |
508 | + abort (); | |
509 | + | |
510 | + /* BINOP = / */ | |
511 | + lgot = 1LL << 32; | |
512 | + lexp = 1LL; | |
513 | + | |
514 | +#pragma acc data copy (lgot, ldata[0:N]) | |
515 | + { | |
516 | +#pragma acc parallel loop | |
517 | + for (i = 0; i < N; i++) | |
518 | + { | |
519 | + long long expr = 2LL; | |
520 | + | |
521 | +#pragma acc atomic capture | |
522 | + { ldata[i] = lgot; lgot /= expr; } | |
523 | + } | |
524 | + } | |
525 | + | |
526 | + if (lexp != lgot) | |
527 | + abort (); | |
528 | + | |
529 | + lgot = 1LL << 32; | |
530 | + lexp = 1LL; | |
531 | + | |
532 | +#pragma acc data copy (lgot, ldata[0:N]) | |
533 | + { | |
534 | +#pragma acc parallel loop | |
535 | + for (i = 0; i < N; i++) | |
536 | + { | |
537 | + long long expr = 2LL; | |
538 | + | |
539 | +#pragma acc atomic capture | |
540 | + { lgot /= expr; ldata[i] = lgot; } | |
541 | + } | |
542 | + } | |
543 | + | |
544 | + if (lexp != lgot) | |
545 | + abort (); | |
546 | + | |
547 | + lgot = 1LL << 32; | |
548 | + lexp = 1LL; | |
549 | + | |
550 | +#pragma acc data copy (lgot, ldata[0:N]) | |
551 | + { | |
552 | +#pragma acc parallel loop | |
553 | + for (i = 0; i < N; i++) | |
554 | + { | |
555 | + long long expr = 2LL; | |
556 | + | |
557 | +#pragma acc atomic capture | |
558 | + { ldata[i] = lgot; lgot = lgot / expr; } | |
559 | + } | |
560 | + } | |
561 | + | |
562 | + if (lexp != lgot) | |
563 | + abort (); | |
564 | + | |
565 | + lgot = 2LL; | |
566 | + lexp = 2LL; | |
567 | + | |
568 | +#pragma acc data copy (lgot, ldata[0:N]) | |
569 | + { | |
570 | +#pragma acc parallel loop | |
571 | + for (i = 0; i < N; i++) | |
572 | + { | |
573 | + long long expr = 1LL << N; | |
574 | + | |
575 | +#pragma acc atomic capture | |
576 | + { ldata[i] = lgot; lgot = expr / lgot; } | |
577 | + } | |
578 | + } | |
579 | + | |
580 | + if (lexp != lgot) | |
581 | + abort (); | |
582 | + | |
583 | + lgot = 2LL; | |
584 | + lexp = 2LL; | |
585 | + | |
586 | +#pragma acc data copy (lgot, ldata[0:N]) | |
587 | + { | |
588 | +#pragma acc parallel loop | |
589 | + for (i = 0; i < N; i++) | |
590 | + { | |
591 | + long long expr = 1LL << N; | |
592 | + | |
593 | +#pragma acc atomic capture | |
594 | + { lgot = lgot / expr; ldata[i] = lgot; } | |
595 | + } | |
596 | + } | |
597 | + | |
598 | + if (lexp != lgot) | |
599 | + abort (); | |
600 | + | |
601 | + lgot = 2LL; | |
602 | + lexp = 2LL; | |
603 | + | |
604 | +#pragma acc data copy (lgot, ldata[0:N]) | |
605 | + { | |
606 | +#pragma acc parallel loop | |
607 | + for (i = 0; i < N; i++) | |
608 | + { | |
609 | + long long expr = 1LL << N; | |
610 | + | |
611 | +#pragma acc atomic capture | |
612 | + { lgot = expr / lgot; ldata[i] = lgot; } | |
613 | + } | |
614 | + } | |
615 | + | |
616 | + if (lexp != lgot) | |
617 | + abort (); | |
618 | + | |
619 | + /* BINOP = & */ | |
620 | + lgot = ~0LL; | |
621 | + lexp = 0LL; | |
622 | + | |
623 | +#pragma acc data copy (lgot, ldata[0:N]) | |
624 | + { | |
625 | +#pragma acc parallel loop | |
626 | + for (i = 0; i < N; i++) | |
627 | + { | |
628 | + long long expr = ~(1 << i); | |
629 | + | |
630 | +#pragma acc atomic capture | |
631 | + { ldata[i] = lgot; lgot &= expr; } | |
632 | + } | |
633 | + } | |
634 | + | |
635 | + if (lexp != lgot) | |
636 | + abort (); | |
637 | + | |
638 | + lgot = ~0LL; | |
639 | + iexp = 0LL; | |
640 | + | |
641 | +#pragma acc data copy (lgot, ldata[0:N]) | |
642 | + { | |
643 | +#pragma acc parallel loop | |
644 | + for (i = 0; i < N; i++) | |
645 | + { | |
646 | + long long expr = ~(1 << i); | |
647 | + | |
648 | +#pragma acc atomic capture | |
649 | + { lgot &= expr; ldata[i] = lgot; } | |
650 | + } | |
651 | + } | |
652 | + | |
653 | + if (lexp != lgot) | |
654 | + abort (); | |
655 | + | |
656 | + lgot = ~0LL; | |
657 | + lexp = 0LL; | |
658 | + | |
659 | +#pragma acc data copy (lgot, ldata[0:N]) | |
660 | + { | |
661 | +#pragma acc parallel loop | |
662 | + for (i = 0; i < N; i++) | |
663 | + { | |
664 | + long long expr = ~(1 << i); | |
665 | + | |
666 | +#pragma acc atomic capture | |
667 | + { ldata[i] = lgot; lgot = lgot & expr; } | |
668 | + } | |
669 | + } | |
670 | + | |
671 | + if (lexp != lgot) | |
672 | + abort (); | |
673 | + | |
674 | + lgot = ~0LL; | |
675 | + lexp = 0LL; | |
676 | + | |
677 | +#pragma acc data copy (lgot, ldata[0:N]) | |
678 | + { | |
679 | +#pragma acc parallel loop | |
680 | + for (i = 0; i < N; i++) | |
681 | + { | |
682 | + long long expr = ~(1 << i); | |
683 | + | |
684 | +#pragma acc atomic capture | |
685 | + { ldata[i] = lgot; lgot = expr & lgot; } | |
686 | + } | |
687 | + } | |
688 | + | |
689 | + if (lexp != lgot) | |
690 | + abort (); | |
691 | + | |
692 | + lgot = ~0LL; | |
693 | + iexp = 0LL; | |
694 | + | |
695 | +#pragma acc data copy (lgot, ldata[0:N]) | |
696 | + { | |
697 | +#pragma acc parallel loop | |
698 | + for (i = 0; i < N; i++) | |
699 | + { | |
700 | + long long expr = ~(1 << i); | |
701 | + | |
702 | +#pragma acc atomic capture | |
703 | + { lgot = lgot & expr; ldata[i] = lgot; } | |
704 | + } | |
705 | + } | |
706 | + | |
707 | + if (lexp != lgot) | |
708 | + abort (); | |
709 | + | |
710 | + lgot = ~0LL; | |
711 | + lexp = 0LL; | |
712 | + | |
713 | +#pragma acc data copy (lgot, ldata[0:N]) | |
714 | + { | |
715 | +#pragma acc parallel loop | |
716 | + for (i = 0; i < N; i++) | |
717 | + { | |
718 | + long long expr = ~(1 << i); | |
719 | + | |
720 | +#pragma acc atomic capture | |
721 | + { lgot = expr & lgot; ldata[i] = lgot; } | |
722 | + } | |
723 | + } | |
724 | + | |
725 | + if (lexp != lgot) | |
726 | + abort (); | |
727 | + | |
728 | + /* BINOP = ^ */ | |
729 | + lgot = ~0LL; | |
730 | + lexp = 0LL; | |
731 | + | |
732 | +#pragma acc data copy (lgot, ldata[0:N]) | |
733 | + { | |
734 | +#pragma acc parallel loop | |
735 | + for (i = 0; i < N; i++) | |
736 | + { | |
737 | + long long expr = 1 << i; | |
738 | + | |
739 | +#pragma acc atomic capture | |
740 | + { ldata[i] = lgot; lgot ^= expr; } | |
741 | + } | |
742 | + } | |
743 | + | |
744 | + if (lexp != lgot) | |
745 | + abort (); | |
746 | + | |
747 | + lgot = ~0LL; | |
748 | + iexp = 0LL; | |
749 | + | |
750 | +#pragma acc data copy (lgot, ldata[0:N]) | |
751 | + { | |
752 | +#pragma acc parallel loop | |
753 | + for (i = 0; i < N; i++) | |
754 | + { | |
755 | + long long expr = ~(1 << i); | |
756 | + | |
757 | +#pragma acc atomic capture | |
758 | + { lgot ^= expr; ldata[i] = lgot; } | |
759 | + } | |
760 | + } | |
761 | + | |
762 | + if (lexp != lgot) | |
763 | + abort (); | |
764 | + | |
765 | + lgot = ~0LL; | |
766 | + lexp = 0LL; | |
767 | + | |
768 | +#pragma acc data copy (lgot, ldata[0:N]) | |
769 | + { | |
770 | +#pragma acc parallel loop | |
771 | + for (i = 0; i < N; i++) | |
772 | + { | |
773 | + long long expr = ~(1 << i); | |
774 | + | |
775 | +#pragma acc atomic capture | |
776 | + { ldata[i] = lgot; lgot = lgot ^ expr; } | |
777 | + } | |
778 | + } | |
779 | + | |
780 | + if (lexp != lgot) | |
781 | + abort (); | |
782 | + | |
783 | + lgot = ~0LL; | |
784 | + lexp = 0LL; | |
785 | + | |
786 | +#pragma acc data copy (lgot, ldata[0:N]) | |
787 | + { | |
788 | +#pragma acc parallel loop | |
789 | + for (i = 0; i < N; i++) | |
790 | + { | |
791 | + long long expr = ~(1 << i); | |
792 | + | |
793 | +#pragma acc atomic capture | |
794 | + { ldata[i] = lgot; lgot = expr ^ lgot; } | |
795 | + } | |
796 | + } | |
797 | + | |
798 | + if (lexp != lgot) | |
799 | + abort (); | |
800 | + | |
801 | + lgot = ~0LL; | |
802 | + iexp = 0LL; | |
803 | + | |
804 | +#pragma acc data copy (lgot, ldata[0:N]) | |
805 | + { | |
806 | +#pragma acc parallel loop | |
807 | + for (i = 0; i < N; i++) | |
808 | + { | |
809 | + long long expr = ~(1 << i); | |
810 | + | |
811 | +#pragma acc atomic capture | |
812 | + { lgot = lgot ^ expr; ldata[i] = lgot; } | |
813 | + } | |
814 | + } | |
815 | + | |
816 | + if (lexp != lgot) | |
817 | + abort (); | |
818 | + | |
819 | + lgot = ~0LL; | |
820 | + lexp = 0LL; | |
821 | + | |
822 | +#pragma acc data copy (lgot, ldata[0:N]) | |
823 | + { | |
824 | +#pragma acc parallel loop | |
825 | + for (i = 0; i < N; i++) | |
826 | + { | |
827 | + long long expr = ~(1 << i); | |
828 | + | |
829 | +#pragma acc atomic capture | |
830 | + { lgot = expr ^ lgot; ldata[i] = lgot; } | |
831 | + } | |
832 | + } | |
833 | + | |
834 | + if (lexp != lgot) | |
835 | + abort (); | |
836 | + | |
837 | + /* BINOP = | */ | |
838 | + lgot = 0LL; | |
839 | + lexp = ~0LL; | |
840 | + | |
841 | +#pragma acc data copy (lgot, ldata[0:N]) | |
842 | + { | |
843 | +#pragma acc parallel loop | |
844 | + for (i = 0; i < N; i++) | |
845 | + { | |
846 | + long long expr = 1 << i; | |
847 | + | |
848 | +#pragma acc atomic capture | |
849 | + { ldata[i] = lgot; lgot |= expr; } | |
850 | + } | |
851 | + } | |
852 | + | |
853 | + if (lexp != lgot) | |
854 | + abort (); | |
855 | + | |
856 | + lgot = 0LL; | |
857 | + iexp = ~0LL; | |
858 | + | |
859 | +#pragma acc data copy (lgot, ldata[0:N]) | |
860 | + { | |
861 | +#pragma acc parallel loop | |
862 | + for (i = 0; i < N; i++) | |
863 | + { | |
864 | + long long expr = ~(1 << i); | |
865 | + | |
866 | +#pragma acc atomic capture | |
867 | + { lgot |= expr; ldata[i] = lgot; } | |
868 | + } | |
869 | + } | |
870 | + | |
871 | + if (lexp != lgot) | |
872 | + abort (); | |
873 | + | |
874 | + lgot = 0LL; | |
875 | + lexp = ~0LL; | |
876 | + | |
877 | +#pragma acc data copy (lgot, ldata[0:N]) | |
878 | + { | |
879 | +#pragma acc parallel loop | |
880 | + for (i = 0; i < N; i++) | |
881 | + { | |
882 | + long long expr = ~(1 << i); | |
883 | + | |
884 | +#pragma acc atomic capture | |
885 | + { ldata[i] = lgot; lgot = lgot | expr; } | |
886 | + } | |
887 | + } | |
888 | + | |
889 | + if (lexp != lgot) | |
890 | + abort (); | |
891 | + | |
892 | + lgot = 0LL; | |
893 | + lexp = ~0LL; | |
894 | + | |
895 | +#pragma acc data copy (lgot, ldata[0:N]) | |
896 | + { | |
897 | +#pragma acc parallel loop | |
898 | + for (i = 0; i < N; i++) | |
899 | + { | |
900 | + long long expr = ~(1 << i); | |
901 | + | |
902 | +#pragma acc atomic capture | |
903 | + { ldata[i] = lgot; lgot = expr | lgot; } | |
904 | + } | |
905 | + } | |
906 | + | |
907 | + if (lexp != lgot) | |
908 | + abort (); | |
909 | + | |
910 | + lgot = 0LL; | |
911 | + iexp = ~0LL; | |
912 | + | |
913 | +#pragma acc data copy (lgot, ldata[0:N]) | |
914 | + { | |
915 | +#pragma acc parallel loop | |
916 | + for (i = 0; i < N; i++) | |
917 | + { | |
918 | + long long expr = ~(1 << i); | |
919 | + | |
920 | +#pragma acc atomic capture | |
921 | + { lgot = lgot | expr; ldata[i] = lgot; } | |
922 | + } | |
923 | + } | |
924 | + | |
925 | + if (lexp != lgot) | |
926 | + abort (); | |
927 | + | |
928 | + lgot = 0LL; | |
929 | + lexp = ~0LL; | |
930 | + | |
931 | +#pragma acc data copy (lgot, ldata[0:N]) | |
932 | + { | |
933 | +#pragma acc parallel loop | |
934 | + for (i = 0; i < N; i++) | |
935 | + { | |
936 | + long long expr = ~(1 << i); | |
937 | + | |
938 | +#pragma acc atomic capture | |
939 | + { lgot = expr | lgot; ldata[i] = lgot; } | |
940 | + } | |
941 | + } | |
942 | + | |
943 | + if (lexp != lgot) | |
944 | + abort (); | |
945 | + | |
946 | + /* BINOP = << */ | |
947 | + lgot = 1LL; | |
948 | + lexp = 1LL << N; | |
949 | + | |
950 | +#pragma acc data copy (lgot, ldata[0:N]) | |
951 | + { | |
952 | +#pragma acc parallel loop | |
953 | + for (i = 0; i < N; i++) | |
954 | + { | |
955 | + long long expr = 1LL; | |
956 | + | |
957 | +#pragma acc atomic capture | |
958 | + { ldata[i] = lgot; lgot <<= expr; } | |
959 | + } | |
960 | + } | |
961 | + | |
962 | + if (lexp != lgot) | |
963 | + abort (); | |
964 | + | |
965 | + lgot = 1LL; | |
966 | + iexp = 1LL << N; | |
967 | + | |
968 | +#pragma acc data copy (lgot, ldata[0:N]) | |
969 | + { | |
970 | +#pragma acc parallel loop | |
971 | + for (i = 0; i < N; i++) | |
972 | + { | |
973 | + long long expr = 1LL; | |
974 | + | |
975 | +#pragma acc atomic capture | |
976 | + { lgot <<= expr; ldata[i] = lgot; } | |
977 | + } | |
978 | + } | |
979 | + | |
980 | + if (lexp != lgot) | |
981 | + abort (); | |
982 | + | |
983 | + lgot = 1LL; | |
984 | + lexp = 1LL << N; | |
985 | + | |
986 | +#pragma acc data copy (lgot, ldata[0:N]) | |
987 | + { | |
988 | +#pragma acc parallel loop | |
989 | + for (i = 0; i < N; i++) | |
990 | + { | |
991 | + long long expr = 1LL; | |
992 | + | |
993 | +#pragma acc atomic capture | |
994 | + { ldata[i] = lgot; lgot = lgot << expr; } | |
995 | + } | |
996 | + } | |
997 | + | |
998 | + if (lexp != lgot) | |
999 | + abort (); | |
1000 | + | |
1001 | + lgot = 1LL; | |
1002 | + lexp = 2LL; | |
1003 | + | |
1004 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1005 | + { | |
1006 | +#pragma acc parallel loop | |
1007 | + for (i = 0; i < 1; i++) | |
1008 | + { | |
1009 | + long long expr = 1LL; | |
1010 | + | |
1011 | +#pragma acc atomic capture | |
1012 | + { ldata[i] = lgot; lgot = expr << lgot; } | |
1013 | + } | |
1014 | + } | |
1015 | + | |
1016 | + if (lexp != lgot) | |
1017 | + abort (); | |
1018 | + | |
1019 | + lgot = 1LL; | |
1020 | + lexp = 2LL; | |
1021 | + | |
1022 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1023 | + { | |
1024 | +#pragma acc parallel loop | |
1025 | + for (i = 0; i < 1; i++) | |
1026 | + { | |
1027 | + long long expr = 1LL; | |
1028 | + | |
1029 | +#pragma acc atomic capture | |
1030 | + { lgot = lgot << expr; ldata[i] = lgot; } | |
1031 | + } | |
1032 | + } | |
1033 | + | |
1034 | + if (lexp != lgot) | |
1035 | + abort (); | |
1036 | + | |
1037 | + lgot = 1LL; | |
1038 | + lexp = 2LL; | |
1039 | + | |
1040 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1041 | + { | |
1042 | +#pragma acc parallel loop | |
1043 | + for (i = 0; i < 1; i++) | |
1044 | + { | |
1045 | + long long expr = 1LL; | |
1046 | + | |
1047 | +#pragma acc atomic capture | |
1048 | + { lgot = expr << lgot; ldata[i] = lgot; } | |
1049 | + } | |
1050 | + } | |
1051 | + | |
1052 | + if (lexp != lgot) | |
1053 | + abort (); | |
1054 | + | |
1055 | + /* BINOP = >> */ | |
1056 | + lgot = 1LL << N; | |
1057 | + lexp = 1LL; | |
1058 | + | |
1059 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1060 | + { | |
1061 | +#pragma acc parallel loop | |
1062 | + for (i = 0; i < N; i++) | |
1063 | + { | |
1064 | + long long expr = 1LL; | |
1065 | + | |
1066 | +#pragma acc atomic capture | |
1067 | + { ldata[i] = lgot; lgot >>= expr; } | |
1068 | + } | |
1069 | + } | |
1070 | + | |
1071 | + if (lexp != lgot) | |
1072 | + abort (); | |
1073 | + | |
1074 | + lgot = 1LL << N; | |
1075 | + iexp = 1LL; | |
1076 | + | |
1077 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1078 | + { | |
1079 | +#pragma acc parallel loop | |
1080 | + for (i = 0; i < N; i++) | |
1081 | + { | |
1082 | + long long expr = 1LL; | |
1083 | + | |
1084 | +#pragma acc atomic capture | |
1085 | + { lgot >>= expr; ldata[i] = lgot; } | |
1086 | + } | |
1087 | + } | |
1088 | + | |
1089 | + if (lexp != lgot) | |
1090 | + abort (); | |
1091 | + | |
1092 | + lgot = 1LL << N; | |
1093 | + lexp = 1LL; | |
1094 | + | |
1095 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1096 | + { | |
1097 | +#pragma acc parallel loop | |
1098 | + for (i = 0; i < N; i++) | |
1099 | + { | |
1100 | + long long expr = 1LL; | |
1101 | + | |
1102 | +#pragma acc atomic capture | |
1103 | + { ldata[i] = lgot; lgot = lgot >> expr; } | |
1104 | + } | |
1105 | + } | |
1106 | + | |
1107 | + if (lexp != lgot) | |
1108 | + abort (); | |
1109 | + | |
1110 | + lgot = 1LL; | |
1111 | + lexp = 1LL << (N - 1); | |
1112 | + | |
1113 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1114 | + { | |
1115 | +#pragma acc parallel loop | |
1116 | + for (i = 0; i < 1; i++) | |
1117 | + { | |
1118 | + long long expr = 1LL << N; | |
1119 | + | |
1120 | +#pragma acc atomic capture | |
1121 | + { ldata[i] = lgot; lgot = expr >> lgot; } | |
1122 | + } | |
1123 | + } | |
1124 | + | |
1125 | + if (lexp != lgot) | |
1126 | + abort (); | |
1127 | + | |
1128 | + lgot = 1LL << N; | |
1129 | + lexp = 1LL; | |
1130 | + | |
1131 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1132 | + { | |
1133 | +#pragma acc parallel loop | |
1134 | + for (i = 0; i < N; i++) | |
1135 | + { | |
1136 | + long long expr = 1LL; | |
1137 | + | |
1138 | +#pragma acc atomic capture | |
1139 | + { lgot = lgot >> expr; ldata[i] = lgot; } | |
1140 | + } | |
1141 | + } | |
1142 | + | |
1143 | + if (lexp != lgot) | |
1144 | + abort (); | |
1145 | + | |
1146 | + lgot = 1LL; | |
1147 | + lexp = 1LL << (N - 1); | |
1148 | + | |
1149 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1150 | + { | |
1151 | +#pragma acc parallel loop | |
1152 | + for (i = 0; i < 1; i++) | |
1153 | + { | |
1154 | + long long expr = 1LL << N; | |
1155 | + | |
1156 | +#pragma acc atomic capture | |
1157 | + { lgot = expr >> lgot; ldata[i] = lgot; } | |
1158 | + } | |
1159 | + } | |
1160 | + | |
1161 | + if (lexp != lgot) | |
1162 | + abort (); | |
1163 | + | |
1164 | + // FLOAT FLOAT FLOAT | |
1165 | + | |
1166 | + /* BINOP = + */ | |
1167 | + fgot = 0.0; | |
1168 | + fexp = 32.0; | |
1169 | + | |
1170 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1171 | + { | |
1172 | +#pragma acc parallel loop | |
1173 | + for (i = 0; i < N; i++) | |
1174 | + { | |
1175 | + float expr = 1.0; | |
1176 | + | |
1177 | +#pragma acc atomic capture | |
1178 | + { fdata[i] = fgot; fgot += expr; } | |
1179 | + } | |
1180 | + } | |
1181 | + | |
1182 | + if (fexp != fgot) | |
1183 | + abort (); | |
1184 | + | |
1185 | + fgot = 0.0; | |
1186 | + fexp = 32.0; | |
1187 | + | |
1188 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1189 | + { | |
1190 | +#pragma acc parallel loop | |
1191 | + for (i = 0; i < N; i++) | |
1192 | + { | |
1193 | + float expr = 1.0; | |
1194 | + | |
1195 | +#pragma acc atomic capture | |
1196 | + { fgot += expr; fdata[i] = fgot; } | |
1197 | + } | |
1198 | + } | |
1199 | + | |
1200 | + if (fexp != fgot) | |
1201 | + abort (); | |
1202 | + | |
1203 | + fgot = 0.0; | |
1204 | + fexp = 32.0; | |
1205 | + | |
1206 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1207 | + { | |
1208 | +#pragma acc parallel loop | |
1209 | + for (i = 0; i < N; i++) | |
1210 | + { | |
1211 | + float expr = 1.0; | |
1212 | + | |
1213 | +#pragma acc atomic capture | |
1214 | + { idata[i] = fgot; fgot = fgot + expr; } | |
1215 | + } | |
1216 | + } | |
1217 | + | |
1218 | + if (fexp != fgot) | |
1219 | + abort (); | |
1220 | + | |
1221 | + fgot = 0.0; | |
1222 | + fexp = 32.0; | |
1223 | + | |
1224 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1225 | + { | |
1226 | +#pragma acc parallel loop | |
1227 | + for (i = 0; i < N; i++) | |
1228 | + { | |
1229 | + float expr = 1.0; | |
1230 | + | |
1231 | +#pragma acc atomic capture | |
1232 | + { fdata[i] = fgot; fgot = expr + fgot; } | |
1233 | + } | |
1234 | + } | |
1235 | + | |
1236 | + if (fexp != fgot) | |
1237 | + abort (); | |
1238 | + | |
1239 | + fgot = 0.0; | |
1240 | + fexp = 32.0; | |
1241 | + | |
1242 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1243 | + { | |
1244 | +#pragma acc parallel loop | |
1245 | + for (i = 0; i < N; i++) | |
1246 | + { | |
1247 | + float expr = 1.0; | |
1248 | + | |
1249 | +#pragma acc atomic capture | |
1250 | + { fgot = fgot + expr; fdata[i] = fgot; } | |
1251 | + } | |
1252 | + } | |
1253 | + | |
1254 | + if (fexp != fgot) | |
1255 | + abort (); | |
1256 | + | |
1257 | + fgot = 0.0; | |
1258 | + fexp = 32.0; | |
1259 | + | |
1260 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1261 | + { | |
1262 | +#pragma acc parallel loop | |
1263 | + for (i = 0; i < N; i++) | |
1264 | + { | |
1265 | + float expr = 1.0; | |
1266 | + | |
1267 | +#pragma acc atomic capture | |
1268 | + { fgot = expr + fgot; fdata[i] = fgot; } | |
1269 | + } | |
1270 | + } | |
1271 | + | |
1272 | + if (fexp != fgot) | |
1273 | + abort (); | |
1274 | + | |
1275 | + /* BINOP = * */ | |
1276 | + fgot = 1.0; | |
1277 | + fexp = 8192.0*8192.0*64.0; | |
1278 | + | |
1279 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1280 | + { | |
1281 | +#pragma acc parallel loop | |
1282 | + for (i = 0; i < N; i++) | |
1283 | + { | |
1284 | + float expr = 2.0; | |
1285 | + | |
1286 | +#pragma acc atomic capture | |
1287 | + { fdata[i] = fgot; fgot *= expr; } | |
1288 | + } | |
1289 | + } | |
1290 | + | |
1291 | + if (fexp != fgot) | |
1292 | + abort (); | |
1293 | + | |
1294 | + fgot = 1.0; | |
1295 | + fexp = 8192.0*8192.0*64.0; | |
1296 | + | |
1297 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1298 | + { | |
1299 | +#pragma acc parallel loop | |
1300 | + for (i = 0; i < N; i++) | |
1301 | + { | |
1302 | + float expr = 2.0; | |
1303 | + | |
1304 | +#pragma acc atomic capture | |
1305 | + { fgot *= expr; fdata[i] = fgot; } | |
1306 | + } | |
1307 | + } | |
1308 | + | |
1309 | + if (fexp != fgot) | |
1310 | + abort (); | |
1311 | + | |
1312 | + fgot = 1.0; | |
1313 | + fexp = 8192.0*8192.0*64.0; | |
1314 | + | |
1315 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1316 | + { | |
1317 | +#pragma acc parallel loop | |
1318 | + for (i = 0; i < N; i++) | |
1319 | + { | |
1320 | + float expr = 2.0; | |
1321 | + | |
1322 | +#pragma acc atomic capture | |
1323 | + { fdata[i] = fgot; fgot = fgot * expr; } | |
1324 | + } | |
1325 | + } | |
1326 | + | |
1327 | + if (fexp != fgot) | |
1328 | + abort (); | |
1329 | + | |
1330 | + fgot = 1.0; | |
1331 | + fexp = 8192.0*8192.0*64.0; | |
1332 | + | |
1333 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1334 | + { | |
1335 | +#pragma acc parallel loop | |
1336 | + for (i = 0; i < N; i++) | |
1337 | + { | |
1338 | + float expr = 2.0; | |
1339 | + | |
1340 | +#pragma acc atomic capture | |
1341 | + { fdata[i] = fgot; fgot = expr * fgot; } | |
1342 | + } | |
1343 | + } | |
1344 | + | |
1345 | + if (fexp != fgot) | |
1346 | + abort (); | |
1347 | + | |
1348 | + lgot = 1LL; | |
1349 | + lexp = 1LL << 32; | |
1350 | + | |
1351 | +#pragma acc data copy (lgot, ldata[0:N]) | |
1352 | + { | |
1353 | +#pragma acc parallel loop | |
1354 | + for (i = 0; i < N; i++) | |
1355 | + { | |
1356 | + long long expr = 2LL; | |
1357 | + | |
1358 | +#pragma acc atomic capture | |
1359 | + { lgot = lgot * expr; ldata[i] = lgot; } | |
1360 | + } | |
1361 | + } | |
1362 | + | |
1363 | + if (lexp != lgot) | |
1364 | + abort (); | |
1365 | + | |
1366 | + fgot = 1.0; | |
1367 | + fexp = 8192.0*8192.0*64.0; | |
1368 | + | |
1369 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1370 | + { | |
1371 | +#pragma acc parallel loop | |
1372 | + for (i = 0; i < N; i++) | |
1373 | + { | |
1374 | + long long expr = 2; | |
1375 | + | |
1376 | +#pragma acc atomic capture | |
1377 | + { fgot = expr * fgot; fdata[i] = fgot; } | |
1378 | + } | |
1379 | + } | |
1380 | + | |
1381 | + if (fexp != fgot) | |
1382 | + abort (); | |
1383 | + | |
1384 | + /* BINOP = - */ | |
1385 | + fgot = 32.0; | |
1386 | + fexp = 0.0; | |
1387 | + | |
1388 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1389 | + { | |
1390 | +#pragma acc parallel loop | |
1391 | + for (i = 0; i < N; i++) | |
1392 | + { | |
1393 | + float expr = 1.0; | |
1394 | + | |
1395 | +#pragma acc atomic capture | |
1396 | + { fdata[i] = fgot; fgot -= expr; } | |
1397 | + } | |
1398 | + } | |
1399 | + | |
1400 | + if (fexp != fgot) | |
1401 | + abort (); | |
1402 | + | |
1403 | + fgot = 32.0; | |
1404 | + fexp = 0.0; | |
1405 | + | |
1406 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1407 | + { | |
1408 | +#pragma acc parallel loop | |
1409 | + for (i = 0; i < N; i++) | |
1410 | + { | |
1411 | + float expr = 1.0; | |
1412 | + | |
1413 | +#pragma acc atomic capture | |
1414 | + { fgot -= expr; fdata[i] = fgot; } | |
1415 | + } | |
1416 | + } | |
1417 | + | |
1418 | + if (fexp != fgot) | |
1419 | + abort (); | |
1420 | + | |
1421 | + fgot = 32.0; | |
1422 | + fexp = 0.0; | |
1423 | + | |
1424 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1425 | + { | |
1426 | +#pragma acc parallel loop | |
1427 | + for (i = 0; i < N; i++) | |
1428 | + { | |
1429 | + float expr = 1.0; | |
1430 | + | |
1431 | +#pragma acc atomic capture | |
1432 | + { fdata[i] = fgot; fgot = fgot - expr; } | |
1433 | + } | |
1434 | + } | |
1435 | + | |
1436 | + if (fexp != fgot) | |
1437 | + abort (); | |
1438 | + | |
1439 | + fgot = 1.0; | |
1440 | + fexp = 1.0; | |
1441 | + | |
1442 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1443 | + { | |
1444 | +#pragma acc parallel loop | |
1445 | + for (i = 0; i < N; i++) | |
1446 | + { | |
1447 | + float expr = 1.0; | |
1448 | + | |
1449 | +#pragma acc atomic capture | |
1450 | + { fdata[i] = fgot; fgot = expr - fgot; } | |
1451 | + } | |
1452 | + } | |
1453 | + | |
1454 | + for (i = 0; i < N; i++) | |
1455 | + if (i % 2 == 0) | |
1456 | + { | |
1457 | + if (fdata[i] != 1.0) | |
1458 | + abort (); | |
1459 | + } | |
1460 | + else | |
1461 | + { | |
1462 | + if (fdata[i] != 0.0) | |
1463 | + abort (); | |
1464 | + } | |
1465 | + | |
1466 | + if (fexp != fgot) | |
1467 | + abort (); | |
1468 | + | |
1469 | + fgot = 1.0; | |
1470 | + fexp = -31.0; | |
1471 | + | |
1472 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1473 | + { | |
1474 | +#pragma acc parallel loop | |
1475 | + for (i = 0; i < N; i++) | |
1476 | + { | |
1477 | + float expr = 1.0; | |
1478 | + | |
1479 | +#pragma acc atomic capture | |
1480 | + { fgot = fgot - expr; fdata[i] = fgot; } | |
1481 | + } | |
1482 | + } | |
1483 | + | |
1484 | + if (fexp != fgot) | |
1485 | + abort (); | |
1486 | + | |
1487 | + fgot = 1.0; | |
1488 | + fexp = 1.0; | |
1489 | + | |
1490 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1491 | + { | |
1492 | +#pragma acc parallel loop | |
1493 | + for (i = 0; i < N; i++) | |
1494 | + { | |
1495 | + float expr = 1.0; | |
1496 | + | |
1497 | +#pragma acc atomic capture | |
1498 | + { fgot = expr - fgot; fdata[i] = fgot; } | |
1499 | + } | |
1500 | + } | |
1501 | + | |
1502 | + for (i = 0; i < N; i++) | |
1503 | + if (i % 2 == 0) | |
1504 | + { | |
1505 | + if (fdata[i] != 0.0) | |
1506 | + abort (); | |
1507 | + } | |
1508 | + else | |
1509 | + { | |
1510 | + if (fdata[i] != 1.0) | |
1511 | + abort (); | |
1512 | + } | |
1513 | + | |
1514 | + if (fexp != fgot) | |
1515 | + abort (); | |
1516 | + | |
1517 | + /* BINOP = / */ | |
1518 | + fgot = 8192.0*8192.0*64.0; | |
1519 | + fexp = 1.0; | |
1520 | + | |
1521 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1522 | + { | |
1523 | +#pragma acc parallel loop | |
1524 | + for (i = 0; i < N; i++) | |
1525 | + { | |
1526 | + float expr = 2.0; | |
1527 | + | |
1528 | +#pragma acc atomic capture | |
1529 | + { fdata[i] = fgot; fgot /= expr; } | |
1530 | + } | |
1531 | + } | |
1532 | + | |
1533 | + if (fexp != fgot) | |
1534 | + abort (); | |
1535 | + | |
1536 | + fgot = 8192.0*8192.0*64.0; | |
1537 | + fexp = 1.0; | |
1538 | + | |
1539 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1540 | + { | |
1541 | +#pragma acc parallel loop | |
1542 | + for (i = 0; i < N; i++) | |
1543 | + { | |
1544 | + float expr = 2.0; | |
1545 | + | |
1546 | +#pragma acc atomic capture | |
1547 | + { fgot /= expr; fdata[i] = fgot; } | |
1548 | + } | |
1549 | + } | |
1550 | + | |
1551 | + if (fexp != fgot) | |
1552 | + abort (); | |
1553 | + | |
1554 | + fgot = 8192.0*8192.0*64.0; | |
1555 | + fexp = 1.0; | |
1556 | + | |
1557 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1558 | + { | |
1559 | +#pragma acc parallel loop | |
1560 | + for (i = 0; i < N; i++) | |
1561 | + { | |
1562 | + float expr = 2.0; | |
1563 | + | |
1564 | +#pragma acc atomic capture | |
1565 | + { fdata[i] = fgot; fgot = fgot / expr; } | |
1566 | + } | |
1567 | + } | |
1568 | + | |
1569 | + if (fexp != fgot) | |
1570 | + abort (); | |
1571 | + | |
1572 | + fgot = 8192.0*8192.0*64.0; | |
1573 | + fexp = 1.0; | |
1574 | + | |
1575 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1576 | + { | |
1577 | +#pragma acc parallel loop | |
1578 | + for (i = 0; i < N; i++) | |
1579 | + { | |
1580 | + float expr = 1.0; | |
1581 | + | |
1582 | +#pragma acc atomic capture | |
1583 | + { fdata[i] = fgot; fgot = expr / fgot; } | |
1584 | + } | |
1585 | + } | |
1586 | + | |
1587 | + if (fexp != fgot) | |
1588 | + abort (); | |
1589 | + | |
1590 | + fgot = 4.0; | |
1591 | + fexp = 4.0; | |
1592 | + | |
1593 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1594 | + { | |
1595 | +#pragma acc parallel loop | |
1596 | + for (i = 0; i < N; i++) | |
1597 | + { | |
1598 | + long long expr = 1LL << N; | |
1599 | + | |
1600 | +#pragma acc atomic capture | |
1601 | + { fgot = fgot / expr; fdata[i] = fgot; } | |
1602 | + } | |
1603 | + } | |
1604 | + | |
1605 | + if (fexp != fgot) | |
1606 | + abort (); | |
1607 | + | |
1608 | + fgot = 4.0; | |
1609 | + fexp = 4.0; | |
1610 | + | |
1611 | +#pragma acc data copy (fgot, fdata[0:N]) | |
1612 | + { | |
1613 | +#pragma acc parallel loop | |
1614 | + for (i = 0; i < N; i++) | |
1615 | + { | |
1616 | + float expr = 2.0; | |
1617 | + | |
1618 | +#pragma acc atomic capture | |
1619 | + { fgot = expr / fgot; fdata[i] = fgot; } | |
1620 | + } | |
1621 | + } | |
1622 | + | |
1623 | + if (fexp != fgot) | |
1624 | + abort (); | |
1625 | + | |
1626 | + return 0; | |
1627 | +} |