summaryrefslogtreecommitdiff
path: root/share/doc/gcc-linaro-aarch64-linux-gnu/html/gcc/X86-Built_002din-Functions.html
blob: b08c5b6150f4f0c6778f2796fe8a38bc27821293 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
<html lang="en">
<head>
<title>X86 Built-in Functions - Using the GNU Compiler Collection (GCC)</title>
<meta http-equiv="Content-Type" content="text/html">
<meta name="description" content="Using the GNU Compiler Collection (GCC)">
<meta name="generator" content="makeinfo 4.13">
<link title="Top" rel="start" href="index.html#Top">
<link rel="up" href="Target-Builtins.html#Target-Builtins" title="Target Builtins">
<link rel="prev" href="FR_002dV-Built_002din-Functions.html#FR_002dV-Built_002din-Functions" title="FR-V Built-in Functions">
<link rel="next" href="MIPS-DSP-Built_002din-Functions.html#MIPS-DSP-Built_002din-Functions" title="MIPS DSP Built-in Functions">
<link href="http://www.gnu.org/software/texinfo/" rel="generator-home" title="Texinfo Homepage">
<!--
Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997,
1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
2010 Free Software Foundation, Inc.

Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
Invariant Sections being ``Funding Free Software'', the Front-Cover
Texts being (a) (see below), and with the Back-Cover Texts being (b)
(see below).  A copy of the license is included in the section entitled
``GNU Free Documentation License''.

(a) The FSF's Front-Cover Text is:

     A GNU Manual

(b) The FSF's Back-Cover Text is:

     You have freedom to copy and modify this GNU Manual, like GNU
     software.  Copies published by the Free Software Foundation raise
     funds for GNU development.-->
<meta http-equiv="Content-Style-Type" content="text/css">
<style type="text/css"><!--
  pre.display { font-family:inherit }
  pre.format  { font-family:inherit }
  pre.smalldisplay { font-family:inherit; font-size:smaller }
  pre.smallformat  { font-family:inherit; font-size:smaller }
  pre.smallexample { font-size:smaller }
  pre.smalllisp    { font-size:smaller }
  span.sc    { font-variant:small-caps }
  span.roman { font-family:serif; font-weight:normal; } 
  span.sansserif { font-family:sans-serif; font-weight:normal; } 
--></style>
</head>
<body>
<div class="node">
<a name="X86-Built-in-Functions"></a>
<a name="X86-Built_002din-Functions"></a>
<p>
Next:&nbsp;<a rel="next" accesskey="n" href="MIPS-DSP-Built_002din-Functions.html#MIPS-DSP-Built_002din-Functions">MIPS DSP Built-in Functions</a>,
Previous:&nbsp;<a rel="previous" accesskey="p" href="FR_002dV-Built_002din-Functions.html#FR_002dV-Built_002din-Functions">FR-V Built-in Functions</a>,
Up:&nbsp;<a rel="up" accesskey="u" href="Target-Builtins.html#Target-Builtins">Target Builtins</a>
<hr>
</div>

<h4 class="subsection">6.55.7 X86 Built-in Functions</h4>

<p>These built-in functions are available for the i386 and x86-64 family
of computers, depending on the command-line switches used.

 <p>Note that, if you specify command-line switches such as <samp><span class="option">-msse</span></samp>,
the compiler could use the extended instruction sets even if the built-ins
are not used explicitly in the program.  For this reason, applications
which perform runtime CPU detection must compile separate files for each
supported architecture, using the appropriate flags.  In particular,
the file containing the CPU detection code should be compiled without
these options.

 <p>The following machine modes are available for use with MMX built-in functions
(see <a href="Vector-Extensions.html#Vector-Extensions">Vector Extensions</a>): <code>V2SI</code> for a vector of two 32-bit integers,
<code>V4HI</code> for a vector of four 16-bit integers, and <code>V8QI</code> for a
vector of eight 8-bit integers.  Some of the built-in functions operate on
MMX registers as a whole 64-bit entity; these use <code>V1DI</code> as their mode.

 <p>If 3DNow! extensions are enabled, <code>V2SF</code> is used as a mode for a vector
of two 32-bit floating point values.

 <p>If SSE extensions are enabled, <code>V4SF</code> is used for a vector of four 32-bit
floating point values.  Some instructions use a vector of four 32-bit
integers; these use <code>V4SI</code>.  Finally, some instructions operate on an
entire vector register, interpreting it as a 128-bit integer; these use mode
<code>TI</code>.

 <p>In 64-bit mode, the x86-64 family of processors uses additional built-in
functions for efficient use of <code>TF</code> (<code>__float128</code>) 128-bit
floating point and <code>TC</code> 128-bit complex floating point values.

 <p>The following floating point built-in functions are available in 64-bit
mode.  All of them implement the function that is part of the name.

<pre class="smallexample">     __float128 __builtin_fabsq (__float128)
     __float128 __builtin_copysignq (__float128, __float128)
</pre>
 <p>The following built-in function is always available.

     <dl>
<dt><code>void __builtin_ia32_pause (void)</code><dd>Generates the <code>pause</code> machine instruction with a compiler memory
barrier. 
</dl>

 <p>The following floating point built-in functions are made available in the
64-bit mode.

     <dl>
<dt><code>__float128 __builtin_infq (void)</code><dd>Similar to <code>__builtin_inf</code>, except the return type is <code>__float128</code>. 
<a name="index-g_t_005f_005fbuiltin_005finfq-3327"></a>
<br><dt><code>__float128 __builtin_huge_valq (void)</code><dd>Similar to <code>__builtin_huge_val</code>, except the return type is <code>__float128</code>. 
<a name="index-g_t_005f_005fbuiltin_005fhuge_005fvalq-3328"></a></dl>

 <p>The following built-in functions are made available by <samp><span class="option">-mmmx</span></samp>. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     v8qi __builtin_ia32_paddb (v8qi, v8qi)
     v4hi __builtin_ia32_paddw (v4hi, v4hi)
     v2si __builtin_ia32_paddd (v2si, v2si)
     v8qi __builtin_ia32_psubb (v8qi, v8qi)
     v4hi __builtin_ia32_psubw (v4hi, v4hi)
     v2si __builtin_ia32_psubd (v2si, v2si)
     v8qi __builtin_ia32_paddsb (v8qi, v8qi)
     v4hi __builtin_ia32_paddsw (v4hi, v4hi)
     v8qi __builtin_ia32_psubsb (v8qi, v8qi)
     v4hi __builtin_ia32_psubsw (v4hi, v4hi)
     v8qi __builtin_ia32_paddusb (v8qi, v8qi)
     v4hi __builtin_ia32_paddusw (v4hi, v4hi)
     v8qi __builtin_ia32_psubusb (v8qi, v8qi)
     v4hi __builtin_ia32_psubusw (v4hi, v4hi)
     v4hi __builtin_ia32_pmullw (v4hi, v4hi)
     v4hi __builtin_ia32_pmulhw (v4hi, v4hi)
     di __builtin_ia32_pand (di, di)
     di __builtin_ia32_pandn (di, di)
     di __builtin_ia32_por (di, di)
     di __builtin_ia32_pxor (di, di)
     v8qi __builtin_ia32_pcmpeqb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpeqw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpeqd (v2si, v2si)
     v8qi __builtin_ia32_pcmpgtb (v8qi, v8qi)
     v4hi __builtin_ia32_pcmpgtw (v4hi, v4hi)
     v2si __builtin_ia32_pcmpgtd (v2si, v2si)
     v8qi __builtin_ia32_punpckhbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpckhwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckhdq (v2si, v2si)
     v8qi __builtin_ia32_punpcklbw (v8qi, v8qi)
     v4hi __builtin_ia32_punpcklwd (v4hi, v4hi)
     v2si __builtin_ia32_punpckldq (v2si, v2si)
     v8qi __builtin_ia32_packsswb (v4hi, v4hi)
     v4hi __builtin_ia32_packssdw (v2si, v2si)
     v8qi __builtin_ia32_packuswb (v4hi, v4hi)
     
     v4hi __builtin_ia32_psllw (v4hi, v4hi)
     v2si __builtin_ia32_pslld (v2si, v2si)
     v1di __builtin_ia32_psllq (v1di, v1di)
     v4hi __builtin_ia32_psrlw (v4hi, v4hi)
     v2si __builtin_ia32_psrld (v2si, v2si)
     v1di __builtin_ia32_psrlq (v1di, v1di)
     v4hi __builtin_ia32_psraw (v4hi, v4hi)
     v2si __builtin_ia32_psrad (v2si, v2si)
     v4hi __builtin_ia32_psllwi (v4hi, int)
     v2si __builtin_ia32_pslldi (v2si, int)
     v1di __builtin_ia32_psllqi (v1di, int)
     v4hi __builtin_ia32_psrlwi (v4hi, int)
     v2si __builtin_ia32_psrldi (v2si, int)
     v1di __builtin_ia32_psrlqi (v1di, int)
     v4hi __builtin_ia32_psrawi (v4hi, int)
     v2si __builtin_ia32_psradi (v2si, int)
     
</pre>
 <p>The following built-in functions are made available either with
<samp><span class="option">-msse</span></samp>, or with a combination of <samp><span class="option">-m3dnow</span></samp> and
<samp><span class="option">-march=athlon</span></samp>.  All of them generate the machine
instruction that is part of the name.

<pre class="smallexample">     v4hi __builtin_ia32_pmulhuw (v4hi, v4hi)
     v8qi __builtin_ia32_pavgb (v8qi, v8qi)
     v4hi __builtin_ia32_pavgw (v4hi, v4hi)
     v1di __builtin_ia32_psadbw (v8qi, v8qi)
     v8qi __builtin_ia32_pmaxub (v8qi, v8qi)
     v4hi __builtin_ia32_pmaxsw (v4hi, v4hi)
     v8qi __builtin_ia32_pminub (v8qi, v8qi)
     v4hi __builtin_ia32_pminsw (v4hi, v4hi)
     int __builtin_ia32_pextrw (v4hi, int)
     v4hi __builtin_ia32_pinsrw (v4hi, int, int)
     int __builtin_ia32_pmovmskb (v8qi)
     void __builtin_ia32_maskmovq (v8qi, v8qi, char *)
     void __builtin_ia32_movntq (di *, di)
     void __builtin_ia32_sfence (void)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse</span></samp> is used. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     int __builtin_ia32_comieq (v4sf, v4sf)
     int __builtin_ia32_comineq (v4sf, v4sf)
     int __builtin_ia32_comilt (v4sf, v4sf)
     int __builtin_ia32_comile (v4sf, v4sf)
     int __builtin_ia32_comigt (v4sf, v4sf)
     int __builtin_ia32_comige (v4sf, v4sf)
     int __builtin_ia32_ucomieq (v4sf, v4sf)
     int __builtin_ia32_ucomineq (v4sf, v4sf)
     int __builtin_ia32_ucomilt (v4sf, v4sf)
     int __builtin_ia32_ucomile (v4sf, v4sf)
     int __builtin_ia32_ucomigt (v4sf, v4sf)
     int __builtin_ia32_ucomige (v4sf, v4sf)
     v4sf __builtin_ia32_addps (v4sf, v4sf)
     v4sf __builtin_ia32_subps (v4sf, v4sf)
     v4sf __builtin_ia32_mulps (v4sf, v4sf)
     v4sf __builtin_ia32_divps (v4sf, v4sf)
     v4sf __builtin_ia32_addss (v4sf, v4sf)
     v4sf __builtin_ia32_subss (v4sf, v4sf)
     v4sf __builtin_ia32_mulss (v4sf, v4sf)
     v4sf __builtin_ia32_divss (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
     v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
     v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
     v4si __builtin_ia32_cmpordps (v4sf, v4sf)
     v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpltss (v4sf, v4sf)
     v4si __builtin_ia32_cmpless (v4sf, v4sf)
     v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
     v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
     v4si __builtin_ia32_cmpnltss (v4sf, v4sf)
     v4si __builtin_ia32_cmpnless (v4sf, v4sf)
     v4si __builtin_ia32_cmpordss (v4sf, v4sf)
     v4sf __builtin_ia32_maxps (v4sf, v4sf)
     v4sf __builtin_ia32_maxss (v4sf, v4sf)
     v4sf __builtin_ia32_minps (v4sf, v4sf)
     v4sf __builtin_ia32_minss (v4sf, v4sf)
     v4sf __builtin_ia32_andps (v4sf, v4sf)
     v4sf __builtin_ia32_andnps (v4sf, v4sf)
     v4sf __builtin_ia32_orps (v4sf, v4sf)
     v4sf __builtin_ia32_xorps (v4sf, v4sf)
     v4sf __builtin_ia32_movss (v4sf, v4sf)
     v4sf __builtin_ia32_movhlps (v4sf, v4sf)
     v4sf __builtin_ia32_movlhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpckhps (v4sf, v4sf)
     v4sf __builtin_ia32_unpcklps (v4sf, v4sf)
     v4sf __builtin_ia32_cvtpi2ps (v4sf, v2si)
     v4sf __builtin_ia32_cvtsi2ss (v4sf, int)
     v2si __builtin_ia32_cvtps2pi (v4sf)
     int __builtin_ia32_cvtss2si (v4sf)
     v2si __builtin_ia32_cvttps2pi (v4sf)
     int __builtin_ia32_cvttss2si (v4sf)
     v4sf __builtin_ia32_rcpps (v4sf)
     v4sf __builtin_ia32_rsqrtps (v4sf)
     v4sf __builtin_ia32_sqrtps (v4sf)
     v4sf __builtin_ia32_rcpss (v4sf)
     v4sf __builtin_ia32_rsqrtss (v4sf)
     v4sf __builtin_ia32_sqrtss (v4sf)
     v4sf __builtin_ia32_shufps (v4sf, v4sf, int)
     void __builtin_ia32_movntps (float *, v4sf)
     int __builtin_ia32_movmskps (v4sf)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse</span></samp> is used.

     <dl>
<dt><code>v4sf __builtin_ia32_loadaps (float *)</code><dd>Generates the <code>movaps</code> machine instruction as a load from memory. 
<br><dt><code>void __builtin_ia32_storeaps (float *, v4sf)</code><dd>Generates the <code>movaps</code> machine instruction as a store to memory. 
<br><dt><code>v4sf __builtin_ia32_loadups (float *)</code><dd>Generates the <code>movups</code> machine instruction as a load from memory. 
<br><dt><code>void __builtin_ia32_storeups (float *, v4sf)</code><dd>Generates the <code>movups</code> machine instruction as a store to memory. 
<br><dt><code>v4sf __builtin_ia32_loadss (float *)</code><dd>Generates the <code>movss</code> machine instruction as a load from memory. 
<br><dt><code>void __builtin_ia32_storess (float *, v4sf)</code><dd>Generates the <code>movss</code> machine instruction as a store to memory. 
<br><dt><code>v4sf __builtin_ia32_loadhps (v4sf, const v2sf *)</code><dd>Generates the <code>movhps</code> machine instruction as a load from memory. 
<br><dt><code>v4sf __builtin_ia32_loadlps (v4sf, const v2sf *)</code><dd>Generates the <code>movlps</code> machine instruction as a load from memory. 
<br><dt><code>void __builtin_ia32_storehps (v2sf *, v4sf)</code><dd>Generates the <code>movhps</code> machine instruction as a store to memory. 
<br><dt><code>void __builtin_ia32_storelps (v2sf *, v4sf)</code><dd>Generates the <code>movlps</code> machine instruction as a store to memory. 
</dl>

 <p>The following built-in functions are available when <samp><span class="option">-msse2</span></samp> is used. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     int __builtin_ia32_comisdeq (v2df, v2df)
     int __builtin_ia32_comisdlt (v2df, v2df)
     int __builtin_ia32_comisdle (v2df, v2df)
     int __builtin_ia32_comisdgt (v2df, v2df)
     int __builtin_ia32_comisdge (v2df, v2df)
     int __builtin_ia32_comisdneq (v2df, v2df)
     int __builtin_ia32_ucomisdeq (v2df, v2df)
     int __builtin_ia32_ucomisdlt (v2df, v2df)
     int __builtin_ia32_ucomisdle (v2df, v2df)
     int __builtin_ia32_ucomisdgt (v2df, v2df)
     int __builtin_ia32_ucomisdge (v2df, v2df)
     int __builtin_ia32_ucomisdneq (v2df, v2df)
     v2df __builtin_ia32_cmpeqpd (v2df, v2df)
     v2df __builtin_ia32_cmpltpd (v2df, v2df)
     v2df __builtin_ia32_cmplepd (v2df, v2df)
     v2df __builtin_ia32_cmpgtpd (v2df, v2df)
     v2df __builtin_ia32_cmpgepd (v2df, v2df)
     v2df __builtin_ia32_cmpunordpd (v2df, v2df)
     v2df __builtin_ia32_cmpneqpd (v2df, v2df)
     v2df __builtin_ia32_cmpnltpd (v2df, v2df)
     v2df __builtin_ia32_cmpnlepd (v2df, v2df)
     v2df __builtin_ia32_cmpngtpd (v2df, v2df)
     v2df __builtin_ia32_cmpngepd (v2df, v2df)
     v2df __builtin_ia32_cmpordpd (v2df, v2df)
     v2df __builtin_ia32_cmpeqsd (v2df, v2df)
     v2df __builtin_ia32_cmpltsd (v2df, v2df)
     v2df __builtin_ia32_cmplesd (v2df, v2df)
     v2df __builtin_ia32_cmpunordsd (v2df, v2df)
     v2df __builtin_ia32_cmpneqsd (v2df, v2df)
     v2df __builtin_ia32_cmpnltsd (v2df, v2df)
     v2df __builtin_ia32_cmpnlesd (v2df, v2df)
     v2df __builtin_ia32_cmpordsd (v2df, v2df)
     v2di __builtin_ia32_paddq (v2di, v2di)
     v2di __builtin_ia32_psubq (v2di, v2di)
     v2df __builtin_ia32_addpd (v2df, v2df)
     v2df __builtin_ia32_subpd (v2df, v2df)
     v2df __builtin_ia32_mulpd (v2df, v2df)
     v2df __builtin_ia32_divpd (v2df, v2df)
     v2df __builtin_ia32_addsd (v2df, v2df)
     v2df __builtin_ia32_subsd (v2df, v2df)
     v2df __builtin_ia32_mulsd (v2df, v2df)
     v2df __builtin_ia32_divsd (v2df, v2df)
     v2df __builtin_ia32_minpd (v2df, v2df)
     v2df __builtin_ia32_maxpd (v2df, v2df)
     v2df __builtin_ia32_minsd (v2df, v2df)
     v2df __builtin_ia32_maxsd (v2df, v2df)
     v2df __builtin_ia32_andpd (v2df, v2df)
     v2df __builtin_ia32_andnpd (v2df, v2df)
     v2df __builtin_ia32_orpd (v2df, v2df)
     v2df __builtin_ia32_xorpd (v2df, v2df)
     v2df __builtin_ia32_movsd (v2df, v2df)
     v2df __builtin_ia32_unpckhpd (v2df, v2df)
     v2df __builtin_ia32_unpcklpd (v2df, v2df)
     v16qi __builtin_ia32_paddb128 (v16qi, v16qi)
     v8hi __builtin_ia32_paddw128 (v8hi, v8hi)
     v4si __builtin_ia32_paddd128 (v4si, v4si)
     v2di __builtin_ia32_paddq128 (v2di, v2di)
     v16qi __builtin_ia32_psubb128 (v16qi, v16qi)
     v8hi __builtin_ia32_psubw128 (v8hi, v8hi)
     v4si __builtin_ia32_psubd128 (v4si, v4si)
     v2di __builtin_ia32_psubq128 (v2di, v2di)
     v8hi __builtin_ia32_pmullw128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmulhw128 (v8hi, v8hi)
     v2di __builtin_ia32_pand128 (v2di, v2di)
     v2di __builtin_ia32_pandn128 (v2di, v2di)
     v2di __builtin_ia32_por128 (v2di, v2di)
     v2di __builtin_ia32_pxor128 (v2di, v2di)
     v16qi __builtin_ia32_pavgb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pavgw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pcmpeqb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpeqw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpeqd128 (v4si, v4si)
     v16qi __builtin_ia32_pcmpgtb128 (v16qi, v16qi)
     v8hi __builtin_ia32_pcmpgtw128 (v8hi, v8hi)
     v4si __builtin_ia32_pcmpgtd128 (v4si, v4si)
     v16qi __builtin_ia32_pmaxub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmaxsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pminub128 (v16qi, v16qi)
     v8hi __builtin_ia32_pminsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_punpckhbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpckhwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckhdq128 (v4si, v4si)
     v2di __builtin_ia32_punpckhqdq128 (v2di, v2di)
     v16qi __builtin_ia32_punpcklbw128 (v16qi, v16qi)
     v8hi __builtin_ia32_punpcklwd128 (v8hi, v8hi)
     v4si __builtin_ia32_punpckldq128 (v4si, v4si)
     v2di __builtin_ia32_punpcklqdq128 (v2di, v2di)
     v16qi __builtin_ia32_packsswb128 (v8hi, v8hi)
     v8hi __builtin_ia32_packssdw128 (v4si, v4si)
     v16qi __builtin_ia32_packuswb128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmulhuw128 (v8hi, v8hi)
     void __builtin_ia32_maskmovdqu (v16qi, v16qi)
     v2df __builtin_ia32_loadupd (double *)
     void __builtin_ia32_storeupd (double *, v2df)
     v2df __builtin_ia32_loadhpd (v2df, double const *)
     v2df __builtin_ia32_loadlpd (v2df, double const *)
     int __builtin_ia32_movmskpd (v2df)
     int __builtin_ia32_pmovmskb128 (v16qi)
     void __builtin_ia32_movnti (int *, int)
     void __builtin_ia32_movnti64 (long long int *, long long int)
     void __builtin_ia32_movntpd (double *, v2df)
     void __builtin_ia32_movntdq (v2di *, v2di)
     v4si __builtin_ia32_pshufd (v4si, int)
     v8hi __builtin_ia32_pshuflw (v8hi, int)
     v8hi __builtin_ia32_pshufhw (v8hi, int)
     v2di __builtin_ia32_psadbw128 (v16qi, v16qi)
     v2df __builtin_ia32_sqrtpd (v2df)
     v2df __builtin_ia32_sqrtsd (v2df)
     v2df __builtin_ia32_shufpd (v2df, v2df, int)
     v2df __builtin_ia32_cvtdq2pd (v4si)
     v4sf __builtin_ia32_cvtdq2ps (v4si)
     v4si __builtin_ia32_cvtpd2dq (v2df)
     v2si __builtin_ia32_cvtpd2pi (v2df)
     v4sf __builtin_ia32_cvtpd2ps (v2df)
     v4si __builtin_ia32_cvttpd2dq (v2df)
     v2si __builtin_ia32_cvttpd2pi (v2df)
     v2df __builtin_ia32_cvtpi2pd (v2si)
     int __builtin_ia32_cvtsd2si (v2df)
     int __builtin_ia32_cvttsd2si (v2df)
     long long __builtin_ia32_cvtsd2si64 (v2df)
     long long __builtin_ia32_cvttsd2si64 (v2df)
     v4si __builtin_ia32_cvtps2dq (v4sf)
     v2df __builtin_ia32_cvtps2pd (v4sf)
     v4si __builtin_ia32_cvttps2dq (v4sf)
     v2df __builtin_ia32_cvtsi2sd (v2df, int)
     v2df __builtin_ia32_cvtsi642sd (v2df, long long)
     v4sf __builtin_ia32_cvtsd2ss (v4sf, v2df)
     v2df __builtin_ia32_cvtss2sd (v2df, v4sf)
     void __builtin_ia32_clflush (const void *)
     void __builtin_ia32_lfence (void)
     void __builtin_ia32_mfence (void)
     v16qi __builtin_ia32_loaddqu (const char *)
     void __builtin_ia32_storedqu (char *, v16qi)
     v1di __builtin_ia32_pmuludq (v2si, v2si)
     v2di __builtin_ia32_pmuludq128 (v4si, v4si)
     v8hi __builtin_ia32_psllw128 (v8hi, v8hi)
     v4si __builtin_ia32_pslld128 (v4si, v4si)
     v2di __builtin_ia32_psllq128 (v2di, v2di)
     v8hi __builtin_ia32_psrlw128 (v8hi, v8hi)
     v4si __builtin_ia32_psrld128 (v4si, v4si)
     v2di __builtin_ia32_psrlq128 (v2di, v2di)
     v8hi __builtin_ia32_psraw128 (v8hi, v8hi)
     v4si __builtin_ia32_psrad128 (v4si, v4si)
     v2di __builtin_ia32_pslldqi128 (v2di, int)
     v8hi __builtin_ia32_psllwi128 (v8hi, int)
     v4si __builtin_ia32_pslldi128 (v4si, int)
     v2di __builtin_ia32_psllqi128 (v2di, int)
     v2di __builtin_ia32_psrldqi128 (v2di, int)
     v8hi __builtin_ia32_psrlwi128 (v8hi, int)
     v4si __builtin_ia32_psrldi128 (v4si, int)
     v2di __builtin_ia32_psrlqi128 (v2di, int)
     v8hi __builtin_ia32_psrawi128 (v8hi, int)
     v4si __builtin_ia32_psradi128 (v4si, int)
     v4si __builtin_ia32_pmaddwd128 (v8hi, v8hi)
     v2di __builtin_ia32_movq128 (v2di)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse3</span></samp> is used. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     v2df __builtin_ia32_addsubpd (v2df, v2df)
     v4sf __builtin_ia32_addsubps (v4sf, v4sf)
     v2df __builtin_ia32_haddpd (v2df, v2df)
     v4sf __builtin_ia32_haddps (v4sf, v4sf)
     v2df __builtin_ia32_hsubpd (v2df, v2df)
     v4sf __builtin_ia32_hsubps (v4sf, v4sf)
     v16qi __builtin_ia32_lddqu (char const *)
     void __builtin_ia32_monitor (void *, unsigned int, unsigned int)
     v2df __builtin_ia32_movddup (v2df)
     v4sf __builtin_ia32_movshdup (v4sf)
     v4sf __builtin_ia32_movsldup (v4sf)
     void __builtin_ia32_mwait (unsigned int, unsigned int)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse3</span></samp> is used.

     <dl>
<dt><code>v2df __builtin_ia32_loadddup (double const *)</code><dd>Generates the <code>movddup</code> machine instruction as a load from memory. 
</dl>

 <p>The following built-in functions are available when <samp><span class="option">-mssse3</span></samp> is used. 
All of them generate the machine instruction that is part of the name
with MMX registers.

<pre class="smallexample">     v2si __builtin_ia32_phaddd (v2si, v2si)
     v4hi __builtin_ia32_phaddw (v4hi, v4hi)
     v4hi __builtin_ia32_phaddsw (v4hi, v4hi)
     v2si __builtin_ia32_phsubd (v2si, v2si)
     v4hi __builtin_ia32_phsubw (v4hi, v4hi)
     v4hi __builtin_ia32_phsubsw (v4hi, v4hi)
     v4hi __builtin_ia32_pmaddubsw (v8qi, v8qi)
     v4hi __builtin_ia32_pmulhrsw (v4hi, v4hi)
     v8qi __builtin_ia32_pshufb (v8qi, v8qi)
     v8qi __builtin_ia32_psignb (v8qi, v8qi)
     v2si __builtin_ia32_psignd (v2si, v2si)
     v4hi __builtin_ia32_psignw (v4hi, v4hi)
     v1di __builtin_ia32_palignr (v1di, v1di, int)
     v8qi __builtin_ia32_pabsb (v8qi)
     v2si __builtin_ia32_pabsd (v2si)
     v4hi __builtin_ia32_pabsw (v4hi)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mssse3</span></samp> is used. 
All of them generate the machine instruction that is part of the name
with SSE registers.

<pre class="smallexample">     v4si __builtin_ia32_phaddd128 (v4si, v4si)
     v8hi __builtin_ia32_phaddw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phaddsw128 (v8hi, v8hi)
     v4si __builtin_ia32_phsubd128 (v4si, v4si)
     v8hi __builtin_ia32_phsubw128 (v8hi, v8hi)
     v8hi __builtin_ia32_phsubsw128 (v8hi, v8hi)
     v8hi __builtin_ia32_pmaddubsw128 (v16qi, v16qi)
     v8hi __builtin_ia32_pmulhrsw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pshufb128 (v16qi, v16qi)
     v16qi __builtin_ia32_psignb128 (v16qi, v16qi)
     v4si __builtin_ia32_psignd128 (v4si, v4si)
     v8hi __builtin_ia32_psignw128 (v8hi, v8hi)
     v2di __builtin_ia32_palignr128 (v2di, v2di, int)
     v16qi __builtin_ia32_pabsb128 (v16qi)
     v4si __builtin_ia32_pabsd128 (v4si)
     v8hi __builtin_ia32_pabsw128 (v8hi)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse4.1</span></samp> is
used.  All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     v2df __builtin_ia32_blendpd (v2df, v2df, const int)
     v4sf __builtin_ia32_blendps (v4sf, v4sf, const int)
     v2df __builtin_ia32_blendvpd (v2df, v2df, v2df)
     v4sf __builtin_ia32_blendvps (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_dppd (v2df, v2df, const int)
     v4sf __builtin_ia32_dpps (v4sf, v4sf, const int)
     v4sf __builtin_ia32_insertps128 (v4sf, v4sf, const int)
     v2di __builtin_ia32_movntdqa (v2di *)
     v16qi __builtin_ia32_mpsadbw128 (v16qi, v16qi, const int)
     v8hi __builtin_ia32_packusdw128 (v4si, v4si)
     v16qi __builtin_ia32_pblendvb128 (v16qi, v16qi, v16qi)
     v8hi __builtin_ia32_pblendw128 (v8hi, v8hi, const int)
     v2di __builtin_ia32_pcmpeqq (v2di, v2di)
     v8hi __builtin_ia32_phminposuw128 (v8hi)
     v16qi __builtin_ia32_pmaxsb128 (v16qi, v16qi)
     v4si __builtin_ia32_pmaxsd128 (v4si, v4si)
     v4si __builtin_ia32_pmaxud128 (v4si, v4si)
     v8hi __builtin_ia32_pmaxuw128 (v8hi, v8hi)
     v16qi __builtin_ia32_pminsb128 (v16qi, v16qi)
     v4si __builtin_ia32_pminsd128 (v4si, v4si)
     v4si __builtin_ia32_pminud128 (v4si, v4si)
     v8hi __builtin_ia32_pminuw128 (v8hi, v8hi)
     v4si __builtin_ia32_pmovsxbd128 (v16qi)
     v2di __builtin_ia32_pmovsxbq128 (v16qi)
     v8hi __builtin_ia32_pmovsxbw128 (v16qi)
     v2di __builtin_ia32_pmovsxdq128 (v4si)
     v4si __builtin_ia32_pmovsxwd128 (v8hi)
     v2di __builtin_ia32_pmovsxwq128 (v8hi)
     v4si __builtin_ia32_pmovzxbd128 (v16qi)
     v2di __builtin_ia32_pmovzxbq128 (v16qi)
     v8hi __builtin_ia32_pmovzxbw128 (v16qi)
     v2di __builtin_ia32_pmovzxdq128 (v4si)
     v4si __builtin_ia32_pmovzxwd128 (v8hi)
     v2di __builtin_ia32_pmovzxwq128 (v8hi)
     v2di __builtin_ia32_pmuldq128 (v4si, v4si)
     v4si __builtin_ia32_pmulld128 (v4si, v4si)
     int __builtin_ia32_ptestc128 (v2di, v2di)
     int __builtin_ia32_ptestnzc128 (v2di, v2di)
     int __builtin_ia32_ptestz128 (v2di, v2di)
     v2df __builtin_ia32_roundpd (v2df, const int)
     v4sf __builtin_ia32_roundps (v4sf, const int)
     v2df __builtin_ia32_roundsd (v2df, v2df, const int)
     v4sf __builtin_ia32_roundss (v4sf, v4sf, const int)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse4.1</span></samp> is
used.

     <dl>
<dt><code>v4sf __builtin_ia32_vec_set_v4sf (v4sf, float, const int)</code><dd>Generates the <code>insertps</code> machine instruction. 
<br><dt><code>int __builtin_ia32_vec_ext_v16qi (v16qi, const int)</code><dd>Generates the <code>pextrb</code> machine instruction. 
<br><dt><code>v16qi __builtin_ia32_vec_set_v16qi (v16qi, int, const int)</code><dd>Generates the <code>pinsrb</code> machine instruction. 
<br><dt><code>v4si __builtin_ia32_vec_set_v4si (v4si, int, const int)</code><dd>Generates the <code>pinsrd</code> machine instruction. 
<br><dt><code>v2di __builtin_ia32_vec_set_v2di (v2di, long long, const int)</code><dd>Generates the <code>pinsrq</code> machine instruction in 64bit mode. 
</dl>

 <p>The following built-in functions are changed to generate new SSE4.1
instructions when <samp><span class="option">-msse4.1</span></samp> is used.

     <dl>
<dt><code>float __builtin_ia32_vec_ext_v4sf (v4sf, const int)</code><dd>Generates the <code>extractps</code> machine instruction. 
<br><dt><code>int __builtin_ia32_vec_ext_v4si (v4si, const int)</code><dd>Generates the <code>pextrd</code> machine instruction. 
<br><dt><code>long long __builtin_ia32_vec_ext_v2di (v2di, const int)</code><dd>Generates the <code>pextrq</code> machine instruction in 64bit mode. 
</dl>

 <p>The following built-in functions are available when <samp><span class="option">-msse4.2</span></samp> is
used.  All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     v16qi __builtin_ia32_pcmpestrm128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestri128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestria128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestric128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestrio128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestris128 (v16qi, int, v16qi, int, const int)
     int __builtin_ia32_pcmpestriz128 (v16qi, int, v16qi, int, const int)
     v16qi __builtin_ia32_pcmpistrm128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistri128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistria128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistric128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistrio128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistris128 (v16qi, v16qi, const int)
     int __builtin_ia32_pcmpistriz128 (v16qi, v16qi, const int)
     v2di __builtin_ia32_pcmpgtq (v2di, v2di)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse4.2</span></samp> is
used.

     <dl>
<dt><code>unsigned int __builtin_ia32_crc32qi (unsigned int, unsigned char)</code><dd>Generates the <code>crc32b</code> machine instruction. 
<br><dt><code>unsigned int __builtin_ia32_crc32hi (unsigned int, unsigned short)</code><dd>Generates the <code>crc32w</code> machine instruction. 
<br><dt><code>unsigned int __builtin_ia32_crc32si (unsigned int, unsigned int)</code><dd>Generates the <code>crc32l</code> machine instruction. 
<br><dt><code>unsigned long long __builtin_ia32_crc32di (unsigned long long, unsigned long long)</code><dd>Generates the <code>crc32q</code> machine instruction. 
</dl>

 <p>The following built-in functions are changed to generate new SSE4.2
instructions when <samp><span class="option">-msse4.2</span></samp> is used.

     <dl>
<dt><code>int __builtin_popcount (unsigned int)</code><dd>Generates the <code>popcntl</code> machine instruction. 
<br><dt><code>int __builtin_popcountl (unsigned long)</code><dd>Generates the <code>popcntl</code> or <code>popcntq</code> machine instruction,
depending on the size of <code>unsigned long</code>. 
<br><dt><code>int __builtin_popcountll (unsigned long long)</code><dd>Generates the <code>popcntq</code> machine instruction. 
</dl>

 <p>The following built-in functions are available when <samp><span class="option">-mavx</span></samp> is
used. All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     v4df __builtin_ia32_addpd256 (v4df,v4df)
     v8sf __builtin_ia32_addps256 (v8sf,v8sf)
     v4df __builtin_ia32_addsubpd256 (v4df,v4df)
     v8sf __builtin_ia32_addsubps256 (v8sf,v8sf)
     v4df __builtin_ia32_andnpd256 (v4df,v4df)
     v8sf __builtin_ia32_andnps256 (v8sf,v8sf)
     v4df __builtin_ia32_andpd256 (v4df,v4df)
     v8sf __builtin_ia32_andps256 (v8sf,v8sf)
     v4df __builtin_ia32_blendpd256 (v4df,v4df,int)
     v8sf __builtin_ia32_blendps256 (v8sf,v8sf,int)
     v4df __builtin_ia32_blendvpd256 (v4df,v4df,v4df)
     v8sf __builtin_ia32_blendvps256 (v8sf,v8sf,v8sf)
     v2df __builtin_ia32_cmppd (v2df,v2df,int)
     v4df __builtin_ia32_cmppd256 (v4df,v4df,int)
     v4sf __builtin_ia32_cmpps (v4sf,v4sf,int)
     v8sf __builtin_ia32_cmpps256 (v8sf,v8sf,int)
     v2df __builtin_ia32_cmpsd (v2df,v2df,int)
     v4sf __builtin_ia32_cmpss (v4sf,v4sf,int)
     v4df __builtin_ia32_cvtdq2pd256 (v4si)
     v8sf __builtin_ia32_cvtdq2ps256 (v8si)
     v4si __builtin_ia32_cvtpd2dq256 (v4df)
     v4sf __builtin_ia32_cvtpd2ps256 (v4df)
     v8si __builtin_ia32_cvtps2dq256 (v8sf)
     v4df __builtin_ia32_cvtps2pd256 (v4sf)
     v4si __builtin_ia32_cvttpd2dq256 (v4df)
     v8si __builtin_ia32_cvttps2dq256 (v8sf)
     v4df __builtin_ia32_divpd256 (v4df,v4df)
     v8sf __builtin_ia32_divps256 (v8sf,v8sf)
     v8sf __builtin_ia32_dpps256 (v8sf,v8sf,int)
     v4df __builtin_ia32_haddpd256 (v4df,v4df)
     v8sf __builtin_ia32_haddps256 (v8sf,v8sf)
     v4df __builtin_ia32_hsubpd256 (v4df,v4df)
     v8sf __builtin_ia32_hsubps256 (v8sf,v8sf)
     v32qi __builtin_ia32_lddqu256 (pcchar)
     v32qi __builtin_ia32_loaddqu256 (pcchar)
     v4df __builtin_ia32_loadupd256 (pcdouble)
     v8sf __builtin_ia32_loadups256 (pcfloat)
     v2df __builtin_ia32_maskloadpd (pcv2df,v2df)
     v4df __builtin_ia32_maskloadpd256 (pcv4df,v4df)
     v4sf __builtin_ia32_maskloadps (pcv4sf,v4sf)
     v8sf __builtin_ia32_maskloadps256 (pcv8sf,v8sf)
     void __builtin_ia32_maskstorepd (pv2df,v2df,v2df)
     void __builtin_ia32_maskstorepd256 (pv4df,v4df,v4df)
     void __builtin_ia32_maskstoreps (pv4sf,v4sf,v4sf)
     void __builtin_ia32_maskstoreps256 (pv8sf,v8sf,v8sf)
     v4df __builtin_ia32_maxpd256 (v4df,v4df)
     v8sf __builtin_ia32_maxps256 (v8sf,v8sf)
     v4df __builtin_ia32_minpd256 (v4df,v4df)
     v8sf __builtin_ia32_minps256 (v8sf,v8sf)
     v4df __builtin_ia32_movddup256 (v4df)
     int __builtin_ia32_movmskpd256 (v4df)
     int __builtin_ia32_movmskps256 (v8sf)
     v8sf __builtin_ia32_movshdup256 (v8sf)
     v8sf __builtin_ia32_movsldup256 (v8sf)
     v4df __builtin_ia32_mulpd256 (v4df,v4df)
     v8sf __builtin_ia32_mulps256 (v8sf,v8sf)
     v4df __builtin_ia32_orpd256 (v4df,v4df)
     v8sf __builtin_ia32_orps256 (v8sf,v8sf)
     v2df __builtin_ia32_pd_pd256 (v4df)
     v4df __builtin_ia32_pd256_pd (v2df)
     v4sf __builtin_ia32_ps_ps256 (v8sf)
     v8sf __builtin_ia32_ps256_ps (v4sf)
     int __builtin_ia32_ptestc256 (v4di,v4di,ptest)
     int __builtin_ia32_ptestnzc256 (v4di,v4di,ptest)
     int __builtin_ia32_ptestz256 (v4di,v4di,ptest)
     v8sf __builtin_ia32_rcpps256 (v8sf)
     v4df __builtin_ia32_roundpd256 (v4df,int)
     v8sf __builtin_ia32_roundps256 (v8sf,int)
     v8sf __builtin_ia32_rsqrtps_nr256 (v8sf)
     v8sf __builtin_ia32_rsqrtps256 (v8sf)
     v4df __builtin_ia32_shufpd256 (v4df,v4df,int)
     v8sf __builtin_ia32_shufps256 (v8sf,v8sf,int)
     v4si __builtin_ia32_si_si256 (v8si)
     v8si __builtin_ia32_si256_si (v4si)
     v4df __builtin_ia32_sqrtpd256 (v4df)
     v8sf __builtin_ia32_sqrtps_nr256 (v8sf)
     v8sf __builtin_ia32_sqrtps256 (v8sf)
     void __builtin_ia32_storedqu256 (pchar,v32qi)
     void __builtin_ia32_storeupd256 (pdouble,v4df)
     void __builtin_ia32_storeups256 (pfloat,v8sf)
     v4df __builtin_ia32_subpd256 (v4df,v4df)
     v8sf __builtin_ia32_subps256 (v8sf,v8sf)
     v4df __builtin_ia32_unpckhpd256 (v4df,v4df)
     v8sf __builtin_ia32_unpckhps256 (v8sf,v8sf)
     v4df __builtin_ia32_unpcklpd256 (v4df,v4df)
     v8sf __builtin_ia32_unpcklps256 (v8sf,v8sf)
     v4df __builtin_ia32_vbroadcastf128_pd256 (pcv2df)
     v8sf __builtin_ia32_vbroadcastf128_ps256 (pcv4sf)
     v4df __builtin_ia32_vbroadcastsd256 (pcdouble)
     v4sf __builtin_ia32_vbroadcastss (pcfloat)
     v8sf __builtin_ia32_vbroadcastss256 (pcfloat)
     v2df __builtin_ia32_vextractf128_pd256 (v4df,int)
     v4sf __builtin_ia32_vextractf128_ps256 (v8sf,int)
     v4si __builtin_ia32_vextractf128_si256 (v8si,int)
     v4df __builtin_ia32_vinsertf128_pd256 (v4df,v2df,int)
     v8sf __builtin_ia32_vinsertf128_ps256 (v8sf,v4sf,int)
     v8si __builtin_ia32_vinsertf128_si256 (v8si,v4si,int)
     v4df __builtin_ia32_vperm2f128_pd256 (v4df,v4df,int)
     v8sf __builtin_ia32_vperm2f128_ps256 (v8sf,v8sf,int)
     v8si __builtin_ia32_vperm2f128_si256 (v8si,v8si,int)
     v2df __builtin_ia32_vpermil2pd (v2df,v2df,v2di,int)
     v4df __builtin_ia32_vpermil2pd256 (v4df,v4df,v4di,int)
     v4sf __builtin_ia32_vpermil2ps (v4sf,v4sf,v4si,int)
     v8sf __builtin_ia32_vpermil2ps256 (v8sf,v8sf,v8si,int)
     v2df __builtin_ia32_vpermilpd (v2df,int)
     v4df __builtin_ia32_vpermilpd256 (v4df,int)
     v4sf __builtin_ia32_vpermilps (v4sf,int)
     v8sf __builtin_ia32_vpermilps256 (v8sf,int)
     v2df __builtin_ia32_vpermilvarpd (v2df,v2di)
     v4df __builtin_ia32_vpermilvarpd256 (v4df,v4di)
     v4sf __builtin_ia32_vpermilvarps (v4sf,v4si)
     v8sf __builtin_ia32_vpermilvarps256 (v8sf,v8si)
     int __builtin_ia32_vtestcpd (v2df,v2df,ptest)
     int __builtin_ia32_vtestcpd256 (v4df,v4df,ptest)
     int __builtin_ia32_vtestcps (v4sf,v4sf,ptest)
     int __builtin_ia32_vtestcps256 (v8sf,v8sf,ptest)
     int __builtin_ia32_vtestnzcpd (v2df,v2df,ptest)
     int __builtin_ia32_vtestnzcpd256 (v4df,v4df,ptest)
     int __builtin_ia32_vtestnzcps (v4sf,v4sf,ptest)
     int __builtin_ia32_vtestnzcps256 (v8sf,v8sf,ptest)
     int __builtin_ia32_vtestzpd (v2df,v2df,ptest)
     int __builtin_ia32_vtestzpd256 (v4df,v4df,ptest)
     int __builtin_ia32_vtestzps (v4sf,v4sf,ptest)
     int __builtin_ia32_vtestzps256 (v8sf,v8sf,ptest)
     void __builtin_ia32_vzeroall (void)
     void __builtin_ia32_vzeroupper (void)
     v4df __builtin_ia32_xorpd256 (v4df,v4df)
     v8sf __builtin_ia32_xorps256 (v8sf,v8sf)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mavx2</span></samp> is
used. All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     v32qi __builtin_ia32_mpsadbw256 (v32qi,v32qi,v32qi,int)
     v32qi __builtin_ia32_pabsb256 (v32qi)
     v16hi __builtin_ia32_pabsw256 (v16hi)
     v8si __builtin_ia32_pabsd256 (v8si)
     v16hi __builtin_ia32_packssdw256 (v8si,v8si)
     v32qi __builtin_ia32_packsswb256 (v16hi,v16hi)
     v16hi __builtin_ia32_packusdw256 (v8si,v8si)
     v32qi __builtin_ia32_packuswb256 (v16hi,v16hi)
     v32qi __builtin_ia32_paddb256 (v32qi,v32qi)
     v16hi __builtin_ia32_paddw256 (v16hi,v16hi)
     v8si __builtin_ia32_paddd256 (v8si,v8si)
     v4di __builtin_ia32_paddq256 (v4di,v4di)
     v32qi __builtin_ia32_paddsb256 (v32qi,v32qi)
     v16hi __builtin_ia32_paddsw256 (v16hi,v16hi)
     v32qi __builtin_ia32_paddusb256 (v32qi,v32qi)
     v16hi __builtin_ia32_paddusw256 (v16hi,v16hi)
     v4di __builtin_ia32_palignr256 (v4di,v4di,int)
     v4di __builtin_ia32_andsi256 (v4di,v4di)
     v4di __builtin_ia32_andnotsi256 (v4di,v4di)
     v32qi __builtin_ia32_pavgb256 (v32qi,v32qi)
     v16hi __builtin_ia32_pavgw256 (v16hi,v16hi)
     v32qi __builtin_ia32_pblendvb256 (v32qi,v32qi,v32qi)
     v16hi __builtin_ia32_pblendw256 (v16hi,v16hi,int)
     v32qi __builtin_ia32_pcmpeqb256 (v32qi,v32qi)
     v16hi __builtin_ia32_pcmpeqw256 (v16hi,v16hi)
     v8si __builtin_ia32_pcmpeqd256 (v8si,v8si)
     v4di __builtin_ia32_pcmpeqq256 (v4di,v4di)
     v32qi __builtin_ia32_pcmpgtb256 (v32qi,v32qi)
     v16hi __builtin_ia32_pcmpgtw256 (v16hi,v16hi)
     v8si __builtin_ia32_pcmpgtd256 (v8si,v8si)
     v4di __builtin_ia32_pcmpgtq256 (v4di,v4di)
     v16hi __builtin_ia32_phaddw256 (v16hi,v16hi)
     v8si __builtin_ia32_phaddd256 (v8si,v8si)
     v16hi __builtin_ia32_phaddsw256 (v16hi,v16hi)
     v16hi __builtin_ia32_phsubw256 (v16hi,v16hi)
     v8si __builtin_ia32_phsubd256 (v8si,v8si)
     v16hi __builtin_ia32_phsubsw256 (v16hi,v16hi)
     v32qi __builtin_ia32_pmaddubsw256 (v32qi,v32qi)
     v16hi __builtin_ia32_pmaddwd256 (v16hi,v16hi)
     v32qi __builtin_ia32_pmaxsb256 (v32qi,v32qi)
     v16hi __builtin_ia32_pmaxsw256 (v16hi,v16hi)
     v8si __builtin_ia32_pmaxsd256 (v8si,v8si)
     v32qi __builtin_ia32_pmaxub256 (v32qi,v32qi)
     v16hi __builtin_ia32_pmaxuw256 (v16hi,v16hi)
     v8si __builtin_ia32_pmaxud256 (v8si,v8si)
     v32qi __builtin_ia32_pminsb256 (v32qi,v32qi)
     v16hi __builtin_ia32_pminsw256 (v16hi,v16hi)
     v8si __builtin_ia32_pminsd256 (v8si,v8si)
     v32qi __builtin_ia32_pminub256 (v32qi,v32qi)
     v16hi __builtin_ia32_pminuw256 (v16hi,v16hi)
     v8si __builtin_ia32_pminud256 (v8si,v8si)
     int __builtin_ia32_pmovmskb256 (v32qi)
     v16hi __builtin_ia32_pmovsxbw256 (v16qi)
     v8si __builtin_ia32_pmovsxbd256 (v16qi)
     v4di __builtin_ia32_pmovsxbq256 (v16qi)
     v8si __builtin_ia32_pmovsxwd256 (v8hi)
     v4di __builtin_ia32_pmovsxwq256 (v8hi)
     v4di __builtin_ia32_pmovsxdq256 (v4si)
     v16hi __builtin_ia32_pmovzxbw256 (v16qi)
     v8si __builtin_ia32_pmovzxbd256 (v16qi)
     v4di __builtin_ia32_pmovzxbq256 (v16qi)
     v8si __builtin_ia32_pmovzxwd256 (v8hi)
     v4di __builtin_ia32_pmovzxwq256 (v8hi)
     v4di __builtin_ia32_pmovzxdq256 (v4si)
     v4di __builtin_ia32_pmuldq256 (v8si,v8si)
     v16hi __builtin_ia32_pmulhrsw256 (v16hi, v16hi)
     v16hi __builtin_ia32_pmulhuw256 (v16hi,v16hi)
     v16hi __builtin_ia32_pmulhw256 (v16hi,v16hi)
     v16hi __builtin_ia32_pmullw256 (v16hi,v16hi)
     v8si __builtin_ia32_pmulld256 (v8si,v8si)
     v4di __builtin_ia32_pmuludq256 (v8si,v8si)
     v4di __builtin_ia32_por256 (v4di,v4di)
     v16hi __builtin_ia32_psadbw256 (v32qi,v32qi)
     v32qi __builtin_ia32_pshufb256 (v32qi,v32qi)
     v8si __builtin_ia32_pshufd256 (v8si,int)
     v16hi __builtin_ia32_pshufhw256 (v16hi,int)
     v16hi __builtin_ia32_pshuflw256 (v16hi,int)
     v32qi __builtin_ia32_psignb256 (v32qi,v32qi)
     v16hi __builtin_ia32_psignw256 (v16hi,v16hi)
     v8si __builtin_ia32_psignd256 (v8si,v8si)
     v4di __builtin_ia32_pslldqi256 (v4di,int)
     v16hi __builtin_ia32_psllwi256 (v16hi,int)
     v16hi __builtin_ia32_psllw256(v16hi,v8hi)
     v8si __builtin_ia32_pslldi256 (v8si,int)
     v8si __builtin_ia32_pslld256(v8si,v4si)
     v4di __builtin_ia32_psllqi256 (v4di,int)
     v4di __builtin_ia32_psllq256(v4di,v2di)
     v16hi __builtin_ia32_psrawi256 (v16hi,int)
     v16hi __builtin_ia32_psraw256 (v16hi,v8hi)
     v8si __builtin_ia32_psradi256 (v8si,int)
     v8si __builtin_ia32_psrad256 (v8si,v4si)
     v4di __builtin_ia32_psrldqi256 (v4di, int)
     v16hi __builtin_ia32_psrlwi256 (v16hi,int)
     v16hi __builtin_ia32_psrlw256 (v16hi,v8hi)
     v8si __builtin_ia32_psrldi256 (v8si,int)
     v8si __builtin_ia32_psrld256 (v8si,v4si)
     v4di __builtin_ia32_psrlqi256 (v4di,int)
     v4di __builtin_ia32_psrlq256(v4di,v2di)
     v32qi __builtin_ia32_psubb256 (v32qi,v32qi)
     v16hi __builtin_ia32_psubw256 (v16hi,v16hi)
     v8si __builtin_ia32_psubd256 (v8si,v8si)
     v4di __builtin_ia32_psubq256 (v4di,v4di)
     v32qi __builtin_ia32_psubsb256 (v32qi,v32qi)
     v16hi __builtin_ia32_psubsw256 (v16hi,v16hi)
     v32qi __builtin_ia32_psubusb256 (v32qi,v32qi)
     v16hi __builtin_ia32_psubusw256 (v16hi,v16hi)
     v32qi __builtin_ia32_punpckhbw256 (v32qi,v32qi)
     v16hi __builtin_ia32_punpckhwd256 (v16hi,v16hi)
     v8si __builtin_ia32_punpckhdq256 (v8si,v8si)
     v4di __builtin_ia32_punpckhqdq256 (v4di,v4di)
     v32qi __builtin_ia32_punpcklbw256 (v32qi,v32qi)
     v16hi __builtin_ia32_punpcklwd256 (v16hi,v16hi)
     v8si __builtin_ia32_punpckldq256 (v8si,v8si)
     v4di __builtin_ia32_punpcklqdq256 (v4di,v4di)
     v4di __builtin_ia32_pxor256 (v4di,v4di)
     v4di __builtin_ia32_movntdqa256 (pv4di)
     v4sf __builtin_ia32_vbroadcastss_ps (v4sf)
     v8sf __builtin_ia32_vbroadcastss_ps256 (v4sf)
     v4df __builtin_ia32_vbroadcastsd_pd256 (v2df)
     v4di __builtin_ia32_vbroadcastsi256 (v2di)
     v4si __builtin_ia32_pblendd128 (v4si,v4si)
     v8si __builtin_ia32_pblendd256 (v8si,v8si)
     v32qi __builtin_ia32_pbroadcastb256 (v16qi)
     v16hi __builtin_ia32_pbroadcastw256 (v8hi)
     v8si __builtin_ia32_pbroadcastd256 (v4si)
     v4di __builtin_ia32_pbroadcastq256 (v2di)
     v16qi __builtin_ia32_pbroadcastb128 (v16qi)
     v8hi __builtin_ia32_pbroadcastw128 (v8hi)
     v4si __builtin_ia32_pbroadcastd128 (v4si)
     v2di __builtin_ia32_pbroadcastq128 (v2di)
     v8si __builtin_ia32_permvarsi256 (v8si,v8si)
     v4df __builtin_ia32_permdf256 (v4df,int)
     v8sf __builtin_ia32_permvarsf256 (v8sf,v8sf)
     v4di __builtin_ia32_permdi256 (v4di,int)
     v4di __builtin_ia32_permti256 (v4di,v4di,int)
     v4di __builtin_ia32_extract128i256 (v4di,int)
     v4di __builtin_ia32_insert128i256 (v4di,v2di,int)
     v8si __builtin_ia32_maskloadd256 (pcv8si,v8si)
     v4di __builtin_ia32_maskloadq256 (pcv4di,v4di)
     v4si __builtin_ia32_maskloadd (pcv4si,v4si)
     v2di __builtin_ia32_maskloadq (pcv2di,v2di)
     void __builtin_ia32_maskstored256 (pv8si,v8si,v8si)
     void __builtin_ia32_maskstoreq256 (pv4di,v4di,v4di)
     void __builtin_ia32_maskstored (pv4si,v4si,v4si)
     void __builtin_ia32_maskstoreq (pv2di,v2di,v2di)
     v8si __builtin_ia32_psllv8si (v8si,v8si)
     v4si __builtin_ia32_psllv4si (v4si,v4si)
     v4di __builtin_ia32_psllv4di (v4di,v4di)
     v2di __builtin_ia32_psllv2di (v2di,v2di)
     v8si __builtin_ia32_psrav8si (v8si,v8si)
     v4si __builtin_ia32_psrav4si (v4si,v4si)
     v8si __builtin_ia32_psrlv8si (v8si,v8si)
     v4si __builtin_ia32_psrlv4si (v4si,v4si)
     v4di __builtin_ia32_psrlv4di (v4di,v4di)
     v2di __builtin_ia32_psrlv2di (v2di,v2di)
     v2df __builtin_ia32_gathersiv2df (v2df, pcdouble,v4si,v2df,int)
     v4df __builtin_ia32_gathersiv4df (v4df, pcdouble,v4si,v4df,int)
     v2df __builtin_ia32_gatherdiv2df (v2df, pcdouble,v2di,v2df,int)
     v4df __builtin_ia32_gatherdiv4df (v4df, pcdouble,v4di,v4df,int)
     v4sf __builtin_ia32_gathersiv4sf (v4sf, pcfloat,v4si,v4sf,int)
     v8sf __builtin_ia32_gathersiv8sf (v8sf, pcfloat,v8si,v8sf,int)
     v4sf __builtin_ia32_gatherdiv4sf (v4sf, pcfloat,v2di,v4sf,int)
     v4sf __builtin_ia32_gatherdiv4sf256 (v4sf, pcfloat,v4di,v4sf,int)
     v2di __builtin_ia32_gathersiv2di (v2di, pcint64,v4si,v2di,int)
     v4di __builtin_ia32_gathersiv4di (v4di, pcint64,v4si,v4di,int)
     v2di __builtin_ia32_gatherdiv2di (v2di, pcint64,v2di,v2di,int)
     v4di __builtin_ia32_gatherdiv4di (v4di, pcint64,v4di,v4di,int)
     v4si __builtin_ia32_gathersiv4si (v4si, pcint,v4si,v4si,int)
     v8si __builtin_ia32_gathersiv8si (v8si, pcint,v8si,v8si,int)
     v4si __builtin_ia32_gatherdiv4si (v4si, pcint,v2di,v4si,int)
     v4si __builtin_ia32_gatherdiv4si256 (v4si, pcint,v4di,v4si,int)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-maes</span></samp> is
used.  All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     v2di __builtin_ia32_aesenc128 (v2di, v2di)
     v2di __builtin_ia32_aesenclast128 (v2di, v2di)
     v2di __builtin_ia32_aesdec128 (v2di, v2di)
     v2di __builtin_ia32_aesdeclast128 (v2di, v2di)
     v2di __builtin_ia32_aeskeygenassist128 (v2di, const int)
     v2di __builtin_ia32_aesimc128 (v2di)
</pre>
 <p>The following built-in function is available when <samp><span class="option">-mpclmul</span></samp> is
used.

     <dl>
<dt><code>v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int)</code><dd>Generates the <code>pclmulqdq</code> machine instruction. 
</dl>

 <p>The following built-in functions are available when <samp><span class="option">-mfsgsbase</span></samp> is
used.  All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     unsigned int __builtin_ia32_rdfsbase32 (void)
     unsigned long long __builtin_ia32_rdfsbase64 (void)
     unsigned int __builtin_ia32_rdgsbase32 (void)
     unsigned long long __builtin_ia32_rdgsbase64 (void)
     void _writefsbase_u32 (unsigned int)
     void _writefsbase_u64 (unsigned long long)
     void _writegsbase_u32 (unsigned int)
     void _writegsbase_u64 (unsigned long long)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mrdrnd</span></samp> is
used.  All of them generate the machine instruction that is part of the
name.

<pre class="smallexample">     unsigned int __builtin_ia32_rdrand16_step (unsigned short *)
     unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
     unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-msse4a</span></samp> is used. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     void __builtin_ia32_movntsd (double *, v2df)
     void __builtin_ia32_movntss (float *, v4sf)
     v2di __builtin_ia32_extrq  (v2di, v16qi)
     v2di __builtin_ia32_extrqi (v2di, const unsigned int, const unsigned int)
     v2di __builtin_ia32_insertq (v2di, v2di)
     v2di __builtin_ia32_insertqi (v2di, v2di, const unsigned int, const unsigned int)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mxop</span></samp> is used.
<pre class="smallexample">     v2df __builtin_ia32_vfrczpd (v2df)
     v4sf __builtin_ia32_vfrczps (v4sf)
     v2df __builtin_ia32_vfrczsd (v2df, v2df)
     v4sf __builtin_ia32_vfrczss (v4sf, v4sf)
     v4df __builtin_ia32_vfrczpd256 (v4df)
     v8sf __builtin_ia32_vfrczps256 (v8sf)
     v2di __builtin_ia32_vpcmov (v2di, v2di, v2di)
     v2di __builtin_ia32_vpcmov_v2di (v2di, v2di, v2di)
     v4si __builtin_ia32_vpcmov_v4si (v4si, v4si, v4si)
     v8hi __builtin_ia32_vpcmov_v8hi (v8hi, v8hi, v8hi)
     v16qi __builtin_ia32_vpcmov_v16qi (v16qi, v16qi, v16qi)
     v2df __builtin_ia32_vpcmov_v2df (v2df, v2df, v2df)
     v4sf __builtin_ia32_vpcmov_v4sf (v4sf, v4sf, v4sf)
     v4di __builtin_ia32_vpcmov_v4di256 (v4di, v4di, v4di)
     v8si __builtin_ia32_vpcmov_v8si256 (v8si, v8si, v8si)
     v16hi __builtin_ia32_vpcmov_v16hi256 (v16hi, v16hi, v16hi)
     v32qi __builtin_ia32_vpcmov_v32qi256 (v32qi, v32qi, v32qi)
     v4df __builtin_ia32_vpcmov_v4df256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_vpcmov_v8sf256 (v8sf, v8sf, v8sf)
     v16qi __builtin_ia32_vpcomeqb (v16qi, v16qi)
     v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi)
     v4si __builtin_ia32_vpcomeqd (v4si, v4si)
     v2di __builtin_ia32_vpcomeqq (v2di, v2di)
     v16qi __builtin_ia32_vpcomequb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomequd (v4si, v4si)
     v2di __builtin_ia32_vpcomequq (v2di, v2di)
     v8hi __builtin_ia32_vpcomequw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomfalseb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomfalsed (v4si, v4si)
     v2di __builtin_ia32_vpcomfalseq (v2di, v2di)
     v16qi __builtin_ia32_vpcomfalseub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomfalseud (v4si, v4si)
     v2di __builtin_ia32_vpcomfalseuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomfalseuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomfalsew (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomgeb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomged (v4si, v4si)
     v2di __builtin_ia32_vpcomgeq (v2di, v2di)
     v16qi __builtin_ia32_vpcomgeub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomgeud (v4si, v4si)
     v2di __builtin_ia32_vpcomgeuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomgeuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomgew (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomgtb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomgtd (v4si, v4si)
     v2di __builtin_ia32_vpcomgtq (v2di, v2di)
     v16qi __builtin_ia32_vpcomgtub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomgtud (v4si, v4si)
     v2di __builtin_ia32_vpcomgtuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomgtuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomgtw (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomleb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomled (v4si, v4si)
     v2di __builtin_ia32_vpcomleq (v2di, v2di)
     v16qi __builtin_ia32_vpcomleub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomleud (v4si, v4si)
     v2di __builtin_ia32_vpcomleuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomleuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomlew (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomltb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomltd (v4si, v4si)
     v2di __builtin_ia32_vpcomltq (v2di, v2di)
     v16qi __builtin_ia32_vpcomltub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomltud (v4si, v4si)
     v2di __builtin_ia32_vpcomltuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomltuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomltw (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomneb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomned (v4si, v4si)
     v2di __builtin_ia32_vpcomneq (v2di, v2di)
     v16qi __builtin_ia32_vpcomneub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomneud (v4si, v4si)
     v2di __builtin_ia32_vpcomneuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomneuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomnew (v8hi, v8hi)
     v16qi __builtin_ia32_vpcomtrueb (v16qi, v16qi)
     v4si __builtin_ia32_vpcomtrued (v4si, v4si)
     v2di __builtin_ia32_vpcomtrueq (v2di, v2di)
     v16qi __builtin_ia32_vpcomtrueub (v16qi, v16qi)
     v4si __builtin_ia32_vpcomtrueud (v4si, v4si)
     v2di __builtin_ia32_vpcomtrueuq (v2di, v2di)
     v8hi __builtin_ia32_vpcomtrueuw (v8hi, v8hi)
     v8hi __builtin_ia32_vpcomtruew (v8hi, v8hi)
     v4si __builtin_ia32_vphaddbd (v16qi)
     v2di __builtin_ia32_vphaddbq (v16qi)
     v8hi __builtin_ia32_vphaddbw (v16qi)
     v2di __builtin_ia32_vphadddq (v4si)
     v4si __builtin_ia32_vphaddubd (v16qi)
     v2di __builtin_ia32_vphaddubq (v16qi)
     v8hi __builtin_ia32_vphaddubw (v16qi)
     v2di __builtin_ia32_vphaddudq (v4si)
     v4si __builtin_ia32_vphadduwd (v8hi)
     v2di __builtin_ia32_vphadduwq (v8hi)
     v4si __builtin_ia32_vphaddwd (v8hi)
     v2di __builtin_ia32_vphaddwq (v8hi)
     v8hi __builtin_ia32_vphsubbw (v16qi)
     v2di __builtin_ia32_vphsubdq (v4si)
     v4si __builtin_ia32_vphsubwd (v8hi)
     v4si __builtin_ia32_vpmacsdd (v4si, v4si, v4si)
     v2di __builtin_ia32_vpmacsdqh (v4si, v4si, v2di)
     v2di __builtin_ia32_vpmacsdql (v4si, v4si, v2di)
     v4si __builtin_ia32_vpmacssdd (v4si, v4si, v4si)
     v2di __builtin_ia32_vpmacssdqh (v4si, v4si, v2di)
     v2di __builtin_ia32_vpmacssdql (v4si, v4si, v2di)
     v4si __builtin_ia32_vpmacsswd (v8hi, v8hi, v4si)
     v8hi __builtin_ia32_vpmacssww (v8hi, v8hi, v8hi)
     v4si __builtin_ia32_vpmacswd (v8hi, v8hi, v4si)
     v8hi __builtin_ia32_vpmacsww (v8hi, v8hi, v8hi)
     v4si __builtin_ia32_vpmadcsswd (v8hi, v8hi, v4si)
     v4si __builtin_ia32_vpmadcswd (v8hi, v8hi, v4si)
     v16qi __builtin_ia32_vpperm (v16qi, v16qi, v16qi)
     v16qi __builtin_ia32_vprotb (v16qi, v16qi)
     v4si __builtin_ia32_vprotd (v4si, v4si)
     v2di __builtin_ia32_vprotq (v2di, v2di)
     v8hi __builtin_ia32_vprotw (v8hi, v8hi)
     v16qi __builtin_ia32_vpshab (v16qi, v16qi)
     v4si __builtin_ia32_vpshad (v4si, v4si)
     v2di __builtin_ia32_vpshaq (v2di, v2di)
     v8hi __builtin_ia32_vpshaw (v8hi, v8hi)
     v16qi __builtin_ia32_vpshlb (v16qi, v16qi)
     v4si __builtin_ia32_vpshld (v4si, v4si)
     v2di __builtin_ia32_vpshlq (v2di, v2di)
     v8hi __builtin_ia32_vpshlw (v8hi, v8hi)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mfma4</span></samp> is used. 
All of them generate the machine instruction that is part of the name,
operating on XMM or YMM registers.

<pre class="smallexample">     v2df __builtin_ia32_fmaddpd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmaddps (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fmaddsd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmaddss (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fmsubpd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmsubps (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fmsubsd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmsubss (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fnmaddpd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fnmaddps (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fnmaddsd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fnmaddss (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fnmsubpd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fnmsubps (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fnmsubsd (v2df, v2df, v2df)
     v4sf __builtin_ia32_fnmsubss (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fmaddsubpd  (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmaddsubps  (v4sf, v4sf, v4sf)
     v2df __builtin_ia32_fmsubaddpd  (v2df, v2df, v2df)
     v4sf __builtin_ia32_fmsubaddps  (v4sf, v4sf, v4sf)
     v4df __builtin_ia32_fmaddpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fmaddps256 (v8sf, v8sf, v8sf)
     v4df __builtin_ia32_fmsubpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fmsubps256 (v8sf, v8sf, v8sf)
     v4df __builtin_ia32_fnmaddpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fnmaddps256 (v8sf, v8sf, v8sf)
     v4df __builtin_ia32_fnmsubpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fnmsubps256 (v8sf, v8sf, v8sf)
     v4df __builtin_ia32_fmaddsubpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fmaddsubps256 (v8sf, v8sf, v8sf)
     v4df __builtin_ia32_fmsubaddpd256 (v4df, v4df, v4df)
     v8sf __builtin_ia32_fmsubaddps256 (v8sf, v8sf, v8sf)
     
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mlwp</span></samp> is used.

<pre class="smallexample">     void __builtin_ia32_llwpcb16 (void *);
     void __builtin_ia32_llwpcb32 (void *);
     void __builtin_ia32_llwpcb64 (void *);
     void * __builtin_ia32_slwpcb16 (void);
     void * __builtin_ia32_slwpcb32 (void);
     void * __builtin_ia32_slwpcb64 (void);
     void __builtin_ia32_lwpval16 (unsigned short, unsigned int, unsigned short)
     void __builtin_ia32_lwpval32 (unsigned int, unsigned int, unsigned int)
     void __builtin_ia32_lwpval64 (unsigned __int64, unsigned int, unsigned int)
     unsigned char __builtin_ia32_lwpins16 (unsigned short, unsigned int, unsigned short)
     unsigned char __builtin_ia32_lwpins32 (unsigned int, unsigned int, unsigned int)
     unsigned char __builtin_ia32_lwpins64 (unsigned __int64, unsigned int, unsigned int)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mbmi</span></samp> is used. 
All of them generate the machine instruction that is part of the name.
<pre class="smallexample">     unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
     unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mbmi2</span></samp> is used. 
All of them generate the machine instruction that is part of the name.
<pre class="smallexample">     unsigned int _bzhi_u32 (unsigned int, unsigned int)
     unsigned int _pdep_u32 (unsigned int, unsigned int)
     unsigned int _pext_u32 (unsigned int, unsigned int)
     unsigned long long _bzhi_u64 (unsigned long long, unsigned long long)
     unsigned long long _pdep_u64 (unsigned long long, unsigned long long)
     unsigned long long _pext_u64 (unsigned long long, unsigned long long)
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mlzcnt</span></samp> is used. 
All of them generate the machine instruction that is part of the name.
<pre class="smallexample">     unsigned short __builtin_ia32_lzcnt_u16(unsigned short);
     unsigned int __builtin_ia32_lzcnt_u32(unsigned int);
     unsigned long long __builtin_ia32_lzcnt_u64 (unsigned long long);
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-mtbm</span></samp> is used. 
Both of them generate the immediate form of the bextr machine instruction.
<pre class="smallexample">     unsigned int __builtin_ia32_bextri_u32 (unsigned int, const unsigned int);
     unsigned long long __builtin_ia32_bextri_u64 (unsigned long long, const unsigned long long);
</pre>
 <p>The following built-in functions are available when <samp><span class="option">-m3dnow</span></samp> is used. 
All of them generate the machine instruction that is part of the name.

<pre class="smallexample">     void __builtin_ia32_femms (void)
     v8qi __builtin_ia32_pavgusb (v8qi, v8qi)
     v2si __builtin_ia32_pf2id (v2sf)
     v2sf __builtin_ia32_pfacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfadd (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpeq (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpge (v2sf, v2sf)
     v2si __builtin_ia32_pfcmpgt (v2sf, v2sf)
     v2sf __builtin_ia32_pfmax (v2sf, v2sf)
     v2sf __builtin_ia32_pfmin (v2sf, v2sf)
     v2sf __builtin_ia32_pfmul (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcp (v2sf)
     v2sf __builtin_ia32_pfrcpit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrcpit2 (v2sf, v2sf)
     v2sf __builtin_ia32_pfrsqrt (v2sf)
     v2sf __builtin_ia32_pfrsqrtit1 (v2sf, v2sf)
     v2sf __builtin_ia32_pfsub (v2sf, v2sf)
     v2sf __builtin_ia32_pfsubr (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fd (v2si)
     v4hi __builtin_ia32_pmulhrw (v4hi, v4hi)
</pre>
 <p>The following built-in functions are available when both <samp><span class="option">-m3dnow</span></samp>
and <samp><span class="option">-march=athlon</span></samp> are used.  All of them generate the machine
instruction that is part of the name.

<pre class="smallexample">     v2si __builtin_ia32_pf2iw (v2sf)
     v2sf __builtin_ia32_pfnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pfpnacc (v2sf, v2sf)
     v2sf __builtin_ia32_pi2fw (v2si)
     v2sf __builtin_ia32_pswapdsf (v2sf)
     v2si __builtin_ia32_pswapdsi (v2si)
</pre>
 </body></html>