[Hexagon] Adding missing vector multiply instruction encodings. Converting multiply...
[oota-llvm.git] / lib / Target / Hexagon / HexagonIntrinsicsV4.td
1 //===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // This is populated based on the following specs:
10 // Hexagon V4 Architecture Extensions
11 // Application-Level Specification
12 // 80-V9418-12 Rev. A
13 // June 15, 2010
14
15 // Vector reduce multiply word by signed half (32x16)
16 // Rdd=vrmpyweh(Rss,Rtt)[:<<1]  (one pattern per _s0/_s1 variant)
17 def : T_PP_pat <M4_vrmpyeh_s0, int_hexagon_M4_vrmpyeh_s0>;
18 def : T_PP_pat <M4_vrmpyeh_s1, int_hexagon_M4_vrmpyeh_s1>;
19
20 // Rdd=vrmpywoh(Rss,Rtt)[:<<1]
21 def : T_PP_pat <M4_vrmpyoh_s0, int_hexagon_M4_vrmpyoh_s0>;
22 def : T_PP_pat <M4_vrmpyoh_s1, int_hexagon_M4_vrmpyoh_s1>;
23
24 // Rxx+=vrmpyweh(Rss,Rtt)[:<<1]  (accumulating form, hence T_PPP_pat)
25 def : T_PPP_pat <M4_vrmpyeh_acc_s0, int_hexagon_M4_vrmpyeh_acc_s0>;
26 def : T_PPP_pat <M4_vrmpyeh_acc_s1, int_hexagon_M4_vrmpyeh_acc_s1>;
27
28 // Rxx+=vrmpywoh(Rss,Rtt)[:<<1]  (accumulating form, hence T_PPP_pat)
29 def : T_PPP_pat <M4_vrmpyoh_acc_s0, int_hexagon_M4_vrmpyoh_acc_s0>;
30 def : T_PPP_pat <M4_vrmpyoh_acc_s1, int_hexagon_M4_vrmpyoh_acc_s1>;
31
32 // Vector multiply halfwords, signed by unsigned
33 // Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat  (one pattern per _s0/_s1 variant)
34 def : T_RR_pat <M2_vmpy2su_s0, int_hexagon_M2_vmpy2su_s0>;
35 def : T_RR_pat <M2_vmpy2su_s1, int_hexagon_M2_vmpy2su_s1>;
36
37 // Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat  (accumulating form: T_PRR_pat adds a leading P for Rxx)
38 def : T_PRR_pat <M2_vmac2su_s0, int_hexagon_M2_vmac2su_s0>;
39 def : T_PRR_pat <M2_vmac2su_s1, int_hexagon_M2_vmac2su_s1>;
40
41 // Vector polynomial multiply halfwords
42 // Rdd=vpmpyh(Rs,Rt)
43 def : T_RR_pat <M4_vpmpyh, int_hexagon_M4_vpmpyh>;
44 // Rxx^=vpmpyh(Rs,Rt)  (xor-accumulating form)
45 def : T_PRR_pat <M4_vpmpyh_acc, int_hexagon_M4_vpmpyh_acc>;
46
47 // Polynomial multiply words
48 // Rdd=pmpyw(Rs,Rt)
49 def : T_RR_pat <M4_pmpyw, int_hexagon_M4_pmpyw>;
50 // Rxx^=pmpyw(Rs,Rt)  (xor-accumulating form)
51 def : T_PRR_pat <M4_pmpyw_acc, int_hexagon_M4_pmpyw_acc>;
52
53 // Shift register pair by register, xor-accumulating into Rxx: Rxx^=asr(Rss,Rt)
54 def : T_PPR_pat <S2_asr_r_p_xor, int_hexagon_S2_asr_r_p_xor>;
55 // Rxx^=asl(Rss,Rt)
56 def : T_PPR_pat <S2_asl_r_p_xor, int_hexagon_S2_asl_r_p_xor>;
57 // Rxx^=lsr(Rss,Rt)
58 def : T_PPR_pat <S2_lsr_r_p_xor, int_hexagon_S2_lsr_r_p_xor>;
59 // Rxx^=lsl(Rss,Rt)
60 def : T_PPR_pat <S2_lsl_r_p_xor, int_hexagon_S2_lsl_r_p_xor>;
61
62 // Multiply and use upper result. NOTE: MType_R32_pat takes <intrinsic, instruction>, the reverse of the T_*_pat order.
63 def : MType_R32_pat <int_hexagon_M2_mpysu_up, M2_mpysu_up>;
64 def : MType_R32_pat <int_hexagon_M2_mpy_up_s1, M2_mpy_up_s1>;
65 def : MType_R32_pat <int_hexagon_M2_hmmpyh_s1, M2_hmmpyh_s1>;
66 def : MType_R32_pat <int_hexagon_M2_hmmpyl_s1, M2_hmmpyl_s1>;
67 def : MType_R32_pat <int_hexagon_M2_mpy_up_s1_sat, M2_mpy_up_s1_sat>;
68
69 // Vector reduce add halfwords (written as an explicit Pat with two DoubleRegs sources)
70 def : Pat <(int_hexagon_M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2),
71            (M2_vraddh DoubleRegs:$src1, DoubleRegs:$src2)>;
72
73 def : T_P_pat <S2_brevp, int_hexagon_S2_brevp>;  // T_P_pat: single P source (presumably a register pair -- confirm against the helper's definition)
74
75 def: T_P_pat  <S2_ct0p,      int_hexagon_S2_ct0p>;  // ct0p/ct1p: presumably count-trailing-zeros/ones on a pair -- confirm
76 def: T_P_pat  <S2_ct1p,      int_hexagon_S2_ct1p>;
77 def: T_RR_pat<C4_nbitsset,  int_hexagon_C4_nbitsset>;  // nbits*: register/register forms ...
78 def: T_RR_pat<C4_nbitsclr,  int_hexagon_C4_nbitsclr>;
79 def: T_RI_pat<C4_nbitsclri, int_hexagon_C4_nbitsclri>;  // ... and the register/immediate form of nbitsclr
80
81 def : T_RR_pat<A4_cmpbeq,   int_hexagon_A4_cmpbeq>;  // A4_cmpb* / A4_cmph*: register/register compares (b/h presumably byte/halfword -- confirm)
82 def : T_RR_pat<A4_cmpbgt,   int_hexagon_A4_cmpbgt>;
83 def : T_RR_pat<A4_cmpbgtu,  int_hexagon_A4_cmpbgtu>;
84 def : T_RR_pat<A4_cmpheq,   int_hexagon_A4_cmpheq>;
85 def : T_RR_pat<A4_cmphgt,   int_hexagon_A4_cmphgt>;
86 def : T_RR_pat<A4_cmphgtu,  int_hexagon_A4_cmphgtu>;
87
88 def : T_RI_pat<A4_cmpbeqi,  int_hexagon_A4_cmpbeqi>;  // register/immediate counterparts of the A4_cmpb* compares
89 def : T_RI_pat<A4_cmpbgti,  int_hexagon_A4_cmpbgti>;
90 def : T_RI_pat<A4_cmpbgtui, int_hexagon_A4_cmpbgtui>;
91
92 def : T_RI_pat<A4_cmpheqi,  int_hexagon_A4_cmpheqi>;  // register/immediate counterparts of the A4_cmph* compares
93 def : T_RI_pat<A4_cmphgti,  int_hexagon_A4_cmphgti>;
94 def : T_RI_pat<A4_cmphgtui, int_hexagon_A4_cmphgtui>;
95
96 def : T_RP_pat <A4_boundscheck, int_hexagon_A4_boundscheck>;  // T_RP_pat: R source first, then P
97
98 def : T_PR_pat<A4_tlbmatch, int_hexagon_A4_tlbmatch>;  // T_PR_pat: P source first, then R
99
100 def : Pat <(int_hexagon_M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2,  // explicit Pat: three IntRegs sources,
101                                       IntRegs:$src3),                   // passed through in the same order
102            (M4_mpyrr_addr IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
103
104 def : T_IRR_pat <M4_mpyrr_addi, int_hexagon_M4_mpyrr_addi>;  // helper-class letters differ per form (IRR/IRI/RIR/RRI);
105 def : T_IRI_pat <M4_mpyri_addi, int_hexagon_M4_mpyri_addi>;  // presumably the I/R source-operand order -- confirm
106 def : T_RIR_pat <M4_mpyri_addr_u2, int_hexagon_M4_mpyri_addr_u2>;
107 def : T_RRI_pat <M4_mpyri_addr, int_hexagon_M4_mpyri_addr>;
108 // Multiply 32x32 and use upper result (mac/nac forms, three register sources)
109 def : T_RRR_pat <M4_mac_up_s1_sat, int_hexagon_M4_mac_up_s1_sat>;
110 def : T_RRR_pat <M4_nac_up_s1_sat, int_hexagon_M4_nac_up_s1_sat>;
111
112 // Complex multiply 32x16 (cmpyi/cmpyr variants; P source followed by R source)
113 def : T_PR_pat <M4_cmpyi_wh, int_hexagon_M4_cmpyi_wh>;
114 def : T_PR_pat <M4_cmpyr_wh, int_hexagon_M4_cmpyr_wh>;
115
116 def : T_PR_pat <M4_cmpyi_whc, int_hexagon_M4_cmpyi_whc>;  // _whc counterparts of the two patterns above
117 def : T_PR_pat <M4_cmpyr_whc, int_hexagon_M4_cmpyr_whc>;
118
119 def : T_PP_pat<A4_andnp, int_hexagon_A4_andnp>;  // andnp/ornp: two P sources (presumably and/or with a complemented operand -- confirm)
120 def : T_PP_pat<A4_ornp,  int_hexagon_A4_ornp>;
121
122 // Complex add/sub halfwords/words (w = word forms, h = halfword forms)
123 def : T_PP_pat <S4_vxaddsubw, int_hexagon_S4_vxaddsubw>;
124 def : T_PP_pat <S4_vxsubaddw, int_hexagon_S4_vxsubaddw>;
125 def : T_PP_pat <S4_vxaddsubh, int_hexagon_S4_vxaddsubh>;
126 def : T_PP_pat <S4_vxsubaddh, int_hexagon_S4_vxsubaddh>;
127
128 def : T_PP_pat <S4_vxaddsubhr, int_hexagon_S4_vxaddsubhr>;  // _hr halfword variants (suffix meaning not shown here)
129 def : T_PP_pat <S4_vxsubaddhr, int_hexagon_S4_vxsubaddhr>;
130
131 // Extract bitfield: _rp forms take register operands, the others two immediates
132 def : T_PP_pat  <S4_extractp_rp, int_hexagon_S4_extractp_rp>;
133 def : T_RP_pat  <S4_extract_rp, int_hexagon_S4_extract_rp>;
134 def : T_PII_pat <S4_extractp, int_hexagon_S4_extractp>;
135 def : T_RII_pat <S4_extract, int_hexagon_S4_extract>;
136
137 // Vector conditional negate
138 // Rdd=vcnegh(Rss,Rt)
139 def : T_PR_pat <S2_vcnegh, int_hexagon_S2_vcnegh>;
140
141 // Shift an immediate left by register amount (immediate source first, then register)
142 def : T_IR_pat<S4_lsli, int_hexagon_S4_lsli>;
143
144 // Vector reduce maximum halfwords (vrmaxh and its 'u' variant vrmaxuh)
145 def : T_PPR_pat <A4_vrmaxh, int_hexagon_A4_vrmaxh>;
146 def : T_PPR_pat <A4_vrmaxuh, int_hexagon_A4_vrmaxuh>;
147
148 // Vector reduce maximum words (vrmaxw and its 'u' variant vrmaxuw)
149 def : T_PPR_pat <A4_vrmaxw, int_hexagon_A4_vrmaxw>;
150 def : T_PPR_pat <A4_vrmaxuw, int_hexagon_A4_vrmaxuw>;
151
152 // Vector reduce minimum halfwords (vrminh and its 'u' variant vrminuh)
153 def : T_PPR_pat <A4_vrminh, int_hexagon_A4_vrminh>;
154 def : T_PPR_pat <A4_vrminuh, int_hexagon_A4_vrminuh>;
155
156 // Vector reduce minimum words (vrminw and its 'u' variant vrminuw)
157 def : T_PPR_pat <A4_vrminw, int_hexagon_A4_vrminw>;
158 def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
159
160 // Rotate and reduce bytes: sources are a DoubleRegs, an IntRegs and a u2 immediate
161 def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
162                                      u2ImmPred:$src3),
163            (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>;
164
165 // Rotate and reduce bytes with accumulation
166 // Rxx+=vrcrotate(Rss,Rt,#u2)  ($src1 is the accumulator Rxx, followed by Rss, Rt, #u2)
167 def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
168                                          IntRegs:$src3, u2ImmPred:$src4),
169            (S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
170                              IntRegs:$src3, u2ImmPred:$src4)>;
171
172 // Vector reduce conditional negate (T_PPR_pat: leading P operand in addition to the pair and register sources)
173 def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
174
175 // Logical xor with xor accumulation (three P operands)
176 def : T_PPP_pat<M4_xor_xacc, int_hexagon_M4_xor_xacc>;
177
178 // ALU64 - Vector min/max byte
179 def : T_PP_pat <A2_vminb, int_hexagon_A2_vminb>;
180 def : T_PP_pat <A2_vmaxb, int_hexagon_A2_vmaxb>;
181
182 // Shift and add/sub/and/or: asl_ri and lsr_ri variants of each op, all via T_IRI_pat
183 def : T_IRI_pat <S4_andi_asl_ri, int_hexagon_S4_andi_asl_ri>;
184 def : T_IRI_pat <S4_ori_asl_ri,  int_hexagon_S4_ori_asl_ri>;
185 def : T_IRI_pat <S4_addi_asl_ri, int_hexagon_S4_addi_asl_ri>;
186 def : T_IRI_pat <S4_subi_asl_ri, int_hexagon_S4_subi_asl_ri>;
187 def : T_IRI_pat <S4_andi_lsr_ri, int_hexagon_S4_andi_lsr_ri>;
188 def : T_IRI_pat <S4_ori_lsr_ri,  int_hexagon_S4_ori_lsr_ri>;
189 def : T_IRI_pat <S4_addi_lsr_ri, int_hexagon_S4_addi_lsr_ri>;
190 def : T_IRI_pat <S4_subi_lsr_ri, int_hexagon_S4_subi_lsr_ri>;
191
192 // Split bitfield (register/immediate and register/register forms)
193 def : T_RI_pat <A4_bitspliti, int_hexagon_A4_bitspliti>;
194 def : T_RR_pat <A4_bitsplit, int_hexagon_A4_bitsplit>;
195
196 def: T_RR_pat<S4_parity,   int_hexagon_S4_parity>;  // two register sources
197
198 def: T_RI_pat<S4_ntstbit_i,  int_hexagon_S4_ntstbit_i>;  // ntstbit: immediate (_i) and register (_r) second operand
199 def: T_RR_pat<S4_ntstbit_r,  int_hexagon_S4_ntstbit_r>;
200
201 def: T_RI_pat<S4_clbaddi,  int_hexagon_S4_clbaddi>;  // clb*: presumably count-leading-bits helpers -- confirm
202 def: T_PI_pat<S4_clbpaddi, int_hexagon_S4_clbpaddi>;  // P source plus immediate
203 def: T_P_pat <S4_clbpnorm, int_hexagon_S4_clbpnorm>;  // single P source
204
205 /********************************************************************
206 *            ALU32/ALU                                              *
207 *********************************************************************/
208
209 // ALU32 / ALU / Logical Operations (register/register andn and orn).
210 def: T_RR_pat<A4_andn, int_hexagon_A4_andn>;
211 def: T_RR_pat<A4_orn,  int_hexagon_A4_orn>;
212
213 /********************************************************************
214 *            ALU32/PERM                                             *
215 *********************************************************************/
216
217 // Combine Words Into Doublewords. The third argument (s8ExtPred) presumably selects the immediate predicate -- confirm against T_RI_pat/T_IR_pat.
218 def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>;
219 def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>;
220
221 /********************************************************************
222 *            ALU32/PRED                                             *
223 *********************************************************************/
224
225 // Compare (register/immediate; note s10ExtPred for cmpneqi/cmpltei but u9ExtPred for cmplteui)
226 def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>;
227 def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>;
228 def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>;
229
230 def: T_RR_pat<A4_rcmpeq,  int_hexagon_A4_rcmpeq>;  // rcmp*: register/register forms
231 def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
232
233 def: T_RI_pat<A4_rcmpeqi,  int_hexagon_A4_rcmpeqi>;  // rcmp*i: register/immediate forms (no explicit immediate predicate given)
234 def: T_RI_pat<A4_rcmpneqi, int_hexagon_A4_rcmpneqi>;
235
236 /********************************************************************
237 *            CR                                                     *
238 *********************************************************************/
239
240 // CR / Logical Operations On Predicates.
241
242 class qi_CRInst_qiqiqi_pat<Intrinsic IntID, InstHexagon Inst> :  // selection pattern for a 3-source i32 intrinsic
243   Pat<(i32 (IntID IntRegs:$Rs, IntRegs:$Rt, IntRegs:$Ru)),        // match: IntID applied to three IntRegs
244       (i32 (C2_tfrpr (Inst (C2_tfrrp IntRegs:$Rs),                // emit: each source goes through C2_tfrrp,
245                            (C2_tfrrp IntRegs:$Rt),                //   Inst consumes the three results, and
246                            (C2_tfrrp IntRegs:$Ru))))>;            //   C2_tfrpr yields the i32 (presumably GPR<->predicate moves -- confirm)
247
248 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_and,   C4_and_and>;  // argument order here is <intrinsic, instruction>
249 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_andn,  C4_and_andn>;
250 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_or,    C4_and_or>;
251 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_and_orn,   C4_and_orn>;
252 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_and,    C4_or_and>;  // or_* group mirrors the and_* group above
253 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_andn,   C4_or_andn>;
254 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_or,     C4_or_or>;
255 def: qi_CRInst_qiqiqi_pat<int_hexagon_C4_or_orn,    C4_or_orn>;
256
257 /********************************************************************
258 *            XTYPE/ALU                                              *
259 *********************************************************************/
260
261 // Add And Accumulate (addaddi: immediate is the last source; subaddi: immediate is the middle source).
262
263 def : T_RRI_pat <S4_addaddi, int_hexagon_S4_addaddi>;
264 def : T_RIR_pat <S4_subaddi, int_hexagon_S4_subaddi>;
265
266
267 // XTYPE / ALU / Logical-logical Words (three register sources each).
268 def : T_RRR_pat <M4_or_xor,   int_hexagon_M4_or_xor>;
269 def : T_RRR_pat <M4_and_xor,  int_hexagon_M4_and_xor>;
270 def : T_RRR_pat <M4_or_and,   int_hexagon_M4_or_and>;
271 def : T_RRR_pat <M4_and_and,  int_hexagon_M4_and_and>;
272 def : T_RRR_pat <M4_xor_and,  int_hexagon_M4_xor_and>;
273 def : T_RRR_pat <M4_or_or,    int_hexagon_M4_or_or>;
274 def : T_RRR_pat <M4_and_or,   int_hexagon_M4_and_or>;
275 def : T_RRR_pat <M4_xor_or,   int_hexagon_M4_xor_or>;
276 def : T_RRR_pat <M4_or_andn,  int_hexagon_M4_or_andn>;
277 def : T_RRR_pat <M4_and_andn, int_hexagon_M4_and_andn>;
278 def : T_RRR_pat <M4_xor_andn, int_hexagon_M4_xor_andn>;
279
280 def : T_RRI_pat <S4_or_andi, int_hexagon_S4_or_andi>;  // immediate forms: two registers, then an immediate
281 def : T_RRI_pat <S4_or_andix,  int_hexagon_S4_or_andix>;
282 def : T_RRI_pat <S4_or_ori, int_hexagon_S4_or_ori>;
283
284 // Modulo wrap.
285 def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
284 // Modulo wrap.
285 def : T_RR_pat <A4_modwrapu, int_hexagon_A4_modwrapu>;
286
287 // Arithmetic/Convergent round (_rr = register amount, _ri = immediate amount)
288 // Rd=[cround|round](Rs,Rt)[:sat]
289 // Rd=[cround|round](Rs,#u5)[:sat]
290 def : T_RI_pat <A4_cround_ri, int_hexagon_A4_cround_ri>;
291 def : T_RR_pat <A4_cround_rr, int_hexagon_A4_cround_rr>;
292
293 def : T_RI_pat <A4_round_ri, int_hexagon_A4_round_ri>;
294 def : T_RR_pat <A4_round_rr, int_hexagon_A4_round_rr>;
295
296 def : T_RI_pat <A4_round_ri_sat, int_hexagon_A4_round_ri_sat>;  // :sat forms exist here only for round, not cround
297 def : T_RR_pat <A4_round_rr_sat, int_hexagon_A4_round_rr_sat>;
298
299 def : T_P_pat <A2_roundsat, int_hexagon_A2_roundsat>;  // not one of the Rs,Rt / Rs,#u5 forms above: single P source