forked from hxuhack/symobfuscator-deprecated-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAsmMatcherEmitter.cpp
3263 lines (2795 loc) · 121 KB
/
AsmMatcherEmitter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits a target specifier matcher for converting parsed
// assembly operands in the MCInst structures. It also emits a matcher for
// custom operand parsing.
//
// Converting assembly operands into MCInst structures
// ---------------------------------------------------
//
// The input to the target specific matcher is a list of literal tokens and
// operands. The target specific parser should generally eliminate any syntax
// which is not relevant for matching; for example, comma tokens should have
// already been consumed and eliminated by the parser. Most instructions will
// end up with a single literal token (the instruction name) and some number of
// operands.
//
// Some example inputs, for X86:
// 'addl' (immediate ...) (register ...)
// 'add' (immediate ...) (memory ...)
// 'call' '*' %epc
//
// The assembly matcher is responsible for converting this input into a precise
// machine instruction (i.e., an instruction with a well defined encoding). This
// mapping has several properties which complicate matching:
//
// - It may be ambiguous; many architectures can legally encode particular
// variants of an instruction in different ways (for example, using a smaller
// encoding for small immediates). Such ambiguities should never be
// arbitrarily resolved by the assembler, the assembler is always responsible
// for choosing the "best" available instruction.
//
// - It may depend on the subtarget or the assembler context. Instructions
// which are invalid for the current mode, but otherwise unambiguous (e.g.,
// an SSE instruction in a file being assembled for i486) should be accepted
// and rejected by the assembler front end. However, if the proper encoding
// for an instruction is dependent on the assembler context then the matcher
// is responsible for selecting the correct machine instruction for the
// current mode.
//
// The core matching algorithm attempts to exploit the regularity in most
// instruction sets to quickly determine the set of possibly matching
// instructions, and to simplify the generated code. Additionally, this helps
// to ensure that the ambiguities are intentionally resolved by the user.
//
// The matching is divided into two distinct phases:
//
// 1. Classification: Each operand is mapped to the unique set which (a)
// contains it, and (b) is the largest such subset for which a single
// instruction could match all members.
//
// For register classes, we can generate these subgroups automatically. For
// arbitrary operands, we expect the user to define the classes and their
// relations to one another (for example, 8-bit signed immediates as a
// subset of 32-bit immediates).
//
// By partitioning the operands in this way, we guarantee that for any
// tuple of classes, any single instruction must match either all or none
// of the sets of operands which could classify to that tuple.
//
// In addition, the subset relation amongst classes induces a partial order
// on such tuples, which we use to resolve ambiguities.
//
// 2. The input can now be treated as a tuple of classes (static tokens are
// simple singleton sets). Each such tuple should generally map to a single
// instruction (we currently ignore cases where this isn't true, whee!!!),
// which we can emit a simple matcher for.
//
// Custom Operand Parsing
// ----------------------
//
// Some targets need a custom way to parse operands, some specific instructions
// can contain arguments that can represent processor flags and other kinds of
// identifiers that need to be mapped to specific values in the final encoded
// instructions. The target specific custom operand parsing works in the
// following way:
//
// 1. An operand match table is built; each entry contains a mnemonic, an
// operand class, a mask for all operand positions for that same
// class/mnemonic and target features to be checked while trying to match.
//
// 2. The operand matcher will try every possible entry with the same
// mnemonic and will check if the target feature for this mnemonic also
// matches. After that, if the operand to be matched has its index
// present in the mask, a successful match occurs. Otherwise, fallback
// to the regular operand parsing.
//
// 3. For a match success, each operand class that has a 'ParserMethod'
// becomes part of a switch from where the custom method is called.
//
//===----------------------------------------------------------------------===//
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
#include "Types.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cctype>
#include <forward_list>
#include <map>
#include <set>
using namespace llvm;
// Debug category name for this file. Presumably consumed by the debug-output
// macros of llvm/Support/Debug.h -- confirm against that header.
#define DEBUG_TYPE "asm-matcher-emitter"
// Command-line category that groups the options belonging to this backend.
cl::OptionCategory AsmMatcherEmitterCat("Options for -gen-asm-matcher");
// -match-prefix: string option, empty by default, registered in the category
// above. Per its description it restricts matching to instructions with the
// given prefix; the code that reads it is outside this part of the file.
static cl::opt<std::string>
MatchPrefix("match-prefix", cl::init(""),
cl::desc("Only match instructions with the given prefix"),
cl::cat(AsmMatcherEmitterCat));
namespace {
class AsmMatcherInfo;
// Sets of registers end up as keys in some of the second-order sets TableGen
// builds while generating its data structures. The relative order of two such
// sets can therefore occasionally leak into the emitted AsmMatcher tables, and
// can even influence compiler output (at least the diagnostics produced when
// every match fails). To keep that order stable, the set is sorted with
// LessRecordByID.
using RegisterSet = std::set<Record *, LessRecordByID>;
/// AsmMatcherEmitter - Front object of this backend. It only stores the
/// RecordKeeper it was constructed with; the work happens in run().
class AsmMatcherEmitter {
  RecordKeeper &Records;

public:
  AsmMatcherEmitter(RecordKeeper &RK) : Records(RK) {}

  /// Declared here, defined out of line (outside the visible part of the
  /// file). Takes the stream the backend writes to.
  void run(raw_ostream &OS);
};
/// ClassInfo - Helper class for storing the information about a particular
/// class of operands which can be matched.
struct ClassInfo {
enum ClassInfoKind {
/// Invalid kind, for use as a sentinel value.
Invalid = 0,
/// The class for a particular token.
Token,
/// The (first) register class, subsequent register classes are
/// RegisterClass0+1, and so on.
RegisterClass0,
/// The (first) user defined class, subsequent user defined classes are
/// UserClass0+1, and so on.
UserClass0 = 1<<16
};
/// Kind - The class kind, which is either a predefined kind, or (UserClass0 +
/// N) for the Nth user defined class.
unsigned Kind;
/// SuperClasses - The super classes of this class. Note that for simplicities
/// sake user operands only record their immediate super class, while register
/// operands include all superclasses.
std::vector<ClassInfo*> SuperClasses;
/// Name - The full class name, suitable for use in an enum.
std::string Name;
/// ClassName - The unadorned generic name for this class (e.g., Token).
std::string ClassName;
/// ValueName - The name of the value this class represents; for a token this
/// is the literal token string, for an operand it is the TableGen class (or
/// empty if this is a derived class).
std::string ValueName;
/// PredicateMethod - The name of the operand method to test whether the
/// operand matches this class; this is not valid for Token or register kinds.
std::string PredicateMethod;
/// RenderMethod - The name of the operand method to add this operand to an
/// MCInst; this is not valid for Token or register kinds.
std::string RenderMethod;
/// ParserMethod - The name of the operand method to do a target specific
/// parsing on the operand.
std::string ParserMethod;
/// For register classes: the records for all the registers in this class.
RegisterSet Registers;
/// For custom match classes: the diagnostic kind for when the predicate fails.
std::string DiagnosticType;
/// Is this operand optional and not always required.
bool IsOptional;
/// DefaultMethod - The name of the method that returns the default operand
/// for optional operand
std::string DefaultMethod;
public:
/// isRegisterClass() - Check if this is a register class.
bool isRegisterClass() const {
return Kind >= RegisterClass0 && Kind < UserClass0;
}
/// isUserClass() - Check if this is a user defined class.
bool isUserClass() const {
return Kind >= UserClass0;
}
/// isRelatedTo - Check whether this class is "related" to \p RHS. Classes
/// are related if they are in the same class hierarchy.
bool isRelatedTo(const ClassInfo &RHS) const {
// Tokens are only related to tokens.
if (Kind == Token || RHS.Kind == Token)
return Kind == Token && RHS.Kind == Token;
// Registers classes are only related to registers classes, and only if
// their intersection is non-empty.
if (isRegisterClass() || RHS.isRegisterClass()) {
if (!isRegisterClass() || !RHS.isRegisterClass())
return false;
RegisterSet Tmp;
std::insert_iterator<RegisterSet> II(Tmp, Tmp.begin());
std::set_intersection(Registers.begin(), Registers.end(),
RHS.Registers.begin(), RHS.Registers.end(),
II, LessRecordByID());
return !Tmp.empty();
}
// Otherwise we have two users operands; they are related if they are in the
// same class hierarchy.
//
// FIXME: This is an oversimplification, they should only be related if they
// intersect, however we don't have that information.
assert(isUserClass() && RHS.isUserClass() && "Unexpected class!");
const ClassInfo *Root = this;
while (!Root->SuperClasses.empty())
Root = Root->SuperClasses.front();
const ClassInfo *RHSRoot = &RHS;
while (!RHSRoot->SuperClasses.empty())
RHSRoot = RHSRoot->SuperClasses.front();
return Root == RHSRoot;
}
/// isSubsetOf - Test whether this class is a subset of \p RHS.
bool isSubsetOf(const ClassInfo &RHS) const {
// This is a subset of RHS if it is the same class...
if (this == &RHS)
return true;
// ... or if any of its super classes are a subset of RHS.
for (const ClassInfo *CI : SuperClasses)
if (CI->isSubsetOf(RHS))
return true;
return false;
}
int getTreeDepth() const {
int Depth = 0;
const ClassInfo *Root = this;
while (!Root->SuperClasses.empty()) {
Depth++;
Root = Root->SuperClasses.front();
}
return Depth;
}
const ClassInfo *findRoot() const {
const ClassInfo *Root = this;
while (!Root->SuperClasses.empty())
Root = Root->SuperClasses.front();
return Root;
}
/// Compare two classes. This does not produce a total ordering, but does
/// guarantee that subclasses are sorted before their parents, and that the
/// ordering is transitive.
bool operator<(const ClassInfo &RHS) const {
if (this == &RHS)
return false;
// First, enforce the ordering between the three different types of class.
// Tokens sort before registers, which sort before user classes.
if (Kind == Token) {
if (RHS.Kind != Token)
return true;
assert(RHS.Kind == Token);
} else if (isRegisterClass()) {
if (RHS.Kind == Token)
return false;
else if (RHS.isUserClass())
return true;
assert(RHS.isRegisterClass());
} else if (isUserClass()) {
if (!RHS.isUserClass())
return false;
assert(RHS.isUserClass());
} else {
llvm_unreachable("Unknown ClassInfoKind");
}
if (Kind == Token || isUserClass()) {
// Related tokens and user classes get sorted by depth in the inheritence
// tree (so that subclasses are before their parents).
if (isRelatedTo(RHS)) {
if (getTreeDepth() > RHS.getTreeDepth())
return true;
if (getTreeDepth() < RHS.getTreeDepth())
return false;
} else {
// Unrelated tokens and user classes are ordered by the name of their
// root nodes, so that there is a consistent ordering between
// unconnected trees.
return findRoot()->ValueName < RHS.findRoot()->ValueName;
}
} else if (isRegisterClass()) {
// For register sets, sort by number of registers. This guarantees that
// a set will always sort before all of it's strict supersets.
if (Registers.size() != RHS.Registers.size())
return Registers.size() < RHS.Registers.size();
} else {
llvm_unreachable("Unknown ClassInfoKind");
}
// FIXME: We should be able to just return false here, as we only need a
// partial order (we use stable sorts, so this is deterministic) and the
// name of a class shouldn't be significant. However, some of the backends
// accidentally rely on this behaviour, so it will have to stay like this
// until they are fixed.
return ValueName < RHS.ValueName;
}
};
/// AsmVariantInfo - Plain bundle of settings describing one assembly syntax
/// variant. It has no behaviour of its own; every member is public.
class AsmVariantInfo {
public:
// NOTE(review): presumably the prefix that extractSingletonRegisterForAsmOperand()
// receives as its RegisterPrefix argument -- the call site is not visible here.
StringRef RegisterPrefix;
// NOTE(review): the three character sets below are presumably consumed by
// MatchableInfo::tokenizeAsmString(); its body is outside this view, so their
// exact roles should be confirmed there.
StringRef TokenizingCharacters;
StringRef SeparatorCharacters;
StringRef BreakCharacters;
// Name of the variant (usage not visible in this part of the file).
StringRef Name;
// Variant number; MatchableInfo::initialize() copies it into AsmVariantID.
int AsmVariantNo;
};
/// MatchableInfo - Helper class for storing the necessary information for an
/// instruction or alias which is capable of being matched.
///
/// Ownership note: when constructed from an alias, the object owns the
/// CodeGenInstAlias held in DefRec and deletes it in the destructor. For that
/// reason the copy constructor asserts that the source is not alias-backed.
struct MatchableInfo {
  struct AsmOperand {
    /// Token - This is the token that the operand came from.
    StringRef Token;

    /// The unique class instance this operand should match.
    ClassInfo *Class;

    /// The operand name this is, if anything.
    StringRef SrcOpName;

    /// The suboperand index within SrcOpName, or -1 for the entire operand.
    int SubOpIdx;

    /// Whether the token is "isolated", i.e., it is preceded and followed
    /// by separators.
    bool IsIsolatedToken;

    /// Register record if this token is a singleton register.
    Record *SingletonReg;

    explicit AsmOperand(bool IsIsolatedToken, StringRef T)
        : Token(T), Class(nullptr), SubOpIdx(-1),
          IsIsolatedToken(IsIsolatedToken), SingletonReg(nullptr) {}
  };

  /// ResOperand - This represents a single operand in the result instruction
  /// generated by the match. In cases (like addressing modes) where a single
  /// assembler operand expands to multiple MCOperands, this represents the
  /// single assembler operand, not the MCOperand.
  struct ResOperand {
    enum {
      /// RenderAsmOperand - This represents an operand result that is
      /// generated by calling the render method on the assembly operand. The
      /// corresponding AsmOperand is specified by AsmOperandNum.
      RenderAsmOperand,

      /// TiedOperand - This represents a result operand that is a duplicate of
      /// a previous result operand.
      TiedOperand,

      /// ImmOperand - This represents an immediate value that is dumped into
      /// the operand.
      ImmOperand,

      /// RegOperand - This represents a fixed register that is dumped in.
      RegOperand
    } Kind;

    /// Payload; which member is active is determined by Kind.
    union {
      /// This is the operand # in the AsmOperands list that this should be
      /// copied from.
      unsigned AsmOperandNum;

      /// TiedOperandNum - This is the (earlier) result operand that should be
      /// copied from.
      unsigned TiedOperandNum;

      /// ImmVal - This is the immediate value added to the instruction.
      int64_t ImmVal;

      /// Register - This is the register record.
      Record *Register;
    };

    /// MINumOperands - The number of MCInst operands populated by this
    /// operand.
    unsigned MINumOperands;

    /// Build a RenderAsmOperand result that renders asm operand \p AsmOpNum
    /// into \p NumOperands MCInst operands.
    static ResOperand getRenderedOp(unsigned AsmOpNum, unsigned NumOperands) {
      ResOperand X;
      X.Kind = RenderAsmOperand;
      X.AsmOperandNum = AsmOpNum;
      X.MINumOperands = NumOperands;
      return X;
    }

    /// Build a TiedOperand result duplicating result operand
    /// \p TiedOperandNum; it populates one MCInst operand.
    static ResOperand getTiedOp(unsigned TiedOperandNum) {
      ResOperand X;
      X.Kind = TiedOperand;
      X.TiedOperandNum = TiedOperandNum;
      X.MINumOperands = 1;
      return X;
    }

    /// Build an ImmOperand result carrying \p Val; it populates one MCInst
    /// operand.
    static ResOperand getImmOp(int64_t Val) {
      ResOperand X;
      X.Kind = ImmOperand;
      X.ImmVal = Val;
      X.MINumOperands = 1;
      return X;
    }

    /// Build a RegOperand result for the fixed register \p Reg; it populates
    /// one MCInst operand.
    static ResOperand getRegOp(Record *Reg) {
      ResOperand X;
      X.Kind = RegOperand;
      X.Register = Reg;
      X.MINumOperands = 1;
      return X;
    }
  };

  /// AsmVariantID - Target's assembly syntax variant no.
  int AsmVariantID;

  /// AsmString - The assembly string for this instruction (with variants
  /// removed), e.g. "movsx $src, $dst".
  std::string AsmString;

  /// TheDef - This is the definition of the instruction or InstAlias that this
  /// matchable came from.
  Record *const TheDef;

  /// DefRec - This is the definition that it came from.
  PointerUnion<const CodeGenInstruction*, const CodeGenInstAlias*> DefRec;

  /// The instruction that a successful match produces: the instruction
  /// itself, or the alias's ResultInst when this matchable came from an alias.
  const CodeGenInstruction *getResultInst() const {
    if (DefRec.is<const CodeGenInstruction*>())
      return DefRec.get<const CodeGenInstruction*>();
    return DefRec.get<const CodeGenInstAlias*>()->ResultInst;
  }

  /// ResOperands - This is the operand list that should be built for the result
  /// MCInst.
  SmallVector<ResOperand, 8> ResOperands;

  /// Mnemonic - This is the first token of the matched instruction, its
  /// mnemonic.
  StringRef Mnemonic;

  /// AsmOperands - The textual operands that this instruction matches,
  /// annotated with a class and where in the OperandList they were defined.
  /// This directly corresponds to the tokenized AsmString after the mnemonic is
  /// removed.
  SmallVector<AsmOperand, 8> AsmOperands;

  /// RequiredFeatures - The required subtarget features to match this
  /// instruction.
  SmallVector<const SubtargetFeatureInfo *, 4> RequiredFeatures;

  /// ConversionFnKind - The enum value which is passed to the generated
  /// convertToMCInst to convert parsed operands into an MCInst for this
  /// function.
  std::string ConversionFnKind;

  /// If this instruction is deprecated in some form. Defaults to false so the
  /// member never holds an indeterminate value: neither constructor sets it,
  /// yet the copy constructor reads it.
  bool HasDeprecation = false;

  /// If this is an alias, this is used to determine whether to use the
  /// conversion function defined by the instruction's AsmMatchConverter
  /// or the function generated by the alias.
  bool UseInstAsmMatchConverter;

  /// Build a matchable for a plain instruction. The instruction is referenced,
  /// not owned.
  MatchableInfo(const CodeGenInstruction &CGI)
      : AsmVariantID(0), AsmString(CGI.AsmString), TheDef(CGI.TheDef),
        DefRec(&CGI), UseInstAsmMatchConverter(true) {
  }

  /// Build a matchable for an instruction alias and take ownership of it.
  /// UseInstAsmMatchConverter is read from the alias record's bit of the same
  /// name.
  MatchableInfo(std::unique_ptr<const CodeGenInstAlias> Alias)
      : AsmVariantID(0), AsmString(Alias->AsmString), TheDef(Alias->TheDef),
        DefRec(Alias.release()),
        UseInstAsmMatchConverter(
            TheDef->getValueAsBit("UseInstAsmMatchConverter")) {
  }

  // Could remove this and the dtor if PointerUnion supported unique_ptr
  // elements with a dynamic failure/assertion (like the one below) in the case
  // where it was copied while being in an owning state.
  MatchableInfo(const MatchableInfo &RHS)
      : AsmVariantID(RHS.AsmVariantID), AsmString(RHS.AsmString),
        TheDef(RHS.TheDef), DefRec(RHS.DefRec), ResOperands(RHS.ResOperands),
        Mnemonic(RHS.Mnemonic), AsmOperands(RHS.AsmOperands),
        RequiredFeatures(RHS.RequiredFeatures),
        ConversionFnKind(RHS.ConversionFnKind),
        HasDeprecation(RHS.HasDeprecation),
        UseInstAsmMatchConverter(RHS.UseInstAsmMatchConverter) {
    // Copying an alias-backed matchable would make two objects delete the
    // same CodeGenInstAlias.
    assert(!DefRec.is<const CodeGenInstAlias *>());
  }

  ~MatchableInfo() {
    // dyn_cast yields null for the (non-owned) instruction case, so this only
    // frees an owned alias.
    delete DefRec.dyn_cast<const CodeGenInstAlias*>();
  }

  // Two-operand aliases clone from the main matchable, but mark the second
  // operand as a tied operand of the first for purposes of the assembler.
  void formTwoOperandAlias(StringRef Constraint);

  void initialize(const AsmMatcherInfo &Info,
                  SmallPtrSetImpl<Record*> &SingletonRegisters,
                  AsmVariantInfo const &Variant,
                  bool HasMnemonicFirst);

  /// validate - Return true if this matchable is a valid thing to match against
  /// and perform a bunch of validity checking.
  bool validate(StringRef CommentDelimiter, bool Hack) const;

  /// findAsmOperand - Find the AsmOperand with the specified name and
  /// suboperand index. Returns its index in AsmOperands, or -1 if absent.
  int findAsmOperand(StringRef N, int SubOpIdx) const {
    auto I = find_if(AsmOperands, [&](const AsmOperand &Op) {
      return Op.SrcOpName == N && Op.SubOpIdx == SubOpIdx;
    });
    return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
  }

  /// findAsmOperandNamed - Find the first AsmOperand with the specified name.
  /// This does not check the suboperand index. Returns its index in
  /// AsmOperands, or -1 if absent.
  int findAsmOperandNamed(StringRef N) const {
    auto I = find_if(AsmOperands,
                     [&](const AsmOperand &Op) { return Op.SrcOpName == N; });
    return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
  }

  void buildInstructionResultOperands();
  void buildAliasResultOperands();

  /// operator< - Compare two matchables.
  bool operator<(const MatchableInfo &RHS) const {
    // The primary comparator is the instruction mnemonic. Test the sign of
    // the result rather than one specific negative value.
    if (int Cmp = Mnemonic.compare(RHS.Mnemonic))
      return Cmp < 0;

    if (AsmOperands.size() != RHS.AsmOperands.size())
      return AsmOperands.size() < RHS.AsmOperands.size();

    // Compare lexicographically by operand. The matcher validates that other
    // orderings wouldn't be ambiguous using \see couldMatchAmbiguouslyWith().
    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
      if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
        return true;
      if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
        return false;
    }

    // Give matches that require more features higher precedence. This is useful
    // because we cannot define AssemblerPredicates with the negation of
    // processor features. For example, ARM v6 "nop" may be either a HINT or
    // MOV. With v6, we want to match HINT. The assembler has no way to
    // predicate MOV under "NoV6", but HINT will always match first because it
    // requires V6 while MOV does not.
    if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
      return RequiredFeatures.size() > RHS.RequiredFeatures.size();

    return false;
  }

  /// couldMatchAmbiguouslyWith - Check whether this matchable could
  /// ambiguously match the same set of operands as \p RHS (without being a
  /// strictly superior match).
  bool couldMatchAmbiguouslyWith(const MatchableInfo &RHS) const {
    // The primary comparator is the instruction mnemonic.
    if (Mnemonic != RHS.Mnemonic)
      return false;

    // The number of operands is unambiguous.
    if (AsmOperands.size() != RHS.AsmOperands.size())
      return false;

    // Otherwise, make sure the ordering of the two instructions is unambiguous
    // by checking that either (a) a token or operand kind discriminates them,
    // or (b) the ordering among equivalent kinds is consistent.

    // Tokens and operand kinds are unambiguous (assuming a correct target
    // specific parser).
    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i)
      if (AsmOperands[i].Class->Kind != RHS.AsmOperands[i].Class->Kind ||
          AsmOperands[i].Class->Kind == ClassInfo::Token)
        if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class ||
            *RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
          return false;

    // Otherwise, this operand could commute if all operands are equivalent, or
    // there is a pair of operands that compare less than and a pair that
    // compare greater than.
    bool HasLT = false, HasGT = false;
    for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
      if (*AsmOperands[i].Class < *RHS.AsmOperands[i].Class)
        HasLT = true;
      if (*RHS.AsmOperands[i].Class < *AsmOperands[i].Class)
        HasGT = true;
    }

    return HasLT == HasGT;
  }

  void dump() const;

private:
  void tokenizeAsmString(AsmMatcherInfo const &Info,
                         AsmVariantInfo const &Variant);
  void addAsmOperand(StringRef Token, bool IsIsolatedToken = false);
};
struct OperandMatchEntry {
unsigned OperandMask;
const MatchableInfo* MI;
ClassInfo *CI;
static OperandMatchEntry create(const MatchableInfo *mi, ClassInfo *ci,
unsigned opMask) {
OperandMatchEntry X;
X.OperandMask = opMask;
X.CI = ci;
X.MI = mi;
return X;
}
};
class AsmMatcherInfo {
public:
/// Tracked Records
RecordKeeper &Records;
/// The tablegen AsmParser record.
Record *AsmParser;
/// Target - The target information.
CodeGenTarget &Target;
/// The classes which are needed for matching.
std::forward_list<ClassInfo> Classes;
/// The information on the matchables to match.
std::vector<std::unique_ptr<MatchableInfo>> Matchables;
/// Info for custom matching operands by user defined methods.
std::vector<OperandMatchEntry> OperandMatchInfo;
/// Map of Register records to their class information.
typedef std::map<Record*, ClassInfo*, LessRecordByID> RegisterClassesTy;
RegisterClassesTy RegisterClasses;
/// Map of Predicate records to their subtarget information.
std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
/// Map of AsmOperandClass records to their class information.
std::map<Record*, ClassInfo*> AsmOperandClasses;
private:
/// Map of token to class information which has already been constructed.
std::map<std::string, ClassInfo*> TokenClasses;
/// Map of RegisterClass records to their class information.
std::map<Record*, ClassInfo*> RegisterClassClasses;
private:
/// getTokenClass - Lookup or create the class for the given token.
ClassInfo *getTokenClass(StringRef Token);
/// getOperandClass - Lookup or create the class for the given operand.
ClassInfo *getOperandClass(const CGIOperandList::OperandInfo &OI,
int SubOpIdx);
ClassInfo *getOperandClass(Record *Rec, int SubOpIdx);
/// buildRegisterClasses - Build the ClassInfo* instances for register
/// classes.
void buildRegisterClasses(SmallPtrSetImpl<Record*> &SingletonRegisters);
/// buildOperandClasses - Build the ClassInfo* instances for user defined
/// operand classes.
void buildOperandClasses();
void buildInstructionOperandReference(MatchableInfo *II, StringRef OpName,
unsigned AsmOpIdx);
void buildAliasOperandReference(MatchableInfo *II, StringRef OpName,
MatchableInfo::AsmOperand &Op);
public:
AsmMatcherInfo(Record *AsmParser,
CodeGenTarget &Target,
RecordKeeper &Records);
/// Construct the various tables used during matching.
void buildInfo();
/// buildOperandMatchInfo - Build the necessary information to handle user
/// defined operand parsing methods.
void buildOperandMatchInfo();
/// getSubtargetFeature - Lookup or create the subtarget feature info for the
/// given operand.
const SubtargetFeatureInfo *getSubtargetFeature(Record *Def) const {
assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
const auto &I = SubtargetFeatures.find(Def);
return I == SubtargetFeatures.end() ? nullptr : &I->second;
}
RecordKeeper &getRecords() const {
return Records;
}
bool hasOptionalOperands() const {
return find_if(Classes, [](const ClassInfo &Class) {
return Class.IsOptional;
}) != Classes.end();
}
};
} // end anonymous namespace
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debugging aid: write the defining record's name and the flattened asm
/// string to errs(), followed by one line per asm operand giving its index,
/// class name and token.
LLVM_DUMP_METHOD void MatchableInfo::dump() const {
  errs() << TheDef->getName() << " -- flattened:\"" << AsmString << "\"\n";

  unsigned Idx = 0;
  for (const AsmOperand &Operand : AsmOperands) {
    errs() << " op[" << Idx << "] = " << Operand.Class->ClassName << " - "
           << '\"' << Operand.Token << "\"\n";
    ++Idx;
  }
}
#endif
/// Parse a two-operand alias constraint of the form "$a = $b".
///
/// \param S   The constraint text.
/// \param Loc Source location(s) used when reporting a malformed constraint.
/// \returns The two operand names, in order, with the leading '$' and the
///          surrounding whitespace removed.
///
/// Reports a fatal error when the '=' is missing (or nothing follows it) or
/// when either side lacks a '$'.
static std::pair<StringRef, StringRef>
parseTwoOperandConstraint(StringRef S, ArrayRef<SMLoc> Loc) {
  // Split via the '='.
  std::pair<StringRef, StringRef> Ops = S.split('=');
  if (Ops.second == "")
    PrintFatalError(Loc, "missing '=' in two-operand alias constraint");

  // Reduce one side of the constraint to the bare operand name: drop
  // everything up to and including the '$', then cut at the last blank.
  // Both sides go through this one helper. Previously the cut computed for
  // the second name was applied to the first, so the second name kept its
  // trailing whitespace and the first could be truncated.
  auto StripToName = [&](StringRef Side) -> StringRef {
    size_t Dollar = Side.find_first_of('$');
    if (Dollar == StringRef::npos)
      PrintFatalError(Loc, "expected '$' prefix on asm operand name");
    StringRef Name = Side.slice(Dollar + 1, StringRef::npos);
    // With no blank present find_last_of() yields npos and the slice keeps
    // the whole name.
    return Name.slice(0, Name.find_last_of(" \t"));
  };

  Ops.first = StripToName(Ops.first);
  Ops.second = StripToName(Ops.second);
  return Ops;
}
/// Turn this matchable into its two-operand form. \p Constraint names two asm
/// operands; the first one is removed from AsmOperands, and the result
/// operand that rendered it is redirected to the second one.
///
/// Reports a fatal error when either name does not match an asm operand.
void MatchableInfo::formTwoOperandAlias(StringRef Constraint) {
  // Work out which two operand names the constraint ties together.
  const std::pair<StringRef, StringRef> Names =
      parseTwoOperandConstraint(Constraint, TheDef->getLoc());

  // Locate the asm operands carrying those names.
  const int SrcIdx = findAsmOperandNamed(Names.first);
  const int DstIdx = findAsmOperandNamed(Names.second);
  if (SrcIdx == -1)
    PrintFatalError(TheDef->getLoc(),
                    "unknown source two-operand alias operand '" + Names.first +
                    "'.");
  if (DstIdx == -1)
    PrintFatalError(TheDef->getLoc(),
                    "unknown destination two-operand alias operand '" +
                    Names.second + "'.");

  const unsigned Removed = static_cast<unsigned>(SrcIdx);

  // The first result operand rendered from the operand being aliased away is
  // pointed at the combined operand instead. Only the first one is touched.
  auto FirstUse = find_if(ResOperands, [Removed](const ResOperand &Res) {
    return Res.Kind == ResOperand::RenderAsmOperand &&
           Res.AsmOperandNum == Removed;
  });
  if (FirstUse != ResOperands.end())
    FirstUse->AsmOperandNum = DstIdx;

  // Drop the asm operand that has been aliased away.
  AsmOperands.erase(AsmOperands.begin() + SrcIdx);

  // Every stored index greater than the removed position moves down by one.
  // Result kinds other than rendered and tied operands carry no such index.
  for (ResOperand &Res : ResOperands) {
    if (Res.Kind == ResOperand::RenderAsmOperand) {
      if (Res.AsmOperandNum > Removed)
        --Res.AsmOperandNum;
    } else if (Res.Kind == ResOperand::TiedOperand) {
      if (Res.TiedOperandNum > Removed)
        --Res.TiedOperandNum;
    }
  }
}
/// extractSingletonRegisterForAsmOperand - If the token of \p Op names a
/// register of the target, record that register's definition in
/// Op.SingletonReg; otherwise leave \p Op untouched.
///
/// With an empty \p RegisterPrefix the lower-cased token is looked up as is.
/// With a non-empty prefix the token must start with it, and the remainder
/// (unchanged in case) is looked up.
static void
extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op,
                                      const AsmMatcherInfo &Info,
                                      StringRef RegisterPrefix) {
  // A token that is glued to its neighbours (i.e. not separated from them,
  // e.g. by whitespace) is never interpreted as a register name.
  if (!Op.IsIsolatedToken)
    return;

  StringRef Tok = Op.Token;

  // Decide which spelling to look up. The lower-cased copy has to outlive the
  // lookup below, so it is declared at function scope.
  std::string LoweredTok;
  StringRef Candidate;
  if (RegisterPrefix.empty()) {
    LoweredTok = Tok.lower();
    Candidate = LoweredTok;
  } else if (Tok.startswith(RegisterPrefix)) {
    Candidate = Tok.substr(RegisterPrefix.size());
  } else {
    // Prefix required but absent: not a register reference.
    return;
  }

  // An unknown name is simply some non-register token; ignore it.
  const CodeGenRegister *Reg = Info.Target.getRegisterByName(Candidate);
  if (Reg)
    Op.SingletonReg = Reg->TheDef;
}
void MatchableInfo::initialize(const AsmMatcherInfo &Info,
SmallPtrSetImpl<Record*> &SingletonRegisters,
AsmVariantInfo const &Variant,
bool HasMnemonicFirst) {
AsmVariantID = Variant.AsmVariantNo;
AsmString =
CodeGenInstruction::FlattenAsmStringVariants(AsmString,
Variant.AsmVariantNo);
tokenizeAsmString(Info, Variant);
// The first token of the instruction is the mnemonic, which must be a
// simple string, not a $foo variable or a singleton register.
if (AsmOperands.empty())
PrintFatalError(TheDef->getLoc(),
"Instruction '" + TheDef->getName() + "' has no tokens");
assert(!AsmOperands[0].Token.empty());
if (HasMnemonicFirst) {
Mnemonic = AsmOperands[0].Token;
if (Mnemonic[0] == '$')
PrintFatalError(TheDef->getLoc(),
"Invalid instruction mnemonic '" + Mnemonic + "'!");
// Remove the first operand, it is tracked in the mnemonic field.
AsmOperands.erase(AsmOperands.begin());
} else if (AsmOperands[0].Token[0] != '$')
Mnemonic = AsmOperands[0].Token;
// Compute the require features.
for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates"))
if (const SubtargetFeatureInfo *Feature =
Info.getSubtargetFeature(Predicate))
RequiredFeatures.push_back(Feature);
// Collect singleton registers, if used.
for (MatchableInfo::AsmOperand &Op : AsmOperands) {
extractSingletonRegisterForAsmOperand(Op, Info, Variant.RegisterPrefix);
if (Record *Reg = Op.SingletonReg)
SingletonRegisters.insert(Reg);
}
const RecordVal *DepMask = TheDef->getValue("DeprecatedFeatureMask");
if (!DepMask)
DepMask = TheDef->getValue("ComplexDeprecationPredicate");
HasDeprecation =
DepMask ? !DepMask->getValue()->getAsUnquotedString().empty() : false;
}
/// Add a new entry at the end of AsmOperands for \p Token, tagged with
/// whether the token is isolated from its neighbours.
void MatchableInfo::addAsmOperand(StringRef Token, bool IsIsolatedToken) {
  AsmOperands.emplace_back(IsIsolatedToken, Token);
}
/// tokenizeAsmString - Tokenize a simplified assembly string.
void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info,
AsmVariantInfo const &Variant) {
StringRef String = AsmString;
size_t Prev = 0;
bool InTok = false;
bool IsIsolatedToken = true;
for (size_t i = 0, e = String.size(); i != e; ++i) {
char Char = String[i];
if (Variant.BreakCharacters.find(Char) != std::string::npos) {
if (InTok) {
addAsmOperand(String.slice(Prev, i), false);
Prev = i;
IsIsolatedToken = false;
}
InTok = true;
continue;
}
if (Variant.TokenizingCharacters.find(Char) != std::string::npos) {
if (InTok) {
addAsmOperand(String.slice(Prev, i), IsIsolatedToken);
InTok = false;
IsIsolatedToken = false;
}
addAsmOperand(String.slice(i, i + 1), IsIsolatedToken);
Prev = i + 1;
IsIsolatedToken = true;
continue;
}
if (Variant.SeparatorCharacters.find(Char) != std::string::npos) {
if (InTok) {
addAsmOperand(String.slice(Prev, i), IsIsolatedToken);
InTok = false;
}
Prev = i + 1;
IsIsolatedToken = true;
continue;
}
switch (Char) {
case '\\':
if (InTok) {
addAsmOperand(String.slice(Prev, i), false);
InTok = false;
IsIsolatedToken = false;
}
++i;
assert(i != String.size() && "Invalid quoted character");
addAsmOperand(String.slice(i, i + 1), IsIsolatedToken);
Prev = i + 1;
IsIsolatedToken = false;
break;
case '$': {
if (InTok) {
addAsmOperand(String.slice(Prev, i), false);
InTok = false;
IsIsolatedToken = false;
}
// If this isn't "${", start new identifier looking like "$xxx"
if (i + 1 == String.size() || String[i + 1] != '{') {
Prev = i;
break;
}
size_t EndPos = String.find('}', i);