1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
|
// ir-specialize.cpp
#include "ir-specialize.h"
#include "ir.h"
#include "ir-clone.h"
#include "ir-insts.h"
namespace Slang
{
// This file implements the primary specialization pass, that takes
// generic/polymorphic Slang code and specializes/monomorphises it.
//
// At present this primarily means generating specialized copies
// of generic functions/types based on the concrete types used
// at specialization sites, and also specializing instances
// of witness-table lookup to directly refer to the concrete
// values for witnesses when witness tables are known.
//
// This pass also performs some amount of simplification and
// specialization for code using existential (interface) types
// for local variables and function parameters/results.
//
// Eventually, this pass will also need to perform specialization
// of functions to argument values for parameters that must
// be compile-time constants,
//
// All of these passes are inter-related in that applying
// simplifications/specializations of one category can open
// up opportunities for transformations in the other categories.
struct SpecializationContext;
IRInst* specializeGenericImpl(
IRGeneric* genericVal,
IRSpecialize* specializeInst,
IRModule* module,
SpecializationContext* context);
struct SpecializationContext
{
// For convenience, we will keep a pointer to the module
// we are specializing.
IRModule* module;
// We know that we can only perform generic specialization when all
// of the arguments to a generic are also fully specialized.
// The "is fully specialized" condition is something we
// need to solve for over the program, because the fully-
// specialized-ness of an instruction depends on the
// fully-specialized-ness of its operands.
//
// We will build an explicit hash set to encode those
// instructions that are fully specialized.
//
HashSet<IRInst*> fullySpecializedInsts;
// An instruction is then fully specialized if and only
// if it is in our set.
//
bool isInstFullySpecialized(
IRInst* inst)
{
// A small wrinkle is that a null instruction pointer
// sometimes appears a a type, and so should be treated
// as fully specialized too.
//
// TODO: It would be nice to remove this wrinkle.
//
if(!inst) return true;
return fullySpecializedInsts.Contains(inst);
}
// When an instruction isn't fully specialized, but its operands *are*
// then it is a candidate for specialization itself, so we will have
// a query to check for the "all operands fully specialized" case.
//
bool areAllOperandsFullySpecialized(
IRInst* inst)
{
if(!isInstFullySpecialized(inst->getFullType()))
return false;
UInt operandCount = inst->getOperandCount();
for(UInt ii = 0; ii < operandCount; ++ii)
{
IRInst* operand = inst->getOperand(ii);
if(!isInstFullySpecialized(operand))
return false;
}
return true;
}
// We will use a single work list of instructions that need
// to be considered for specialization or simplification,
// whether generic, existential, etc.
//
List<IRInst*> workList;
void addToWorkList(
IRInst* inst)
{
// We will ignore any code that is nested under a generic,
// because it doesn't make sense to perform specialization
// on such code.
//
for( auto ii = inst->getParent(); ii; ii = ii->getParent() )
{
if(as<IRGeneric>(ii))
return;
}
workList.Add(inst);
}
// When a transformation makes a change to an instruction,
// we may need to re-consider transformations for instructions
// that use its value. In those cases we will call `addUsersToWorkList`
// on the instruction that is being modified or replaced.
//
void addUsersToWorkList(
IRInst* inst)
{
for( auto use = inst->firstUse; use; use = use->nextUse )
{
auto user = use->getUser();
addToWorkList(user);
}
}
// One of the main transformations we will apply is to
// consider an instruction as being fully specialized.
//
void markInstAsFullySpecialized(
IRInst* inst)
{
if(fullySpecializedInsts.Contains(inst))
return;
fullySpecializedInsts.Add(inst);
// If we know that an instruction is fully specialized,
// then we should start to consider its uses and children
// as candidates for being fully specialized too...
//
addUsersToWorkList(inst);
}
// Of course, somewhere along the way we expect
// to run into uses of `specialize(...)` instructions
// to bind a generic to arguments that we want to
// specialize into concrete code.
//
// We also know that if we encouter `specialize(g, a, b, c)`
// and then later `specialize(g, a, b, c)` again, we
// only want to generate the specialized code for `g<a,b,c>`
// *once*, and re-use it for both versions.
//
// We will cache existing specializations of generic function/types
// using the simple key type defined as part of the IR cloning infrastructure.
//
typedef IRSimpleSpecializationKey Key;
Dictionary<Key, IRInst*> genericSpecializations;
// We will also use some shared IR building state across
// all of our specialization/cloning steps.
//
SharedIRBuilder sharedBuilderStorage;
// Now let's look at the task of finding or generation a
// specialization of some generic `g`, given a specialization
// instruction like `specialize(g, a, b, c)`.
//
// The `specializeGeneric` function will return a value
// suitable for use as a replacement for the `specialize(...)`
// instruction.
//
IRInst* specializeGeneric(
IRGeneric* genericVal,
IRSpecialize* specializeInst)
{
// First, we want to see if an existing specialization
// has already been made. To do that we will construct a key
// for lookup in the generic specialization context.
//
// Our key will consist of the identity of the generic
// being specialized, and each of the argument values
// being pased to it. In our hypothetical example of
// `specialize(g, a, b, c)` the key will then be
// the array `[g, a, b, c]`.
//
Key key;
key.vals.Add(specializeInst->getBase());
UInt argCount = specializeInst->getArgCount();
for( UInt ii = 0; ii < argCount; ++ii )
{
key.vals.Add(specializeInst->getArg(ii));
}
{
// We use our generated key to look for an
// existing specialization that has been registered.
// If one is found, our work is done.
//
IRInst* specializedVal = nullptr;
if(genericSpecializations.TryGetValue(key, specializedVal))
return specializedVal;
}
// If no existing specialization is found, we need
// to create the specialization instead.
// This mostly amounts to evaluating the generic as
// if it were a function being called.
//
// We will use a free function to do the actual work
// of evaluating the generic, so that the logic
// can be re-used in other cases that need to
// do one-off specialization.
//
IRInst* specializedVal = specializeGenericImpl(genericVal, specializeInst, module, this);
// The value that was returned from evaluating
// the generic is the specialized value, and we
// need to remember it in our dictionary of
// specializations so that we don't instantiate
// this generic again for the same arguments.
//
genericSpecializations.Add(key, specializedVal);
return specializedVal;
}
// The logic for generating a specialization of an IR generic
// relies on the ability to "evaluate" the code in the body of
// the generic, but that obviously doesn't work if we don't
// actually have the full definition for the body.
//
// This can arise in particular for builtin operations/types.
//
// Before calling `specializeGeneric()` we need to make sure
// that the generic is actually amenable to specialization,
// by looking at whether it is a definition or a declaration.
//
bool canSpecializeGeneric(
IRGeneric* generic)
{
// It is possible to have multiple "layers" of generics
// (e.g., when a generic function is nested in a generic
// type). Therefore we need to drill down through all
// of the layers present to see if at the leaf we have
// something that looks like a definition.
//
IRGeneric* g = generic;
for(;;)
{
// Given the generic `g`, we will find the value
// it appears to return in its body.
//
auto val = findGenericReturnVal(g);
if(!val)
return false;
// If `g` returns an inner generic, then we need
// to drill down further.
//
if (auto nestedGeneric = as<IRGeneric>(val))
{
g = nestedGeneric;
continue;
}
// Once we've found the leaf value that will be produced
// after all specialization is complete, we can check
// whether it looks like a definition or not.
//
return isDefinition(val);
}
}
// Now that we know when we can specialize a generic, and how
// to do it, we can write a subroutine that takes a
// `specialize(g, a, b, c, ...)` instruction and performs
// specialization if it is possible.
//
void maybeSpecializeGeneric(
IRSpecialize* specInst)
{
// We will only attempt to specialize when all of the
// operands to the `speicalize(...)` instruction are
// themselves fully specialized.
//
if(!areAllOperandsFullySpecialized(specInst))
return;
// The invariant that the arguments are fully specialized
// should mean that `a, b, c, ...` are in a form that
// we can work with, but it does *not* guarantee
// that the `g` operand is something we can work with.
//
// We can only perform specialization in the case where
// the base `g` is a known `generic` instruction.
//
auto baseVal = specInst->getBase();
auto genericVal = as<IRGeneric>(baseVal);
if(!genericVal)
return;
// We can also only specialize a generic if it
// represents a definition rather than a declaration.
//
if(!canSpecializeGeneric(genericVal))
return;
// Once we know that specialization is possible,
// the actual work is fairly simple.
//
// First, we find or generate a specialized
// version of the result of the generic (a specialized
// type, function, or whatever).
//
auto specializedVal = specializeGeneric(genericVal, specInst);
// Any uses of this `specialize(...)` instruction will
// become uses of `specializeVal`, so we want to re-consider
// them for subsequent transformations.
//
addUsersToWorkList(specInst);
// Then we simply replace any uses of the `specialize(...)`
// instruction with the specialized value and delete
// the `specialize(...)` instruction from existence.
//
specInst->replaceUsesWith(specializedVal);
specInst->removeAndDeallocate();
}
// Generic specialization depends on identifying when
// instructions are fully specialized.
//
void maybeMarkAsFullySpecialized(
IRInst* inst)
{
switch(inst->op)
{
default:
// The default case is that an instruction can
// be considered as fully specialized as soon
// as all of its operands are.
//
// TODO: We realistically need a more refined
// check here that uses a white-list of instructions
// that can represent values suitable for use
// as generic arguments.
//
if(areAllOperandsFullySpecialized(inst))
{
markInstAsFullySpecialized(inst);
}
break;
// Certain instructions cannot ever be considered
// fully specialized because they should never
// be substituted into a generic as its arguments.
case kIROp_Specialize:
case kIROp_lookup_interface_method:
case kIROp_ExtractExistentialType:
break;
}
}
// The core of this pass is to look at one instruction
// at a time, and try to perform whatever specialization
// is appropriate based on its opcode.
//
void maybeSpecializeInst(
IRInst* inst)
{
switch(inst->op)
{
default:
// By default we assume that specialization is
// not possible for a given opcode.
//
break;
case kIROp_Specialize:
// The logic for specializing a `specialize(...)`
// instruction has already been elaborated above.
//
maybeSpecializeGeneric(cast<IRSpecialize>(inst));
break;
case kIROp_lookup_interface_method:
// The remaining case we need to consider here for generics
// is when we have a `lookup_witness_method` instruction
// that is being applied to a concrete witness table,
// because we can specialize it to just be a direct
// reference to the actual witness value from the table.
//
maybeSpecializeWitnessLookup(cast<IRLookupWitnessMethod>(inst));
break;
case kIROp_Call:
// When writing functions with existential-type parameters,
// we need additional support to specialize a callee
// function based on the concrete type encapsulated in
// an argument of existential type.
//
maybeSpecializeExistentialsForCall(cast<IRCall>(inst));
break;
// The specialization of functions with existential-type
// parameters can create further opportunities for specialization,
// but in order to realize these we often need to propagate
// through local simplification on values of existential type.
//
case kIROp_ExtractExistentialType:
maybeSpecializeExtractExistentialType(inst);
break;
case kIROp_ExtractExistentialValue:
maybeSpecializeExtractExistentialValue(inst);
break;
case kIROp_ExtractExistentialWitnessTable:
maybeSpecializeExtractExistentialWitnessTable(inst);
break;
}
}
// Specializing lookup on witness tables is a general
// transformation that helps with both generic and
// existential-based code.
//
void maybeSpecializeWitnessLookup(
IRLookupWitnessMethod* lookupInst)
{
// Note: While we currently have named the instruction
// `lookup_witness_method`, the `method` part is a misnomer
// and the same instruction can look up *any* interface
// requirement based on the witness table that provides
// a conformance, and the "key" that indicates the interface
// requirement.
// We can only specialize in the case where the lookup
// is being done on a concrete witness table, and not
// the result of a `specialize` instruction or other
// operation that will yield such a table.
//
auto witnessTable = as<IRWitnessTable>(lookupInst->getWitnessTable());
if(!witnessTable)
return;
// Because we have a concrete witness table, we can
// use it to look up the IR value that satisfies
// the given interface requirement.
//
auto requirementKey = lookupInst->getRequirementKey();
auto satisfyingVal = findWitnessVal(witnessTable, requirementKey);
// We expect to always find a satisfying value, but
// we will go ahead and code defensively so that
// we leave "correct" but unspecialized code if
// we cannot find a concrete value to use.
//
if(!satisfyingVal)
return;
// At this point, we know that `satisfyingVal` is what
// would result from executing this `lookup_witness_method`
// instruction dynamically, so we can go ahead and
// replace the original instruction with that value.
//
// We also make sure to add any uses of the lookup
// instruction to our work list, because subsequent
// simplifications might be possible now.
//
addUsersToWorkList(lookupInst);
lookupInst->replaceUsesWith(satisfyingVal);
lookupInst->removeAndDeallocate();
}
// The above subroutine needed a way to look up
// the satisfying value for a given requirement
// key in a concrete witness table, so let's
// define that now.
//
IRInst* findWitnessVal(
IRWitnessTable* witnessTable,
IRInst* requirementKey)
{
// A witness table is basically just a container
// for key-value pairs, and so the best we can
// do for now is a naive linear search.
//
for( auto entry : witnessTable->getEntries() )
{
if (requirementKey == entry->getRequirementKey())
{
return entry->getSatisfyingVal();
}
}
return nullptr;
}
// All of the machinery for generic specialization
// has been defined above, so we will now walk
// through the flow of the overall specialization pass.
//
void processModule()
{
// We start by initializing our shared IR building state,
// since we will re-use that state for any code we
// generate along the way.
//
SharedIRBuilder* sharedBuilder = &sharedBuilderStorage;
sharedBuilder->module = module;
sharedBuilder->session = module->session;
// The unspecialized IR we receive as input will have
// `IRBindGlobalGenericParam` instructions that associate
// each global-scope generic parameter (a type, witness
// table, or what-have-you) with the value that it should
// be bound to for the purposes of this code-generation
// pass.
//
// Before doing any other specialization work, we will
// iterate over these instructions (which may only
// appear at the global scope) and use them to drive
// replacement of the given generic type parameter with
// the desired concrete value.
//
// TODO: When we start to support global shader parameters
// that include existential/interface types, we will need
// to support a similar specialization step for them.
//
specializeGlobalGenericParameters();
// Now that we've eliminated all cases of global generic parameters,
// we should now have the properties that:
//
// 1. Execution starts in non-generic code, with no unbound
// generic parameters in scope.
//
// 2. Any case where non-generic code makes use of a generic
// type/function, there will be a `specialize` instruction
// that specifies both the generic and the (concrete) type
// arguments that should be provided to it.
//
// The basic approach now is to look for opportunities to apply
// our specialization rules (e.g., a `specialize` instruction
// where all the type arguments are concrete types) and then
// processing any additional opportunities created along the way.
//
// We start out simple by putting the root instruction for the
// module onto our work list.
//
addToWorkList(module->getModuleInst());
// We will then iterate until our work list goes dry.
//
while(workList.Count() != 0)
{
IRInst* inst = workList.Last();
workList.RemoveLast();
// For each instruction we process, we want to perform
// a few steps.
//
// First we will do any checking required to tag an
// instruction as being fully specialized.
//
maybeMarkAsFullySpecialized(inst);
// Next we will look for all the general-purpose
// specialization opportunities (generic specialization,
// existential specialization, simplifications, etc.)
//
maybeSpecializeInst(inst);
// Finally, we need to make our logic recurse through
// the whole IR module, so we want to add the children
// of any parent instructions to our work list so that
// we process them too.
//
// Note that we are adding the children of an instruction
// in reverse order. This is because the way we are
// using the work list treats it like a stack (LIFO) and
// we know that fully-specialized-ness will tend to flow
// top-down through the program, so that we want to process
// the children of an instruction in their original order.
//
for(auto child = inst->getLastChild(); child; child = child->getPrevInst())
{
// Also note that `addToWorkList` has been written
// to avoid adding any instruction that is a descendent
// of an IR generic, because we don't actually want
// to perform specialization inside of generics.
//
addToWorkList(child);
}
}
// Once the work list has gone dry, we should have the invariant
// that there are no `specialize` instructions inside of non-generic
// functions that in turn reference a generic type/function, *except*
// in the case where that generic is for a builtin type/function, in
// which case we wouldn't want to specialize it anyway.
}
// Given a `call` instruction in the IR, we need to detect the case
// where the callee has some interface-type parameter(s) and at the
// call site it is statically clear what concrete type(s) the arguments
// will have.
//
void maybeSpecializeExistentialsForCall(IRCall* inst)
{
// We can only specialize a call when the callee function is known.
//
auto calleeFunc = as<IRFunc>(inst->getCallee());
if(!calleeFunc)
return;
// We can only specialize if we have access to a body for the callee.
//
if(!calleeFunc->isDefinition())
return;
// We shouldn't bother specializing unless the callee has at least
// one parameter that has an existential/interface type.
//
bool shouldSpecialize = false;
UInt argCounter = 0;
for( auto param : calleeFunc->getParams() )
{
auto arg = inst->getArg(argCounter++);
if( !isExistentialType(param->getDataType()) )
continue;
shouldSpecialize = true;
// We *cannot* specialize unless the argument value corresponding
// to such a parameter is one we can specialize.
//
if( !canSpecializeExistentialArg(arg))
return;
}
// If we never found a parameter worth specializing, we should bail out.
//
if(!shouldSpecialize)
return;
// At this point, we believe we *should* and *can* specialize.
//
// We need a specialized variant of the callee (with the concrete
// types substituted in for existential-type parameters), and then
// we can replace the call site to call the new function instead.
//
// Any two call sites where the argument types are the same can
// re-use the same callee, so we will cache and re-use the
// specialized functions that we generate (similar to how generic
// specialization works). Therefore we will construct a key
// for use when caching the specialized functions.
//
IRSimpleSpecializationKey key;
// The specialized callee will always depend on the unspecialized
// function from which it is generated, so we add that to our key.
//
key.vals.Add(calleeFunc);
// Also, for any parameter that has an existential type, the
// specialized function will depend on the concrete type of the
// argument.
//
argCounter = 0;
for( auto param : calleeFunc->getParams() )
{
auto arg = inst->getArg(argCounter++);
if( !isExistentialType(param->getDataType()) )
continue;
if( auto makeExistential = as<IRMakeExistential>(arg) )
{
// Note that we use the *type* stored in the
// existential-type argument, but not anything to
// do with the particular value (otherwise we'd only
// be able to re-use the specialized callee for
// call sites that pass in the exact same argument).
//
auto val = makeExistential->getWrappedValue();
auto valType = val->getFullType();
key.vals.Add(valType);
// We are also including the witness table in the key.
// This isn't required with our current language model,
// since a given type can only conform to a given interface
// in one way (so there can be only one witness table).
// That means that the `valType` and the existential
// type of `param` above should uniquely determine
// the witness table we see.
//
// There are forward-looking cases where supporting
// "overlapping conformances" could be required, and
// there is low incremental cost to future-proofing
// this code, so we go ahead and add the witness
// table even if it is redundant.
//
auto witnessTable = makeExistential->getWitnessTable();
key.vals.Add(witnessTable);
}
else
{
SLANG_UNEXPECTED("missing case for existential argument");
}
}
// Once we've constructed our key, we can try to look for an
// existing specialization of the callee that we can use.
//
IRFunc* specializedCallee = nullptr;
if( !existentialSpecializedFuncs.TryGetValue(key, specializedCallee) )
{
// If we didn't find a specialized callee already made, then we
// will go ahead and create one, and then register it in our cache.
//
specializedCallee = createExistentialSpecializedFunc(inst, calleeFunc);
existentialSpecializedFuncs.Add(key, specializedCallee);
}
// At this point we have found or generated a specialized version
// of the callee, and we need to emit a call to it.
//
// We will start by constructing the argument list for the new call.
//
argCounter = 0;
List<IRInst*> newArgs;
for( auto param : calleeFunc->getParams() )
{
auto arg = inst->getArg(argCounter++);
// How we handle each argument depends on whether the corresponding
// parameter has an existential type or not.
//
if( !isExistentialType(param->getDataType()) )
{
// If the parameter doesn't have an existential type, then we
// don't want to change up the argument we pass at all.
//
newArgs.Add(arg);
}
else
{
// Any place where the original function had a parameter of
// existential type, we will now be passing in the concrete
// argument value instead of an existential wrapper.
//
if( auto makeExistential = as<IRMakeExistential>(arg) )
{
auto val = makeExistential->getWrappedValue();
newArgs.Add(val);
}
else
{
SLANG_UNEXPECTED("missing case for existential argument");
}
}
}
// Now that we've built up our argument list, it is simple enough
// to construct a new `call` instruction.
//
IRBuilder builderStorage;
auto builder = &builderStorage;
builder->sharedBuilder = &sharedBuilderStorage;
builder->setInsertBefore(inst);
auto newCall = builder->emitCallInst(
inst->getFullType(), specializedCallee, newArgs);
// We will completely replace the old `call` instruction with the
// new one, and will go so far as to transfer any decorations
// that were attached to the old call over to the new one.
//
inst->transferDecorationsTo(newCall);
inst->replaceUsesWith(newCall);
inst->removeAndDeallocate();
// Just in case, we will add any instructions that used the
// result of this call to our work list for re-consideration.
// At this moment this shouldn't open up new opportunities
// for specialization, but we can always play it safe.
//
addUsersToWorkList(newCall);
}
// The above `maybeSpecializeExistentialsForCall` routine needed
// a few utilities, which we will now define.
// First, we want to be able to test whether a type (used by
// a parameter) is an existential type so that we should specialize it.
//
bool isExistentialType(IRType* type)
{
// An IR-level interface type is always an existential.
//
if(as<IRInterfaceType>(type))
return true;
// Eventually we will also want to handle arrays over
// existential types, but that will require careful
// handling in many places.
return false;
}
// Similarly, we want to be able to test whether an instruction
// used as an argument for an existential-type parameter is
// suitable for use in specialization.
//
bool canSpecializeExistentialArg(IRInst* inst)
{
// A `makeExistential(v, w)` instruction can be used
// for specialization, since we have the concrete value `v`
// (which implicitly determines the concrete type), and
// the witness table `w.
//
if(as<IRMakeExistential>(inst))
return true;
// If we start to specialize functions that take arrays
// of existentials as input, we will need a strategy to
// determine arguments suitable for use in specializing
// them (these would need to be arrays that nominally
// have an existential element type, but somehow have
// annotations to indicate that the concrete type
// underlying the elements in homogeneous).
return false;
}
// In order to cache and re-use functions that have had existential-type
// parameters specialized, we need storage for the cache.
//
Dictionary<IRSimpleSpecializationKey, IRFunc*> existentialSpecializedFuncs;
// The logic for creating a specialized callee function by plugging
// in concrete types for existentials is similar to other cases of
// specialization in the compiler.
//
IRFunc* createExistentialSpecializedFunc(
IRCall* oldCall,
IRFunc* oldFunc)
{
// We will make use of the infrastructure for cloning
// IR code, that is defined in `ir-clone.{h,cpp}`.
//
// In order to do the cloning work we need an
// "environment" that will map old values to
// their replacements.
//
IRCloneEnv cloneEnv;
// We also need some IR building state, for any
// new instructions we will emit.
//
IRBuilder builderStorage;
auto builder = &builderStorage;
builder->sharedBuilder = &sharedBuilderStorage;
// We will start out by determining what the parameters
// of the specialized function should be, based on
// the parameters of the original, and the concrete
// type of selected arguments at the call site.
//
// Along the way we will build up explicit lists of
// the parameters, as well as any new instructions
// that need to be added to the body of the function
// we generate (as a kind of "prologue"). We build
// the lists here because we don't yet have a basic
// block, or even a function, to insert them into.
//
List<IRParam*> newParams;
List<IRInst*> newBodyInsts;
UInt argCounter = 0;
for( auto oldParam : oldFunc->getParams() )
{
auto arg = oldCall->getArg(argCounter++);
// Given an old parameter, and the argument value at
// the (old) call site, we need to determine what
// value should stand in for that parameter in
// the specialized callee.
//
IRInst* replacementVal = nullptr;
if( !isExistentialType(oldParam->getDataType()) )
{
// For parameters that don't have an existential type,
// there is nothing interesting to do. The new function
// will also have a parameter of the exact same type,
// and we'll use that instead of the original parameter.
//
auto newParam = builder->createParam(oldParam->getFullType());
newParams.Add(newParam);
replacementVal = newParam;
}
else
{
// The trickier case is when we have an existential-type
// parameter, because we need to extract out the concrete
// type that is coming from the call site.
//
if( auto oldMakeExistential = as<IRMakeExistential>(arg) )
{
// In this case, the `arg` is `makeExistential(val, witnessTable)`
// and we know that the specialized call site will just be
// passing in `val`.
//
auto val = oldMakeExistential->getWrappedValue();
auto witnessTable = oldMakeExistential->getWitnessTable();
// Our specialized function needs to take a parameter with the
// same type as `val`, to match the call site(s) that will be
// created.
//
auto valType = val->getFullType();
auto newParam = builder->createParam(valType);
newParams.Add(newParam);
// Within the body of the function we cannot just use `val`
// directly, because the existing code expects an existential
// value, including its witness table.
//
// Therefore we will create a `makeExistential(newParam, witnessTable)`
// in the body of the new function and use *that* as the replacement
// value for the original parameter (since it will have the
// correct existential type, and stores the right witness table).
//
auto newMakeExistential = builder->emitMakeExistential(oldParam->getFullType(), newParam, witnessTable);
newBodyInsts.Add(newMakeExistential);
replacementVal = newMakeExistential;
}
else
{
SLANG_UNEXPECTED("missing case for existential argument");
}
}
// Whatever replacement value was constructed, we need to
// register it as the replacement for the original parameter.
//
cloneEnv.mapOldValToNew.Add(oldParam, replacementVal);
}
// Next we will create the skeleton of the new
// specialized function, including its type.
//
// In order to construct the type of the new function, we
// need to extract the types of all its parameters.
//
List<IRType*> newParamTypes;
for( auto newParam : newParams )
{
newParamTypes.Add(newParam->getFullType());
}
IRType* newFuncType = builder->getFuncType(
newParamTypes.Count(),
newParamTypes.Buffer(),
oldFunc->getResultType());
IRFunc* newFunc = builder->createFunc();
newFunc->setFullType(newFuncType);
// By construction, our new function type will be
// "fully specialized" by the rules used for doing
// generic specialization elsewhere in this pass.
//
fullySpecializedInsts.Add(newFuncType);
// The above steps have accomplished the "first phase"
// of cloning the function (since `IRFunc`s have no
// operands).
//
// We can now use the shared IR cloning infrastructure
// to perform the second phase of cloning, which will recursively
// clone any nested decorations, blocks, and instructions.
//
cloneInstDecorationsAndChildren(
&cloneEnv,
builder->sharedBuilder,
oldFunc,
newFunc);
// Now that the main body of existing isntructions have
// been cloned into the new function, we can go ahead
// and insert all the parameters and body instructions
// we built up into the function at the right place.
//
// We expect the function to always have at least one
// block (this was an invariant established before
// we decided to specialize).
//
auto newEntryBlock = newFunc->getFirstBlock();
SLANG_ASSERT(newEntryBlock);
// We expect every valid block to have at least one
// "ordinary" instruction (it will at least have
// a terminator like a `return`).
//
auto newFirstOrdinary = newEntryBlock->getFirstOrdinaryInst();
SLANG_ASSERT(newFirstOrdinary);
// All of our parameters will get inserted before
// the first ordinary instruction (since the function parameters
// should come at the start of the first block).
//
for( auto newParam : newParams )
{
newParam->insertBefore(newFirstOrdinary);
}
// All of our new body instructions will *also* be inserted
// before the first ordinary instruction (but will come
// *after* the parameters by the order of these two loops).
//
for( auto newBodyInst : newBodyInsts )
{
newBodyInst->insertBefore(newFirstOrdinary);
}
// After all this work we have a valid `newFunc` that has been
// specialized to match the types at the call site.
//
// There might be further opportunities for simplification and
// specialization in the function body now that we've plugged
// in some more concrete type information, so we will
// add the whole function to our work list for subsequent
// consideration.
//
addToWorkList(newFunc);
return newFunc;
}
// When we've specialized a function with an interface-type parameter
// we will still end up with a `makeExistential` operation in its
// body, which could impede subequent specializations.
//
// For example, if we have the following after specialization:
//
// e = makeExistential(v, w1);
// w2 = extractExistentialWitnessTable(e);
// f = lookup_witness_method(w2, k);
// call(f, ...);
//
// We cannot then specialize the lookup for `f` in this code as written,
// but it seems obvious that we could replace `w2` with `w1` and maybe
// get further along.
//
// In order to set up further specialization opportunities we need
// to implement a few simplification rules around operations that
// extract from an existential, when their operand is a `makeExistential`.
//
// Let's start with the routine for the case above of extracting
// a witness table.
//
void maybeSpecializeExtractExistentialWitnessTable(IRInst* inst)
{
// We know `inst` is `extractExistentialWitnessTable(existentialArg)`.
//
auto existentialArg = inst->getOperand(0);
if( auto makeExistential = as<IRMakeExistential>(existentialArg) )
{
// In this case we know `inst` is:
//
// extractExistentialWitnessTable(makeExistential(..., witnessTable))
//
// and we can just simplify that to `witnessTable`.
//
auto witnessTable = makeExistential->getWitnessTable();
// Anything that used this instruction is now a candidate for
// further simplification or specialization (e.g., one of
// the users of this instruction could be a `lookup_witness_method`
// that we can now specialize).
//
addUsersToWorkList(inst);
inst->replaceUsesWith(witnessTable);
inst->removeAndDeallocate();
}
}
// The cases for simplifying `extractExistentialValue` is more or less the same
// as for witness tables.
//
void maybeSpecializeExtractExistentialValue(IRInst* inst)
{
// We know `inst` is `extractExistentialValue(existentialArg)`.
//
auto existentialArg = inst->getOperand(0);
if( auto makeExistential = as<IRMakeExistential>(existentialArg) )
{
// Now we know `inst` is:
//
// extractExistentialValue(makeExistential(val, ...))
//
// and we can just simplify that to `val`.
//
auto val = makeExistential->getWrappedValue();
addUsersToWorkList(inst);
inst->replaceUsesWith(val);
inst->removeAndDeallocate();
}
}
// The cases for simplifying `extractExistentialType` is more or less the same
// as for witness tables.
//
void maybeSpecializeExtractExistentialType(IRInst* inst)
{
// We know `inst` is `extractExistentialValue(existentialArg)`.
//
auto existentialArg = inst->getOperand(0);
if( auto makeExistential = as<IRMakeExistential>(existentialArg) )
{
// Now we know `inst` is:
//
// extractExistentialType(makeExistential(val, ...))
//
// and we can just simplify that to type type of `val`.
//
auto val = makeExistential->getWrappedValue();
auto valType = val->getFullType();
addUsersToWorkList(inst);
inst->replaceUsesWith(valType);
inst->removeAndDeallocate();
}
}
// The handling of specialization for global generic type
// parameters involves searching for all `bind_global_generic_param`
// instructions in the input module.
//
void specializeGlobalGenericParameters()
{
auto moduleInst = module->getModuleInst();
for(auto inst : moduleInst->getChildren())
{
// We only want to consider the `bind_global_generic_param`
// instructions, and ignore everything else.
//
auto bindInst = as<IRBindGlobalGenericParam>(inst);
if(!bindInst)
continue;
// HACK: Our current front-end emit logic can end up emitting multiple
// `bind_global_generic_param` instructions for the same parameter. This is
// a buggy behavior, but a real fix would require refactoring the way
// global generic arguments are specified today.
//
// For now we will do a sanity check to detect parameters that
// have already been specialized.
//
if( !as<IRGlobalGenericParam>(bindInst->getOperand(0)) )
{
// The "parameter" operand is no longer a parameter, so it
// seems things must have been specialized already.
//
continue;
}
// The actual logic for applying the substitution is
// almost trivial: we will replace any uses of the
// global generic parameter with its desired value.
//
auto param = bindInst->getParam();
auto val = bindInst->getVal();
param->replaceUsesWith(val);
}
{
// Now that we've replaced any uses of global generic
// parameters, we will do a second pass to remove
// the parameters and any `bind_global_generic_param`
// instructions, since both should be dead/unused.
//
IRInst* next = nullptr;
for(auto inst = moduleInst->getFirstChild(); inst; inst = next)
{
next = inst->getNextInst();
switch(inst->op)
{
default:
break;
case kIROp_GlobalGenericParam:
case kIROp_BindGlobalGenericParam:
// A `bind_global_generic_param` instruction should
// have no uses in the first place, and all the global
// generic parameters should have had their uses replaced.
//
SLANG_ASSERT(!inst->firstUse);
inst->removeAndDeallocate();
break;
}
}
}
}
};
void specializeModule(
IRModule* module)
{
SpecializationContext context;
context.module = module;
context.processModule();
}
IRInst* specializeGenericImpl(
IRGeneric* genericVal,
IRSpecialize* specializeInst,
IRModule* module,
SpecializationContext* context)
{
// Effectively, specializing a generic amounts to "calling" the generic
// on its concrete argument values and computing the
// result it returns.
//
// For now, all of our generics consist of a single
// basic block, so we can "call" them just by
// cloning the instructions in their single block
// into the global scope, using an environment for
// cloning that maps the generic parameters to
// the concrete arguments that were provided
// by the `specialize(...)` instruction.
//
IRCloneEnv env;
// We will walk through the parameters of the generic and
// register the corresponding argument of the `specialize`
// instruction to be used as the "cloned" value for each
// parameter.
//
// Suppose we are looking at `specialize(g, a, b, c)` and `g` has
// three generic parameters: `T`, `U`, and `V`. Then we will
// be initializing our environment to map `T -> a`, `U -> b`,
// and `V -> c`.
//
UInt argCounter = 0;
for( auto param : genericVal->getParams() )
{
UInt argIndex = argCounter++;
SLANG_ASSERT(argIndex < specializeInst->getArgCount());
IRInst* arg = specializeInst->getArg(argIndex);
env.mapOldValToNew.Add(param, arg);
}
// We will set up an IR builder for insertion
// into the global scope, at the same location
// as the original generic.
//
SharedIRBuilder sharedBuilderStorage;
sharedBuilderStorage.module = module;
sharedBuilderStorage.session = module->getSession();
IRBuilder builderStorage;
IRBuilder* builder = &builderStorage;
builder->sharedBuilder = &sharedBuilderStorage;
builder->setInsertBefore(genericVal);
// Now we will run through the body of the generic and
// clone each of its instructions into the global scope,
// until we reach a `return` instruction.
//
for( auto bb : genericVal->getBlocks() )
{
// We expect a generic to only ever contain a single block.
//
SLANG_ASSERT(bb == genericVal->getFirstBlock());
// We will iterate over the non-parameter ("ordinary")
// instructions only, because parameters were dealt
// with explictly at an earlier point.
//
for( auto ii : bb->getOrdinaryInsts() )
{
// The last block of the generic is expected to end with
// a `return` instruction for the specialized value that
// comes out of the abstraction.
//
// We thus use that cloned value as the result of the
// specialization step.
//
if( auto returnValInst = as<IRReturnVal>(ii) )
{
auto specializedVal = findCloneForOperand(&env, returnValInst->getVal());
return specializedVal;
}
// For any instruction other than a `return`, we will
// simply clone it completely into the global scope.
//
IRInst* clonedInst = cloneInst(&env, builder, ii);
// Any new instructions we create during cloning were
// not present when we initially built our work list,
// so we need to make sure to consider them now.
//
// This is important for the cases where one generic
// invokes another, because there will be `specialize`
// operations nested inside the first generic that refer
// to the second.
//
if( context )
{
context->addToWorkList(clonedInst);
}
}
}
// If we reach this point, something went wrong, because we
// never encountered a `return` inside the body of the generic.
//
SLANG_UNEXPECTED("no return from generic");
UNREACHABLE_RETURN(nullptr);
}
IRInst* specializeGeneric(
IRSpecialize* specializeInst)
{
auto baseGeneric = as<IRGeneric>(specializeInst->getBase());
SLANG_ASSERT(baseGeneric);
if(!baseGeneric) return specializeInst;
auto module = specializeInst->getModule();
SLANG_ASSERT(module);
if(!module) return specializeInst;
return specializeGenericImpl(baseGeneric, specializeInst, module, nullptr);
}
} // namespace Slang
|