diff options
| author | Konstantin <const@const.me> | 2023-01-20 15:20:20 +0100 |
|---|---|---|
| committer | Konstantin <const@const.me> | 2023-01-20 15:20:20 +0100 |
| commit | 062d01a9701a11468b1a46bd11f9349bb98671a6 (patch) | |
| tree | cc814846ed64a0583e536b032c4b816cf1d53309 | |
| parent | f3616a23f0c52586fe1911ffded035cd527b6719 (diff) | |
BIOS setting, enabled XMP for the memory
| -rw-r--r-- | SampleClips/columbia-large-vega8.txt | 78 | ||||
| -rw-r--r-- | SampleClips/columbia-medium-vega8.txt | 78 | ||||
| -rw-r--r-- | SampleClips/jfk-large-vega8.txt | 78 | ||||
| -rw-r--r-- | SampleClips/jfk-medium-vega8.txt | 78 |
4 files changed, 156 insertions, 156 deletions
diff --git a/SampleClips/columbia-large-vega8.txt b/SampleClips/columbia-large-vega8.txt index 1f78e04..d7efb46 100644 --- a/SampleClips/columbia-large-vega8.txt +++ b/SampleClips/columbia-large-vega8.txt @@ -1,45 +1,45 @@ CPU Tasks -LoadModel 7.28719 seconds -RunComplete 144.925 seconds -Run 144.859 seconds -Callbacks 13.7721 milliseconds, 44 calls, 313.002 microseconds average -Spectrogram 784.186 milliseconds, 41 calls, 19.1265 milliseconds average -Sample 84.0784 milliseconds, 527 calls, 159.542 microseconds average -Encode 82.3195 seconds, 9 calls, 9.14661 seconds average -Decode 62.5221 seconds, 9 calls, 6.9469 seconds average -DecodeStep 62.4374 seconds, 527 calls, 118.477 milliseconds average +LoadModel 7.12 seconds +RunComplete 133.87 seconds +Run 133.812 seconds +Callbacks 14.3995 milliseconds, 44 calls, 327.261 microseconds average +Spectrogram 694.252 milliseconds, 41 calls, 16.933 milliseconds average +Sample 77.2903 milliseconds, 527 calls, 146.661 microseconds average +Encode 78.2421 seconds, 9 calls, 8.69357 seconds average +Decode 55.5528 seconds, 9 calls, 6.17254 seconds average +DecodeStep 55.4753 seconds, 527 calls, 105.266 milliseconds average GPU Tasks -LoadModel 5.03066 seconds -Run 144.417 seconds -Encode 83.3841 seconds, 9 calls, 9.2649 seconds average -EncodeLayer 70.9038 seconds, 288 calls, 246.194 milliseconds average -Decode 61.0329 seconds, 9 calls, 6.78143 seconds average -DecodeStep 61.0324 seconds, 527 calls, 115.811 milliseconds average -DecodeLayer 57.3642 seconds, 16864 calls, 3.40158 milliseconds average +LoadModel 4.8933 seconds +Run 133.411 seconds +Encode 79.3112 seconds, 9 calls, 8.81235 seconds average +EncodeLayer 68.0212 seconds, 288 calls, 236.185 milliseconds average +Decode 54.0997 seconds, 9 calls, 6.01108 seconds average +DecodeStep 54.0993 seconds, 527 calls, 102.655 milliseconds average +DecodeLayer 51.0747 seconds, 16864 calls, 3.02862 milliseconds average Compute Shaders -mulMatTiledEx 64.7225 seconds, 2880 calls, 22.4731 milliseconds average -mulMatByRowTiled 24.9768 seconds, 166278 calls, 150.211 microseconds average -mulMatTiled 18.9281 seconds, 3465 calls, 5.46265 milliseconds average -mulMatByRowTiledEx 15.2747 seconds, 33152 calls, 460.748 microseconds average -softMaxFixed 4.00524 seconds, 17152 calls, 233.515 microseconds average -convolutionMain2Fixed 2.65685 seconds, 9 calls, 295.205 milliseconds average -norm 1.98729 seconds, 51704 calls, 38.4359 microseconds average -matReshapePanels 1.95102 seconds, 1737 calls, 1.12321 milliseconds average -addRepeat 1.62049 seconds, 68896 calls, 23.5207 microseconds average -addRepeatGelu 1.52277 seconds, 17170 calls, 88.6878 microseconds average -copyConvert 1.0661 seconds, 34880 calls, 30.5649 microseconds average -fmaRepeat1 1.04679 seconds, 51704 calls, 20.2457 microseconds average -softMax 812.506 milliseconds, 17391 calls, 46.7199 microseconds average -scaleInPlace 805.156 milliseconds, 17152 calls, 46.9424 microseconds average -addInPlace 672.184 milliseconds, 34304 calls, 19.5949 microseconds average -copyTranspose 634.985 milliseconds, 34304 calls, 18.5105 microseconds average -addRepeatScale 500.301 milliseconds, 33728 calls, 14.8334 microseconds average -add 384.464 milliseconds, 16873 calls, 22.7858 microseconds average -convolutionMain 369.129 milliseconds, 9 calls, 41.0143 milliseconds average -diagMaskInf 220.886 milliseconds, 16864 calls, 13.0981 microseconds average -convolutionPrep1 44.2511 milliseconds, 18 calls, 2.45839 milliseconds average -convolutionPrep2 38.6289 milliseconds, 18 calls, 2.14605 milliseconds average -addRows 1.8634 milliseconds, 527 calls, 3.53586 microseconds average +mulMatTiledEx 63.7687 seconds, 2880 calls, 22.1419 milliseconds average +mulMatByRowTiled 19.983 seconds, 166278 calls, 120.178 microseconds average +mulMatTiled 18.3409 seconds, 3465 calls, 5.29318 milliseconds average +mulMatByRowTiledEx 12.2089 seconds, 33152 calls, 368.27 microseconds average +softMaxFixed 3.18364 seconds, 17152 calls, 185.613 microseconds average +norm 1.90119 seconds, 51704 calls, 36.7707 microseconds average +convolutionMain2Fixed 1.81408 seconds, 9 calls, 201.564 milliseconds average +addRepeat 1.66567 seconds, 68896 calls, 24.1765 microseconds average +matReshapePanels 1.53839 seconds, 1737 calls, 885.656 microseconds average +fmaRepeat1 1.24758 seconds, 51704 calls, 24.1292 microseconds average +addRepeatGelu 1.2376 seconds, 17170 calls, 72.0792 microseconds average +copyConvert 1.03588 seconds, 34880 calls, 29.6985 microseconds average +scaleInPlace 836.078 milliseconds, 17152 calls, 48.7452 microseconds average +softMax 788.79 milliseconds, 17391 calls, 45.3562 microseconds average +copyTranspose 740.928 milliseconds, 34304 calls, 21.5989 microseconds average +addRepeatScale 735.736 milliseconds, 33728 calls, 21.8138 microseconds average +addInPlace 707.443 milliseconds, 34304 calls, 20.6228 microseconds average +add 508.618 milliseconds, 16873 calls, 30.1439 microseconds average +diagMaskInf 383.247 milliseconds, 16864 calls, 22.7258 microseconds average +convolutionMain 372.899 milliseconds, 9 calls, 41.4332 milliseconds average +convolutionPrep1 44.6208 milliseconds, 18 calls, 2.47893 milliseconds average +convolutionPrep2 31.7478 milliseconds, 18 calls, 1.76377 milliseconds average +addRows 1.5923 milliseconds, 527 calls, 3.02144 microseconds average Memory Usage Model 892.591 KB RAM, 2.8815 GB VRAM Context 92.2617 MB RAM, 1.27432 GB VRAM diff --git a/SampleClips/columbia-medium-vega8.txt b/SampleClips/columbia-medium-vega8.txt index 7652bd3..8f81d2d 100644 --- a/SampleClips/columbia-medium-vega8.txt +++ b/SampleClips/columbia-medium-vega8.txt @@ -1,45 +1,45 @@ CPU Tasks -LoadModel 1.02959 seconds -RunComplete 82.2062 seconds -Run 82.1216 seconds -Callbacks 9.3817 milliseconds, 37 calls, 253.559 microseconds average -Spectrogram 727.782 milliseconds, 42 calls, 17.3281 milliseconds average -Sample 86.0688 milliseconds, 511 calls, 168.432 microseconds average -Encode 47.9066 seconds, 10 calls, 4.79066 seconds average -Decode 34.2022 seconds, 10 calls, 3.42022 seconds average -DecodeStep 34.1156 seconds, 511 calls, 66.7624 milliseconds average +LoadModel 854.438 milliseconds +RunComplete 76.7069 seconds +Run 76.6544 seconds +Callbacks 10.1743 milliseconds, 37 calls, 274.981 microseconds average +Spectrogram 687.562 milliseconds, 42 calls, 16.3705 milliseconds average +Sample 73.812 milliseconds, 511 calls, 144.446 microseconds average +Encode 45.4051 seconds, 10 calls, 4.54051 seconds average +Decode 31.2366 seconds, 10 calls, 3.12366 seconds average +DecodeStep 31.1625 seconds, 511 calls, 60.9834 milliseconds average GPU Tasks -LoadModel 521.215 milliseconds -Run 81.7841 seconds -Encode 48.6521 seconds, 10 calls, 4.86521 seconds average -EncodeLayer 41.1815 seconds, 240 calls, 171.589 milliseconds average -Decode 33.132 seconds, 10 calls, 3.3132 seconds average -DecodeStep 33.1315 seconds, 511 calls, 64.8366 milliseconds average -DecodeLayer 29.8613 seconds, 12264 calls, 2.43488 milliseconds average +LoadModel 473.758 milliseconds +Run 76.3483 seconds +Encode 46.1492 seconds, 10 calls, 4.61492 seconds average +EncodeLayer 39.5117 seconds, 240 calls, 164.632 milliseconds average +Decode 30.199 seconds, 10 calls, 3.0199 seconds average +DecodeStep 30.1986 seconds, 511 calls, 59.097 milliseconds average +DecodeLayer 27.4002 seconds, 12264 calls, 2.2342 milliseconds average Compute Shaders -mulMatTiledEx 34.7452 seconds, 2400 calls, 14.4772 milliseconds average -mulMatByRowTiled 14.1115 seconds, 120741 calls, 116.874 microseconds average -mulMatTiled 12.7533 seconds, 2890 calls, 4.4129 milliseconds average -mulMatByRowTiledEx 6.27617 seconds, 24048 calls, 260.985 microseconds average -softMaxFixed 2.7865 seconds, 12504 calls, 222.849 microseconds average -convolutionMain2Fixed 1.89585 seconds, 10 calls, 189.585 milliseconds average -matReshapePanels 1.31263 seconds, 1450 calls, 905.261 microseconds average -addRepeat 1.11903 seconds, 50256 calls, 22.2666 microseconds average -addRepeatGelu 949.823 milliseconds, 12524 calls, 75.8403 microseconds average -softMax 766.059 milliseconds, 12775 calls, 59.9654 microseconds average -copyConvert 762.917 milliseconds, 25488 calls, 29.9324 microseconds average -fmaRepeat1 701.015 milliseconds, 37793 calls, 18.5488 microseconds average -normFixed 672.62 milliseconds, 37793 calls, 17.7975 microseconds average -addInPlace 543.904 milliseconds, 25008 calls, 21.7492 microseconds average -scaleInPlace 517.351 milliseconds, 12504 calls, 41.3749 microseconds average -addRepeatScale 496.454 milliseconds, 24528 calls, 20.2403 microseconds average -copyTranspose 411.612 milliseconds, 25008 calls, 16.4592 microseconds average -convolutionMain 338.765 milliseconds, 10 calls, 33.8765 milliseconds average -add 272.422 milliseconds, 12274 calls, 22.195 microseconds average -diagMaskInf 144.462 milliseconds, 12264 calls, 11.7793 microseconds average -convolutionPrep2 60.9698 milliseconds, 20 calls, 3.04849 milliseconds average -convolutionPrep1 33.0132 milliseconds, 20 calls, 1.65066 milliseconds average -addRows 1.5961 milliseconds, 511 calls, 3.12348 microseconds average +mulMatTiledEx 34.2075 seconds, 2400 calls, 14.2531 milliseconds average +mulMatTiled 12.4474 seconds, 2890 calls, 4.30705 milliseconds average +mulMatByRowTiled 11.7568 seconds, 120741 calls, 97.3723 microseconds average +mulMatByRowTiledEx 4.9317 seconds, 24048 calls, 205.077 microseconds average +softMaxFixed 2.26629 seconds, 12504 calls, 181.245 microseconds average +convolutionMain2Fixed 1.29108 seconds, 10 calls, 129.108 milliseconds average +addRepeat 1.11767 seconds, 50256 calls, 22.2396 microseconds average +matReshapePanels 1.06919 seconds, 1450 calls, 737.369 microseconds average +copyConvert 881.162 milliseconds, 25488 calls, 34.5717 microseconds average +addRepeatGelu 767.11 milliseconds, 12524 calls, 61.2512 microseconds average +softMax 759.06 milliseconds, 12775 calls, 59.4176 microseconds average +addRepeatScale 727.529 milliseconds, 24528 calls, 29.6612 microseconds average +addInPlace 695.233 milliseconds, 25008 calls, 27.8004 microseconds average +fmaRepeat1 608.837 milliseconds, 37793 calls, 16.1098 microseconds average +normFixed 574.785 milliseconds, 37793 calls, 15.2088 microseconds average +copyTranspose 559.208 milliseconds, 25008 calls, 22.3612 microseconds average +scaleInPlace 512.856 milliseconds, 12504 calls, 41.0153 microseconds average +convolutionMain 328.414 milliseconds, 10 calls, 32.8414 milliseconds average +add 254.621 milliseconds, 12274 calls, 20.7447 microseconds average +diagMaskInf 223.898 milliseconds, 12264 calls, 18.2565 microseconds average +convolutionPrep2 42.5244 milliseconds, 20 calls, 2.12622 milliseconds average +convolutionPrep1 31.1691 milliseconds, 20 calls, 1.55845 milliseconds average +addRows 1.4072 milliseconds, 511 calls, 2.75382 microseconds average Memory Usage Model 877.966 KB RAM, 1.42785 GB VRAM Context 91.0721 MB RAM, 893.634 MB VRAM diff --git a/SampleClips/jfk-large-vega8.txt b/SampleClips/jfk-large-vega8.txt index 22c55bb..2e488c9 100644 --- a/SampleClips/jfk-large-vega8.txt +++ b/SampleClips/jfk-large-vega8.txt @@ -1,45 +1,45 @@ CPU Tasks -LoadModel 1.7639 seconds -RunComplete 12.2094 seconds -Run 12.1434 seconds -Callbacks 781.9 microseconds, 4 calls, 195.475 microseconds average -Spectrogram 48.9234 milliseconds, 3 calls, 16.3078 milliseconds average -Sample 4.7225 milliseconds, 27 calls, 174.907 microseconds average -Encode 9.21833 seconds -Decode 2.92391 seconds -DecodeStep 2.91915 seconds, 27 calls, 108.117 milliseconds average +LoadModel 1.61442 seconds +RunComplete 11.1008 seconds +Run 11.0526 seconds +Callbacks 364.6 microseconds, 4 calls, 91.15 microseconds average +Spectrogram 42.5458 milliseconds, 3 calls, 14.1819 milliseconds average +Sample 4.1022 milliseconds, 27 calls, 151.933 microseconds average +Encode 8.77118 seconds +Decode 2.28079 seconds +DecodeStep 2.27667 seconds, 27 calls, 84.3212 milliseconds average GPU Tasks -LoadModel 1.15875 seconds -Run 12.0632 seconds -Encode 9.30063 seconds -EncodeLayer 7.88068 seconds, 32 calls, 246.271 milliseconds average -Decode 2.76256 seconds -DecodeStep 2.76254 seconds, 27 calls, 102.316 milliseconds average -DecodeLayer 2.59817 seconds, 864 calls, 3.00715 milliseconds average +LoadModel 1.04929 seconds +Run 10.9677 seconds +Encode 8.85998 seconds +EncodeLayer 7.57052 seconds, 32 calls, 236.579 milliseconds average +Decode 2.10771 seconds +DecodeStep 2.10769 seconds, 27 calls, 78.0627 milliseconds average +DecodeLayer 1.99148 seconds, 864 calls, 2.30495 milliseconds average Compute Shaders -mulMatTiledEx 6.73307 seconds, 320 calls, 21.0409 milliseconds average -mulMatTiled 1.4207 seconds, 385 calls, 3.69012 milliseconds average -mulMatByRowTiled 1.22465 seconds, 8346 calls, 146.735 microseconds average -mulMatByRowTiledEx 746.587 milliseconds, 1664 calls, 448.67 microseconds average -softMaxFixed 386.055 milliseconds, 896 calls, 430.865 microseconds average -convolutionMain2Fixed 278.426 milliseconds -addRepeat 191.992 milliseconds, 3616 calls, 53.0952 microseconds average -matReshapePanels 159.243 milliseconds, 193 calls, 825.094 microseconds average -norm 156.746 milliseconds, 2684 calls, 58.4003 microseconds average -addRepeatGelu 109.01 milliseconds, 898 calls, 121.392 microseconds average -copyConvert 102.255 milliseconds, 1856 calls, 55.0943 microseconds average -fmaRepeat1 92.7262 milliseconds, 2684 calls, 34.5478 microseconds average -scaleInPlace 90.3307 milliseconds, 896 calls, 100.816 microseconds average -addInPlace 72.1176 milliseconds, 1792 calls, 40.2442 microseconds average -copyTranspose 67.2686 milliseconds, 1792 calls, 37.5383 microseconds average -softMax 55.0367 milliseconds, 891 calls, 61.7696 microseconds average -addRepeatScale 45.4057 milliseconds, 1728 calls, 26.2764 microseconds average -convolutionMain 43.7905 milliseconds -add 33.058 milliseconds, 865 calls, 38.2173 microseconds average -diagMaskInf 16.9767 milliseconds, 864 calls, 19.649 microseconds average -convolutionPrep1 5.2747 milliseconds, 2 calls, 2.63735 milliseconds average -convolutionPrep2 4.6813 milliseconds, 2 calls, 2.34065 milliseconds average -addRows 80.8 microseconds, 27 calls, 2.99259 microseconds average +mulMatTiledEx 6.65327 seconds, 320 calls, 20.7915 milliseconds average +mulMatTiled 1.39618 seconds, 385 calls, 3.62644 milliseconds average +mulMatByRowTiled 962.672 milliseconds, 8346 calls, 115.345 microseconds average +mulMatByRowTiledEx 610.853 milliseconds, 1664 calls, 367.099 microseconds average +softMaxFixed 282.392 milliseconds, 896 calls, 315.17 microseconds average +convolutionMain2Fixed 201.356 milliseconds +addRepeat 132.117 milliseconds, 3616 calls, 36.5368 microseconds average +matReshapePanels 125.575 milliseconds, 193 calls, 650.649 microseconds average +norm 118.464 milliseconds, 2684 calls, 44.1371 microseconds average +addRepeatGelu 86.6617 milliseconds, 898 calls, 96.5052 microseconds average +copyConvert 69.9066 milliseconds, 1856 calls, 37.6652 microseconds average +fmaRepeat1 53.2854 milliseconds, 2684 calls, 19.853 microseconds average +scaleInPlace 51.3726 milliseconds, 896 calls, 57.3355 microseconds average +copyTranspose 47.1209 milliseconds, 1792 calls, 26.2951 microseconds average +addInPlace 45.977 milliseconds, 1792 calls, 25.6568 microseconds average +convolutionMain 39.939 milliseconds +softMax 32.0882 milliseconds, 891 calls, 36.0137 microseconds average +addRepeatScale 19.924 milliseconds, 1728 calls, 11.5301 microseconds average +add 16.1369 milliseconds, 865 calls, 18.6554 microseconds average +diagMaskInf 6.8347 milliseconds, 864 calls, 7.91053 microseconds average +convolutionPrep1 4.9909 milliseconds, 2 calls, 2.49545 milliseconds average +convolutionPrep2 2.6408 milliseconds, 2 calls, 1.3204 milliseconds average +addRows 70.9 microseconds, 27 calls, 2.62593 microseconds average Memory Usage Model 892.591 KB RAM, 2.8815 GB VRAM Context 1.98427 MB RAM, 1.13175 GB VRAM diff --git a/SampleClips/jfk-medium-vega8.txt b/SampleClips/jfk-medium-vega8.txt index 070522f..e6c4ec6 100644 --- a/SampleClips/jfk-medium-vega8.txt +++ b/SampleClips/jfk-medium-vega8.txt @@ -1,45 +1,45 @@ CPU Tasks -LoadModel 3.693 seconds -RunComplete 6.3734 seconds -Run 6.2892 seconds -Callbacks 436.4 microseconds, 4 calls, 109.1 microseconds average -Spectrogram 40.3373 milliseconds, 3 calls, 13.4458 milliseconds average -Sample 4.5591 milliseconds, 28 calls, 162.825 microseconds average -Encode 4.80886 seconds -Decode 1.47958 seconds -DecodeStep 1.47499 seconds, 28 calls, 52.6782 milliseconds average +LoadModel 822.903 milliseconds +RunComplete 6.09091 seconds +Run 6.03901 seconds +Callbacks 526 microseconds, 4 calls, 131.5 microseconds average +Spectrogram 48.1091 milliseconds, 3 calls, 16.0364 milliseconds average +Sample 4.1241 milliseconds, 28 calls, 147.289 microseconds average +Encode 4.71938 seconds +Decode 1.31885 seconds +DecodeStep 1.31471 seconds, 28 calls, 46.954 milliseconds average GPU Tasks -LoadModel 2.13832 seconds -Run 6.20864 seconds -Encode 4.82799 seconds -EncodeLayer 4.05025 seconds, 24 calls, 168.761 milliseconds average -Decode 1.38065 seconds -DecodeStep 1.38062 seconds, 28 calls, 49.3078 milliseconds average -DecodeLayer 1.23477 seconds, 672 calls, 1.83746 milliseconds average +LoadModel 443.09 milliseconds +Run 5.96203 seconds +Encode 4.76175 seconds +EncodeLayer 4.06066 seconds, 24 calls, 169.194 milliseconds average +Decode 1.20028 seconds +DecodeStep 1.20025 seconds, 28 calls, 42.8662 milliseconds average +DecodeLayer 1.08776 seconds, 672 calls, 1.61869 milliseconds average Compute Shaders -mulMatTiledEx 3.25092 seconds, 240 calls, 13.5455 milliseconds average -mulMatTiled 852.787 milliseconds, 289 calls, 2.95082 milliseconds average -mulMatByRowTiled 721.566 milliseconds, 6507 calls, 110.891 microseconds average -mulMatByRowTiledEx 324.607 milliseconds, 1296 calls, 250.469 microseconds average -softMaxFixed 224.749 milliseconds, 696 calls, 322.915 microseconds average -convolutionMain2Fixed 179.754 milliseconds -matReshapePanels 96.6348 milliseconds, 145 calls, 666.447 microseconds average -addRepeat 89.3072 milliseconds, 2808 calls, 31.8046 microseconds average -addRepeatGelu 66.8906 milliseconds, 698 calls, 95.8318 microseconds average -scaleInPlace 60.7223 milliseconds, 696 calls, 87.2447 microseconds average -copyConvert 53.7138 milliseconds, 1440 calls, 37.3012 microseconds average -normFixed 48.1435 milliseconds, 2093 calls, 23.0022 microseconds average -softMax 46.5807 milliseconds, 700 calls, 66.5439 microseconds average -fmaRepeat1 45.484 milliseconds, 2093 calls, 21.7315 microseconds average -addInPlace 36.4279 milliseconds, 1392 calls, 26.1695 microseconds average -convolutionMain 32.6121 milliseconds -copyTranspose 30.096 milliseconds, 1392 calls, 21.6207 microseconds average -addRepeatScale 18.6507 milliseconds, 1344 calls, 13.877 microseconds average -add 11.0739 milliseconds, 673 calls, 16.4545 microseconds average -convolutionPrep2 5.4643 milliseconds, 2 calls, 2.73215 milliseconds average -diagMaskInf 4.0519 milliseconds, 672 calls, 6.02961 microseconds average -convolutionPrep1 3.5447 milliseconds, 2 calls, 1.77235 milliseconds average -addRows 76.9 microseconds, 28 calls, 2.74643 microseconds average +mulMatTiledEx 3.20617 seconds, 240 calls, 13.3591 milliseconds average +mulMatTiled 997.08 milliseconds, 289 calls, 3.4501 milliseconds average +mulMatByRowTiled 584.778 milliseconds, 6507 calls, 89.869 microseconds average +mulMatByRowTiledEx 264.182 milliseconds, 1296 calls, 203.844 microseconds average +softMaxFixed 177.044 milliseconds, 696 calls, 254.373 microseconds average +convolutionMain2Fixed 132.808 milliseconds +addRepeat 94.4794 milliseconds, 2808 calls, 33.6465 microseconds average +matReshapePanels 78.392 milliseconds, 145 calls, 540.634 microseconds average +copyConvert 56.4795 milliseconds, 1440 calls, 39.2219 microseconds average +addRepeatGelu 54.2227 milliseconds, 698 calls, 77.683 microseconds average +scaleInPlace 44.2715 milliseconds, 696 calls, 63.6085 microseconds average +normFixed 40.1478 milliseconds, 2093 calls, 19.1819 microseconds average +fmaRepeat1 40.1179 milliseconds, 2093 calls, 19.1677 microseconds average +addInPlace 37.9729 milliseconds, 1392 calls, 27.2794 microseconds average +softMax 36.5724 milliseconds, 700 calls, 52.2463 microseconds average +convolutionMain 33.6316 milliseconds +copyTranspose 27.8482 milliseconds, 1392 calls, 20.0059 microseconds average +addRepeatScale 18.8881 milliseconds, 1344 calls, 14.0536 microseconds average +add 12.8944 milliseconds, 673 calls, 19.1596 microseconds average +diagMaskInf 8.9251 milliseconds, 672 calls, 13.2814 microseconds average +convolutionPrep2 3.653 milliseconds, 2 calls, 1.8265 milliseconds average +convolutionPrep1 3.5057 milliseconds, 2 calls, 1.75285 milliseconds average +addRows 64 microseconds, 28 calls, 2.28571 microseconds average Memory Usage Model 877.966 KB RAM, 1.42785 GB VRAM Context 1.9836 MB RAM, 771.354 MB VRAM |
