File tree Expand file tree Collapse file tree
common/triton_utils/autotune_kernel_configs/triton_3.6.0/NVIDIA_GeForce_RTX_5090 Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_SIZE_K" : 32 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 64 ,
6+ "GROUP_SIZE_M" : 32 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 3 ,
9+ "num_warps" : 4
10+ },
11+ "128" : {
12+ "BLOCK_SIZE_K" : 32 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 128 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 3 ,
18+ "num_warps" : 8
19+ },
20+ "16384" : {
21+ "BLOCK_SIZE_K" : 64 ,
22+ "BLOCK_SIZE_M" : 64 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 32 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 3 ,
27+ "num_warps" : 4
28+ },
29+ "2048" : {
30+ "BLOCK_SIZE_K" : 32 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 32 ,
33+ "GROUP_SIZE_M" : 64 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 3 ,
36+ "num_warps" : 4
37+ },
38+ "256" : {
39+ "BLOCK_SIZE_K" : 32 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 128 ,
42+ "GROUP_SIZE_M" : 1 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 3 ,
45+ "num_warps" : 8
46+ },
47+ "512" : {
48+ "BLOCK_SIZE_K" : 32 ,
49+ "BLOCK_SIZE_M" : 16 ,
50+ "BLOCK_SIZE_N" : 64 ,
51+ "GROUP_SIZE_M" : 64 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 4 ,
54+ "num_warps" : 4
55+ },
56+ "64" : {
57+ "BLOCK_SIZE_K" : 32 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 128 ,
60+ "GROUP_SIZE_M" : 1 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 2 ,
63+ "num_warps" : 4
64+ },
65+ "8" : {
66+ "BLOCK_SIZE_K" : 32 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 1 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 3 ,
72+ "num_warps" : 4
73+ },
74+ "800" : {
75+ "BLOCK_SIZE_K" : 32 ,
76+ "BLOCK_SIZE_M" : 32 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 32 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 5 ,
81+ "num_warps" : 4
82+ },
83+ "8192" : {
84+ "BLOCK_SIZE_K" : 64 ,
85+ "BLOCK_SIZE_M" : 32 ,
86+ "BLOCK_SIZE_N" : 128 ,
87+ "GROUP_SIZE_M" : 32 ,
88+ "NEED_TRANS" : false ,
89+ "num_stages" : 2 ,
90+ "num_warps" : 4
91+ }
92+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 32 ,
6+ "GROUP_SIZE_M" : 16 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 5 ,
9+ "num_warps" : 4
10+ },
11+ "100" : {
12+ "BLOCK_SIZE_K" : 64 ,
13+ "BLOCK_SIZE_M" : 32 ,
14+ "BLOCK_SIZE_N" : 16 ,
15+ "GROUP_SIZE_M" : 16 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 3 ,
18+ "num_warps" : 4
19+ },
20+ "1024" : {
21+ "BLOCK_SIZE_K" : 128 ,
22+ "BLOCK_SIZE_M" : 32 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 32 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 2 ,
27+ "num_warps" : 4
28+ },
29+ "128" : {
30+ "BLOCK_SIZE_K" : 64 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 32 ,
33+ "GROUP_SIZE_M" : 1 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 5 ,
36+ "num_warps" : 4
37+ },
38+ "16" : {
39+ "BLOCK_SIZE_K" : 128 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 32 ,
42+ "GROUP_SIZE_M" : 1 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 2 ,
45+ "num_warps" : 4
46+ },
47+ "2048" : {
48+ "BLOCK_SIZE_K" : 64 ,
49+ "BLOCK_SIZE_M" : 64 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 64 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 3 ,
54+ "num_warps" : 8
55+ },
56+ "256" : {
57+ "BLOCK_SIZE_K" : 128 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 32 ,
60+ "GROUP_SIZE_M" : 1 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 4 ,
63+ "num_warps" : 4
64+ },
65+ "32" : {
66+ "BLOCK_SIZE_K" : 64 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 64 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 3 ,
72+ "num_warps" : 8
73+ },
74+ "64" : {
75+ "BLOCK_SIZE_K" : 64 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 32 ,
78+ "GROUP_SIZE_M" : 1 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 5 ,
81+ "num_warps" : 4
82+ },
83+ "8" : {
84+ "BLOCK_SIZE_K" : 64 ,
85+ "BLOCK_SIZE_M" : 16 ,
86+ "BLOCK_SIZE_N" : 128 ,
87+ "GROUP_SIZE_M" : 32 ,
88+ "NEED_TRANS" : false ,
89+ "num_stages" : 5 ,
90+ "num_warps" : 8
91+ }
92+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 64 ,
6+ "GROUP_SIZE_M" : 16 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 3 ,
9+ "num_warps" : 4
10+ },
11+ "100" : {
12+ "BLOCK_SIZE_K" : 128 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 32 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "NEED_TRANS" : true ,
17+ "num_stages" : 4 ,
18+ "num_warps" : 4
19+ },
20+ "1024" : {
21+ "BLOCK_SIZE_K" : 128 ,
22+ "BLOCK_SIZE_M" : 32 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 1 ,
25+ "NEED_TRANS" : true ,
26+ "num_stages" : 2 ,
27+ "num_warps" : 4
28+ },
29+ "128" : {
30+ "BLOCK_SIZE_K" : 128 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 64 ,
33+ "GROUP_SIZE_M" : 16 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 3 ,
36+ "num_warps" : 4
37+ },
38+ "16" : {
39+ "BLOCK_SIZE_K" : 128 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 32 ,
42+ "GROUP_SIZE_M" : 32 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 3 ,
45+ "num_warps" : 4
46+ },
47+ "2048" : {
48+ "BLOCK_SIZE_K" : 128 ,
49+ "BLOCK_SIZE_M" : 64 ,
50+ "BLOCK_SIZE_N" : 64 ,
51+ "GROUP_SIZE_M" : 16 ,
52+ "NEED_TRANS" : true ,
53+ "num_stages" : 3 ,
54+ "num_warps" : 4
55+ },
56+ "256" : {
57+ "BLOCK_SIZE_K" : 128 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 64 ,
60+ "GROUP_SIZE_M" : 64 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 3 ,
63+ "num_warps" : 4
64+ },
65+ "32" : {
66+ "BLOCK_SIZE_K" : 128 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 32 ,
69+ "GROUP_SIZE_M" : 32 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 3 ,
72+ "num_warps" : 4
73+ },
74+ "4096" : {
75+ "BLOCK_SIZE_K" : 128 ,
76+ "BLOCK_SIZE_M" : 64 ,
77+ "BLOCK_SIZE_N" : 128 ,
78+ "GROUP_SIZE_M" : 16 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 3 ,
81+ "num_warps" : 8
82+ },
83+ "64" : {
84+ "BLOCK_SIZE_K" : 128 ,
85+ "BLOCK_SIZE_M" : 16 ,
86+ "BLOCK_SIZE_N" : 32 ,
87+ "GROUP_SIZE_M" : 1 ,
88+ "NEED_TRANS" : true ,
89+ "num_stages" : 3 ,
90+ "num_warps" : 4
91+ },
92+ "8" : {
93+ "BLOCK_SIZE_K" : 128 ,
94+ "BLOCK_SIZE_M" : 16 ,
95+ "BLOCK_SIZE_N" : 128 ,
96+ "GROUP_SIZE_M" : 1 ,
97+ "NEED_TRANS" : true ,
98+ "num_stages" : 3 ,
99+ "num_warps" : 8
100+ }
101+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_SIZE_K" : 64 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 128 ,
6+ "GROUP_SIZE_M" : 64 ,
7+ "NEED_TRANS" : true ,
8+ "num_stages" : 4 ,
9+ "num_warps" : 8
10+ },
11+ "128" : {
12+ "BLOCK_SIZE_K" : 64 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 128 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 3 ,
18+ "num_warps" : 8
19+ },
20+ "16384" : {
21+ "BLOCK_SIZE_K" : 128 ,
22+ "BLOCK_SIZE_M" : 64 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 64 ,
25+ "NEED_TRANS" : true ,
26+ "num_stages" : 3 ,
27+ "num_warps" : 4
28+ },
29+ "2048" : {
30+ "BLOCK_SIZE_K" : 128 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 64 ,
33+ "GROUP_SIZE_M" : 64 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 3 ,
36+ "num_warps" : 4
37+ },
38+ "256" : {
39+ "BLOCK_SIZE_K" : 64 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 128 ,
42+ "GROUP_SIZE_M" : 64 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 3 ,
45+ "num_warps" : 8
46+ },
47+ "32768" : {
48+ "BLOCK_SIZE_K" : 128 ,
49+ "BLOCK_SIZE_M" : 64 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 64 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 3 ,
54+ "num_warps" : 8
55+ },
56+ "512" : {
57+ "BLOCK_SIZE_K" : 64 ,
58+ "BLOCK_SIZE_M" : 16 ,
59+ "BLOCK_SIZE_N" : 64 ,
60+ "GROUP_SIZE_M" : 64 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 4 ,
63+ "num_warps" : 4
64+ },
65+ "64" : {
66+ "BLOCK_SIZE_K" : 64 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 1 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 2 ,
72+ "num_warps" : 4
73+ },
74+ "8" : {
75+ "BLOCK_SIZE_K" : 64 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 16 ,
79+ "NEED_TRANS" : true ,
80+ "num_stages" : 3 ,
81+ "num_warps" : 4
82+ },
83+ "800" : {
84+ "BLOCK_SIZE_K" : 64 ,
85+ "BLOCK_SIZE_M" : 16 ,
86+ "BLOCK_SIZE_N" : 32 ,
87+ "GROUP_SIZE_M" : 64 ,
88+ "NEED_TRANS" : true ,
89+ "num_stages" : 4 ,
90+ "num_warps" : 4
91+ },
92+ "8192" : {
93+ "BLOCK_SIZE_K" : 128 ,
94+ "BLOCK_SIZE_M" : 64 ,
95+ "BLOCK_SIZE_N" : 64 ,
96+ "GROUP_SIZE_M" : 32 ,
97+ "NEED_TRANS" : false ,
98+ "num_stages" : 2 ,
99+ "num_warps" : 4
100+ }
101+ }
You can’t perform that action at this time.
0 commit comments