File size: 9,067 Bytes
90efac3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
layer,module,loss,damp,time
0,self_attn.q_proj,0.0000156653,0.10000,2.083
0,self_attn.k_proj,0.0000032556,0.10000,2.094
0,self_attn.v_proj,0.0000005451,0.10000,2.096
0,self_attn.o_proj,0.0000008700,0.10000,0.596
0,mlp.gate_proj,0.0000435553,0.10000,1.181
0,mlp.up_proj,0.0000308079,0.10000,1.186
0,mlp.down_proj,0.0000036231,0.10000,3.570
1,self_attn.q_proj,0.0000096145,0.10000,2.272
1,self_attn.v_proj,0.0000009271,0.10000,2.302
1,self_attn.k_proj,0.0000025740,0.10000,2.312
1,self_attn.o_proj,0.0000002494,0.10000,0.587
1,mlp.gate_proj,0.0008104122,0.10000,1.194
1,mlp.up_proj,0.0005772593,0.10000,1.199
1,mlp.down_proj,0.0000024426,0.10000,3.619
2,self_attn.q_proj,0.0000300082,0.10000,2.124
2,self_attn.k_proj,0.0000076323,0.10000,2.135
2,self_attn.v_proj,0.0000016731,0.10000,2.138
2,self_attn.o_proj,0.0000004566,0.10000,0.589
2,mlp.up_proj,0.0005655596,0.10000,1.200
2,mlp.gate_proj,0.0008204051,0.10000,1.206
2,mlp.down_proj,0.0000034888,0.10000,3.573
3,self_attn.v_proj,0.0000023768,0.10000,2.066
3,self_attn.q_proj,0.0000321667,0.10000,2.071
3,self_attn.k_proj,0.0000077473,0.10000,2.076
3,self_attn.o_proj,0.0000013562,0.10000,0.591
3,mlp.gate_proj,0.0013856396,0.10000,1.208
3,mlp.up_proj,0.0011071415,0.10000,1.213
3,mlp.down_proj,0.0000201999,0.10000,3.568
4,self_attn.q_proj,0.0000552769,0.10000,2.068
4,self_attn.v_proj,0.0000055460,0.10000,2.085
4,self_attn.k_proj,0.0000116454,0.10000,2.089
4,self_attn.o_proj,0.0000010820,0.10000,0.596
4,mlp.up_proj,0.0008047744,0.10000,1.181
4,mlp.gate_proj,0.0011026591,0.10000,1.187
4,mlp.down_proj,0.0000091144,0.10000,3.550
5,self_attn.q_proj,0.0000541105,0.10000,2.220
5,self_attn.v_proj,0.0000053406,0.10000,2.220
5,self_attn.k_proj,0.0000105450,0.10000,2.224
5,self_attn.o_proj,0.0000009863,0.10000,0.591
5,mlp.up_proj,0.0011642066,0.10000,1.197
5,mlp.gate_proj,0.0013546876,0.10000,1.205
5,mlp.down_proj,0.0000043981,0.10000,3.593
6,self_attn.q_proj,0.0000342204,0.10000,2.073
6,self_attn.v_proj,0.0000037009,0.10000,2.089
6,self_attn.k_proj,0.0000067855,0.10000,2.092
6,self_attn.o_proj,0.0000017441,0.10000,0.589
6,mlp.up_proj,0.0001849451,0.10000,1.194
6,mlp.gate_proj,0.0002499141,0.10000,1.194
6,mlp.down_proj,0.0000109457,0.10000,3.549
7,self_attn.v_proj,0.0000060006,0.10000,2.174
7,self_attn.q_proj,0.0000350479,0.10000,2.181
7,self_attn.k_proj,0.0000057774,0.10000,2.184
7,self_attn.o_proj,0.0000031796,0.10000,0.589
7,mlp.up_proj,0.0001335227,0.10000,1.206
7,mlp.gate_proj,0.0001452405,0.10000,1.206
7,mlp.down_proj,0.0000178218,0.10000,3.530
8,self_attn.q_proj,0.0000540192,0.10000,2.123
8,self_attn.k_proj,0.0000110568,0.10000,2.137
8,self_attn.v_proj,0.0000055582,0.10000,2.139
8,self_attn.o_proj,0.0000041908,0.10000,0.589
8,mlp.gate_proj,0.0001565939,0.10000,1.199
8,mlp.up_proj,0.0001524405,0.10000,1.202
8,mlp.down_proj,0.0000192921,0.10000,3.578
9,self_attn.q_proj,0.0000465290,0.10000,2.145
9,self_attn.v_proj,0.0000078169,0.10000,2.147
9,self_attn.k_proj,0.0000080876,0.10000,2.151
9,self_attn.o_proj,0.0000069479,0.10000,0.618
9,mlp.gate_proj,0.0005196439,0.10000,1.216
9,mlp.up_proj,0.0003377999,0.10000,1.219
9,mlp.down_proj,0.0000234840,0.10000,3.559
10,self_attn.q_proj,0.0000447259,0.10000,2.092
10,self_attn.v_proj,0.0000055774,0.10000,2.113
10,self_attn.k_proj,0.0000085554,0.10000,2.114
10,self_attn.o_proj,0.0000042167,0.10000,0.587
10,mlp.gate_proj,0.0001891744,0.10000,1.196
10,mlp.up_proj,0.0001727057,0.10000,1.198
10,mlp.down_proj,0.0000204545,0.10000,3.594
11,self_attn.k_proj,0.0000104406,0.10000,2.091
11,self_attn.q_proj,0.0000504052,0.10000,2.101
11,self_attn.v_proj,0.0000049300,0.10000,2.104
11,self_attn.o_proj,0.0000057882,0.10000,0.592
11,mlp.gate_proj,0.0001683877,0.10000,1.228
11,mlp.up_proj,0.0001652096,0.10000,1.231
11,mlp.down_proj,0.0000196861,0.10000,3.563
12,self_attn.q_proj,0.0000550930,0.10000,2.122
12,self_attn.v_proj,0.0000062055,0.10000,2.139
12,self_attn.k_proj,0.0000111796,0.10000,2.143
12,self_attn.o_proj,0.0000068578,0.10000,0.585
12,mlp.gate_proj,0.0001639272,0.10000,1.187
12,mlp.up_proj,0.0001695412,0.10000,1.190
12,mlp.down_proj,0.0000214752,0.10000,3.629
13,self_attn.q_proj,0.0000566915,0.10000,2.093
13,self_attn.v_proj,0.0000073534,0.10000,2.096
13,self_attn.k_proj,0.0000102786,0.10000,2.103
13,self_attn.o_proj,0.0000092767,0.10000,0.588
13,mlp.gate_proj,0.0001736244,0.10000,1.184
13,mlp.up_proj,0.0001677590,0.10000,1.187
13,mlp.down_proj,0.0000210207,0.10000,3.562
14,self_attn.q_proj,0.0000750175,0.10000,2.163
14,self_attn.v_proj,0.0000076160,0.10000,2.175
14,self_attn.k_proj,0.0000138165,0.10000,2.179
14,self_attn.o_proj,0.0000093313,0.10000,0.589
14,mlp.up_proj,0.0001827195,0.10000,1.219
14,mlp.gate_proj,0.0001781230,0.10000,1.222
14,mlp.down_proj,0.0000232633,0.10000,3.630
15,self_attn.q_proj,0.0000644674,0.10000,2.133
15,self_attn.k_proj,0.0000126894,0.10000,2.139
15,self_attn.v_proj,0.0000067925,0.10000,2.140
15,self_attn.o_proj,0.0000076798,0.10000,0.592
15,mlp.up_proj,0.0001769633,0.10000,1.178
15,mlp.gate_proj,0.0001678769,0.10000,1.181
15,mlp.down_proj,0.0000247494,0.10000,3.561
16,self_attn.q_proj,0.0000667001,0.10000,2.092
16,self_attn.v_proj,0.0000090089,0.10000,2.111
16,self_attn.k_proj,0.0000117023,0.10000,2.113
16,self_attn.o_proj,0.0000114549,0.10000,0.589
16,mlp.up_proj,0.0001811838,0.10000,1.178
16,mlp.gate_proj,0.0001722064,0.10000,1.180
16,mlp.down_proj,0.0000223780,0.10000,3.583
17,self_attn.v_proj,0.0000100828,0.10000,2.238
17,self_attn.k_proj,0.0000123868,0.10000,2.259
17,self_attn.q_proj,0.0000762568,0.10000,2.264
17,self_attn.o_proj,0.0000079355,0.10000,0.594
17,mlp.up_proj,0.0002123742,0.10000,1.371
17,mlp.gate_proj,0.0001976952,0.10000,1.374
17,mlp.down_proj,0.0000311225,0.10000,3.594
18,self_attn.q_proj,0.0000607342,0.10000,2.109
18,self_attn.k_proj,0.0000096124,0.10000,2.121
18,self_attn.v_proj,0.0000108709,0.10000,2.130
18,self_attn.o_proj,0.0000120994,0.10000,0.596
18,mlp.gate_proj,0.0002071995,0.10000,1.195
18,mlp.up_proj,0.0002252296,0.10000,1.199
18,mlp.down_proj,0.0000362348,0.10000,3.553
19,self_attn.k_proj,0.0000095172,0.10000,2.110
19,self_attn.q_proj,0.0000709936,0.10000,2.114
19,self_attn.v_proj,0.0000133349,0.10000,2.115
19,self_attn.o_proj,0.0000142899,0.10000,0.590
19,mlp.gate_proj,0.0002420128,0.10000,1.201
19,mlp.up_proj,0.0002512523,0.10000,1.204
19,mlp.down_proj,0.0000400181,0.10000,3.529
20,self_attn.k_proj,0.0000105798,0.10000,2.128
20,self_attn.v_proj,0.0000149335,0.10000,2.135
20,self_attn.q_proj,0.0000714233,0.10000,2.139
20,self_attn.o_proj,0.0000085477,0.10000,0.586
20,mlp.up_proj,0.0003235113,0.10000,1.247
20,mlp.gate_proj,0.0003114748,0.10000,1.252
20,mlp.down_proj,0.0000751625,0.10000,3.538
21,self_attn.q_proj,0.0000870349,0.10000,2.168
21,self_attn.v_proj,0.0000235002,0.10000,2.177
21,self_attn.k_proj,0.0000107961,0.10000,2.181
21,self_attn.o_proj,0.0000312681,0.10000,0.588
21,mlp.gate_proj,0.0004483905,0.10000,1.200
21,mlp.up_proj,0.0004428911,0.10000,1.202
21,mlp.down_proj,0.0001190182,0.10000,3.628
22,self_attn.q_proj,0.0001302130,0.10000,2.126
22,self_attn.k_proj,0.0000150554,0.10000,2.130
22,self_attn.v_proj,0.0000373550,0.10000,2.138
22,self_attn.o_proj,0.0000171763,0.10000,0.592
22,mlp.up_proj,0.0006391320,0.10000,1.196
22,mlp.gate_proj,0.0006453207,0.10000,1.199
22,mlp.down_proj,0.0002066607,0.10000,3.603
23,self_attn.q_proj,0.0001625592,0.10000,2.178
23,self_attn.v_proj,0.0000508609,0.10000,2.185
23,self_attn.k_proj,0.0000197490,0.10000,2.188
23,self_attn.o_proj,0.0000418422,0.10000,0.590
23,mlp.up_proj,0.0009001281,0.10000,1.205
23,mlp.gate_proj,0.0009187021,0.10000,1.205
23,mlp.down_proj,0.0002839736,0.10000,3.559
24,self_attn.k_proj,0.0000170952,0.10000,2.132
24,self_attn.q_proj,0.0001363194,0.10000,2.140
24,self_attn.v_proj,0.0000470417,0.10000,2.146
24,self_attn.o_proj,0.0000316222,0.10000,0.598
24,mlp.gate_proj,0.0009379816,0.10000,1.190
24,mlp.up_proj,0.0009908391,0.10000,1.193
24,mlp.down_proj,0.0003710463,0.10000,3.578
25,self_attn.q_proj,0.0001556033,0.10000,2.113
25,self_attn.v_proj,0.0000708320,0.10000,2.118
25,self_attn.k_proj,0.0000174687,0.10000,2.121
25,self_attn.o_proj,0.0000477942,0.10000,0.607
25,mlp.up_proj,0.0012794713,0.10000,1.372
25,mlp.gate_proj,0.0011451249,0.10000,1.385
25,mlp.down_proj,0.0005603151,0.10000,3.566
26,self_attn.q_proj,0.0002087234,0.10000,2.704
26,self_attn.k_proj,0.0000243788,0.10000,2.723
26,self_attn.v_proj,0.0001307992,0.10000,2.778
26,self_attn.o_proj,0.0000752641,0.10000,0.586
26,mlp.gate_proj,0.0011720247,0.10000,1.198
26,mlp.up_proj,0.0013400149,0.10000,1.200
26,mlp.down_proj,0.0010762026,0.10000,3.589
27,self_attn.q_proj,0.0003179918,0.10000,2.117
27,self_attn.k_proj,0.0000315897,0.10000,2.125
27,self_attn.v_proj,0.0001504091,0.10000,2.142
27,self_attn.o_proj,0.0001204303,0.10000,0.590
27,mlp.up_proj,0.0021019895,0.10000,1.184
27,mlp.gate_proj,0.0019921324,0.10000,1.188
27,mlp.down_proj,0.0029983620,0.10000,3.615