You are not logged in.
Hi guys and girls,
Since upgrading to 5.3.11 last night, I've experienced a couple of GPU hangs/crashes that made Xorg unresponsive for a few seconds.
As I don't have a clue about GPU drivers, I thought I'd post this here before filing an actual bug report upstream. Any insights will be appreciated.
Here's the dmesg excerpt:
% dmesg|grep 4798
[ 4798.860641] i915 0000:00:02.0: GPU HANG: ecode 9:0:0x00000000, hang on rcs0
[ 4798.860642] [drm] GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.
[ 4798.860642] [drm] Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel
[ 4798.860643] [drm] drm/i915 developers can then reassign to the right component if it's not a kernel issue.
[ 4798.860643] [drm] The gpu crash dump is required to analyze gpu hangs, so please always attach it.
[ 4798.860644] [drm] GPU crash dump saved to /sys/class/drm/card0/error
[ 4798.861654] i915 0000:00:02.0: Resetting rcs0 for hang on rcs0
And the crash dump:
% sudo cat /sys/class/drm/card0/error :(
GPU HANG: ecode 9:0:0x00000000, hang on rcs0
Kernel: 5.3.11-arch1-1 x86_64
Time: 1573925870 s 58835 us
Boottime: 4798 s 723003 us
Uptime: 4788 s 101907 us
Epoch: 4296315008 jiffies (300 HZ)
Capture: 4296316864 jiffies; 549557 ms ago, 6187 ms after epoch
Reset count: 0
Suspend count: 0
Platform: SKYLAKE
Subplatform: 0x1
PCI ID: 0x1916
PCI Revision: 0x07
PCI Subsystem: 1028:06ff
IOMMU enabled?: 0
DMC loaded: yes
DMC fw version: 1.27
GT awake: yes
RPM wakelock: yes
PM suspended: no
EIR: 0x00000000
IER: 0x08080000
GTIER[0]: 0x01010101
GTIER[1]: 0x01010101
GTIER[2]: 0x00000070
GTIER[3]: 0x00000101
PGTBL_ER: 0x00000000
FORCEWAKE: 0xffff0001
DERRMR: 0x2077efef
CCID: 0x00000000
fence[0] = 1fe803b01800001
fence[1] = 17fa03b01012001
fence[2] = 94300700844003
fence[3] = 280603b02000001
fence[4] = 304603b02840001
fence[5] = 388603b03080001
fence[6] = 00000000
fence[7] = 480d03b03fcb001
fence[8] = 00000000
fence[9] = 00000000
fence[10] = 00000000
fence[11] = 00000000
fence[12] = 00000000
fence[13] = 00000000
fence[14] = 00000000
fence[15] = 00000000
fence[16] = 00000000
fence[17] = 00000000
fence[18] = 00000000
fence[19] = 00000000
fence[20] = 00000000
fence[21] = 00000000
fence[22] = 00000000
fence[23] = 00000000
fence[24] = 00000000
fence[25] = 00000000
fence[26] = 00000000
fence[27] = 00000000
fence[28] = 00000000
fence[29] = 00000000
fence[30] = 00000000
fence[31] = 00000000
ERROR: 0x00000000
FAULT_TLB_DATA: 0x0000001b 0xb70f48ae
DONE_REG: 0x07ffffff
rcs0 command stream:
IDLE?: no
START: 0x0000d000
HEAD: 0x000036c0 [0x00000000]
TAIL: 0x000036c0 [0x00000000, 0x00000000]
CTL: 0x00003001
MODE: 0x00000000
HWS: 0xffffe000
ACTHD: 0x00000000 000036c0
IPEIR: 0x00000000
IPEHR: 0x7a000004
INSTDONE: 0xffdfffff
SC_INSTDONE: 0xffffffff
SAMPLER_INSTDONE[0][0]: 0xffffffff
SAMPLER_INSTDONE[0][1]: 0xffffffff
SAMPLER_INSTDONE[0][2]: 0xffffffff
ROW_INSTDONE[0][0]: 0xffffffff
ROW_INSTDONE[0][1]: 0xffffffff
ROW_INSTDONE[0][2]: 0xffffffff
BBADDR: 0x00000000_00fdf4b0
BB_STATE: 0x00000020
INSTPS: 0x00008840
INSTPM: 0x00000000
FADDR: 0x00000000 000106c0
RC PSMI: 0x00000010
FAULT_REG: 0x00000000
GFX_MODE: 0x00008000
PDP0: 0x000000023e137000
PDP1: 0x0000000000000000
PDP2: 0x0000000000000000
PDP3: 0x0000000000000000
ring->head: 0x00000000
ring->tail: 0x00000000
hangcheck timestamp: 0ms (4296315008; epoch)
engine reset count: 0
ELSP[0]: pid 548, seqno 16:0003372c!, prio 3, emitted 1431654459ms, start 0000d000, head 00003620, tail 000036c0
ELSP[1]: pid 0, seqno 5:000020f4, prio -4093, emitted 1431654459ms, start 00001000, head 00000890, tail 000008f8
Active context: [0] hw_id 0, prio 0, guilty 0 active 0
bcs0 command stream:
IDLE?: yes
START: 0x00009000
HEAD: 0x00200d08 [0x00000000]
TAIL: 0x00000d08 [0x00000000, 0x00000000]
CTL: 0x00003000
MODE: 0x00000200
HWS: 0xffffd000
ACTHD: 0x00000000 00200d08
IPEIR: 0x00000000
IPEHR: 0x04000001
INSTDONE: 0xfffffffe
BBADDR: 0x00000000_019f602c
BB_STATE: 0x00000020
INSTPS: 0x00000001
INSTPM: 0x00000000
FADDR: 0x00000000 00009d08
RC PSMI: 0x00000010
FAULT_REG: 0x00000000
GFX_MODE: 0x00008000
PDP0: 0x000000023e137000
PDP1: 0x0000000000000000
PDP2: 0x0000000000000000
PDP3: 0x0000000000000000
ring->head: 0x00000000
ring->tail: 0x00000000
hangcheck timestamp: -4492373ms (0)
engine reset count: 0
Active context: [0] hw_id 0, prio 0, guilty 0 active 0
vcs0 command stream:
IDLE?: yes
START: 0x00005000
HEAD: 0x00000058 [0x00000000]
TAIL: 0x00000058 [0x00000000, 0x00000000]
CTL: 0x00000000
MODE: 0x00000200
HWS: 0xffffc000
ACTHD: 0x00000000 00000058
IPEIR: 0x00000000
IPEHR: 0x00000000
INSTDONE: 0xfffffffe
BBADDR: 0x00000000_00000000
BB_STATE: 0x00000020
INSTPS: 0x00000001
INSTPM: 0x00000000
FADDR: 0x00000000 00000000
RC PSMI: 0x00000010
FAULT_REG: 0x00000000
GFX_MODE: 0x00008000
PDP0: 0x0000000000000000
PDP1: 0x0000000000000000
PDP2: 0x0000000000000000
PDP3: 0x0000000000000000
ring->head: 0x00000000
ring->tail: 0x00000000
hangcheck timestamp: -4492373ms (0)
engine reset count: 0
Active context: [0] hw_id 0, prio 0, guilty 0 active 0
vecs0 command stream:
IDLE?: yes
START: 0x00007000
HEAD: 0x00000058 [0x00000000]
TAIL: 0x00000058 [0x00000000, 0x00000000]
CTL: 0x00000000
MODE: 0x00000200
HWS: 0xffffb000
ACTHD: 0x00000000 00000058
IPEIR: 0x00000000
IPEHR: 0x00000000
INSTDONE: 0xfffffffe
BBADDR: 0x00000000_00000000
BB_STATE: 0x00000020
INSTPS: 0x00000001
INSTPM: 0x00000000
FADDR: 0x00000000 00000000
RC PSMI: 0x00000010
FAULT_REG: 0x00000000
GFX_MODE: 0x00008000
PDP0: 0x0000000000000000
PDP1: 0x0000000000000000
PDP2: 0x0000000000000000
PDP3: 0x0000000000000000
ring->head: 0x00000000
ring->tail: 0x00000000
hangcheck timestamp: -4492373ms (0)
engine reset count: 0
Active context: [0] hw_id 0, prio 0, guilty 0 active 0
Pinned (global) [32]:
00000000_fffff000 4096 41 00 LLC
00000000_ffffe000 4096 01 01 purgeable LLC
00000000_ffffd000 4096 01 01 purgeable LLC
00000000_ffffc000 4096 01 01 purgeable LLC
00000000_ffffb000 4096 01 01 purgeable LLC
00000000_ffffa000 4096 01 01 purgeable LLC
00000000_00001000 4096 41 00 LLC
00000000_fffe3000 94208 01 01 dirty LLC
00000000_00002000 4096 41 00 LLC
00000000_fffcc000 94208 01 01 dirty LLC
00000000_fffcb000 4096 01 01 LLC
00000000_00003000 4096 41 00 LLC
00000000_fffc8000 12288 01 01 dirty LLC
00000000_00004000 4096 41 00 LLC
00000000_fffc5000 12288 01 01 dirty LLC
00000000_00005000 4096 41 00 LLC
00000000_fffc2000 12288 01 01 dirty LLC
00000000_00006000 4096 41 00 LLC
00000000_fffbf000 12288 01 01 dirty LLC
00000000_00007000 4096 41 00 LLC
00000000_fffbc000 12288 01 01 dirty LLC
00000000_00008000 4096 41 00 LLC
00000000_fffb9000 12288 01 01 dirty LLC
00000000_00009000 16384 41 00 LLC
00000000_fffb6000 12288 01 01 dirty LLC
00000000_0000d000 16384 41 00 LLC
00000000_fff9f000 94208 01 01 dirty LLC
00000000_00040000 8294400 7f 00 uncached (name: 1)
00000000_00019000 16384 41 00 LLC
00000000_fff5a000 94208 01 01 dirty LLC
00000000_00840000 16384 40 00 dirty uncached
00000000_03080000 8388608 7e 00 X dirty uncached (fence: 5)
rcs0 --- HW Status = 0x00000000 ffffe000
:amGUC+TMtV!3cP:rqmuM6?tn]2edY]c/b$bZbDV^?B.47!Oh:nrr<$!!-4Z8!!!"8
rcs0 --- WA context = 0x00000000 fffcb000
:bjCpF+Fjp+?f?YP$5u'Z'OaI(!d:e<"*%C7"9NUF<q_Jg\M"]lecG2D#AM3iV9>CSk%16=3"9;0ItnHhnZr"/$='W)TklZqIU%5s&JNLE&J5Te`VC)X$jZ2(!!!#j
rcs0 --- NULL context = 0xffffffff ffffffff
:h!LVVoosD+-h56JB[B?p)N@4=Brl%l.@n[Oi^..N`FrmT2<&eC42F9ID@1?[J[7=2gs_79j`m<:OKf@7fQs-B_C2X@H0:>_Dc+uR@`MAGKr^/#4Hf(Ka<!l[^%9RV$2_'d;u$TN]:<Kr[Gb?9B:ihO$,?<1b=si^/h5IDdm8E*l6=-QdnME<g;YM[0oZOQN+3^E@i33?D7f9:N$$JASajtl.(p]10oTTkN+3^E)8=*FNNJ[UVS-uA`?K&.Qr][OYXN9"eubo>`;J&j$YT^i;_C_H_I3;8S7?)c/mr`*VS34]QPSIPj,K<_nc+&5^RtJiR"!LMY,c9>$!U/#X<V,B<4G5LC=<rtrC6>Tqe&.eG^#eY9&FJa>%onf]BB.4qUJC>O8J&UocuF`GW<NGe\?-1b,t+_G1<P>S(AU^1Aui(7!glI^V?$`0WNSi?3+A^1.g6uK$5QLjp*Tn&"#JrS0;8J0jonJYbcn%1P!aP)`TD-==V0gq8)5'"C**h(fK:-o:tu38+n=nre>TN6iZOFb"\M\c[OA*c8d&^%ktPFVujm!$().;S0;:H0jonJYbcn%Dh-+7)h!+n)F"4NoP_K-]0oOU1BHeR+nLG^/Ac!$?uldE7>K)>^m5hA<rlE$^t*pD4:(FD)R/6//1(8lKPWFDkgaWHUG]sTE-:tck6TiFLX#j-/DRKd_H%U8KC2j<jS/\%\XbP&5E?2l+2<c!TA??IdDrDeO,p&-r,g).]9D9ljlnD!!F?!bY*_ndice[62;7:m:M\u>)Y^9NN;M2$o_HGW[!oTr^WeVp9.4F)s%8]DEJ9in\Uhs__r_I`-O<fU$s4^kX=7OM9].^WL<^/d<XQ'r5&beb_&!A^"r+QcKPWe[>U=>`28`K!!8JT6iFg"dg^A)m07mO8$M[B6$,-jrK>3/u^`S?Hn0d.T*ZY(X&`AFt9fZ0trc,sr\`E2>%XnCsn3b&'Su:Dc+2$l'Nh]id9jcmGr8Uj/d@BK=AHK5r&?#AeYXO:A;S\S8^h3[MS/-(3'k&U%i:D])/eGO?.<%.rfQtCC9&Q0EL9:ln\2rP`?WYujS85$8h!daU4SlbS4Qa?>SFXAUAZHj7g2A[;D=u+S/2M@kBkUNSCUIZ?14B#6K\RY>^V_CDY*bdLm'N>F3PoVm)nNAZi*5&H1L;D8Ndi!HbT65WQJMd(2N3)ee)cha-\BOL5!L]]^\,gr`YSMP\R,Nlc]Pul`\ljAbrJ"%*cbP48^7.eT?6S>OP]8..0#t7:-@5*[lirf?_>@h&Dd&.5H/n_QC'MCD)aI.UlSO7PSJ[`,5GKZbf>&M5,t0R%gR\P6M,EV^,(B.ltmTcKu8pbhc!4HQCf-=m5dt8Iaa<mFPG)gid[8V?Y>A2s-UFX]F\1Gp\h?AWQN`oVtTT@qs0bRl"R!k)<\cTS^li&s$'h(`iV],Top[NM!qPF%@gm<7ItnJQEY/qCX3Qr@9qAU8Uaj>l%E;^dh=FGesYI&I=HG'B-/B/7;bX90NlCBD8I&-89cHMaJf-,pW3N";Y7dUJBe>.3%X#EMhE])!:VAoNE.%sK0KatnZ,60YF[V!YcIB8s)'35NW$7e2lA"sPC%ntGf6`1XS+Q8]79Lo';FkN]#*FWAPBJlH.V)*nc!OaoUSt]8rY$SqJ3bDM"e[6W>8I73L"=D)h.d#1V']M:<("G`_n=G=5t`hUAjm^II2N\4gsE5'Ap<=ESe&MK-J0e+Ocm-?-t[l>G31fH9OfZOJ/":-(deBr7!p-oZqdB&^q=%AiQ8;\j`mZ4/8Z?m2pSaDJp:m4#DhQAummg3QD%Q&S(Y!_n-@Z_]A9\.Bq\%_R1>k@/ET'l:+HsE>@HFPPD6Lh;#pZNo_NA_'P!'K1::[_n-q8%B3TPflad:[69_oB;h[SG&C51;a1"ffC(TjBsPnm<k<_m:Ef2A:jnsHO#Dd+DfC#L`Q6-(9D=`ZgW]91I\FZ\-2Sp,jg"VYM#1J\WN(doXH(IYOOa4>*P:G
c!TeAi`V9C&G42g\&cTeemeAd[X/?H'@TudI#k"!*/TA+sdUUcahq"6+G5\Z$cXVE*B=f0AhlBhfCN]"/Fqd#DpQ3it;<<hOcZlS$NQ8AGijZ'p'?t%OeMBG43=-])s%C>Yf2dt"?H@*Bik(pL^3-n#>spM^60"VZBT\S7(WbcQ'(tY^X^Cs*Sm#WX*uUZ[42Q&P?"7W\CO@1e,"K!gUA6k/7df!NHgHf&]dOmpB=4j/CbNH@`QutZM]^,C:RSIl/)&\3Ca#VOGt;_/GVV:o>);2488kONI*]ALe>[)jHRJ.Tg\rGr'DflGrp_a6C-&$QD_&aVKRmc;n)i8n5@(G8lMT1TS?T2i#ONa1;7lPb>32.C,"?p1g(mnQldU8(NbDJJ%qjb+btU!n&FS5dnCc/I#9O]+(ZAm#SSCor5NS0o[#(]%qM(Mp.+D9XYjW$^M7Di&-'XY/I-lQHW[*#&FE$Eo[EQ-r*sj!Y_oAaJhTU06b[3nNX)G(1DWY8Ad%7*pfQU2&LUU@q4YP1]&KYKJ[Fg%SS%GT\qn`[E3P'?,oPOX,`p2F93-;+CdMKfEUrHup1J<5BeOMfC6BC%>Y.2)ef(DfICNU\*raVG:dX.gCeScHS39o6uLLbI#=RuF^/FOoF$*CTgo!ii:pO<^[@a6dbj/S/Ei;>SQaR3P5Vd\N)k0*X?hYQno_q+(h!p7#=D+[!7\fe>[%Q\^UM%99\PHc35YH3<L%B(@u8l(h]l!rp]on[\uc%XK)(H[WIe"h*S*/__`T6kk]H4BObF`gp9)BFjo%E6(qp\O$#cemOY?F[FQT.TGs=IA>./+VJ1gMF.NMYQg%[p])>MSaYD%$8k2CgNP(f&pmA!MR2$WmM%N\8^r\fscddNl-L_1-3upG.jf'kb/1WY5-OQhVfp2Ru6Y4^\uDlVBeci67[].j*sHd[<[7%DQ*OnbH7WEh0NGa>)^@=2iiu]n'^kG>:W.lG+_[fW;P&R*'Sdb-.(*R?<YT7Q78?pN)Q0^HVIsTo]pk3h89I(VO_5EX[<1C]@Z(?bM+tMS#a!l7RC'[3R5"tMJhgQ%dSk=[Q!#'+C+<c30DaWWGC#bNiSs[["$7l`tHuqE*k>6g8N`,[X=r:eE\a&h;*YB>LADp3JQj:<\\Ge/[lc);lRJeFGX&"JbebNCEj+ra^d1jHL3kuf<3o`'?24bDS9/5GHR=OL^sshXLjB6\(@h^4+3&UFUW7@4bHsECtOd51Yf9>oneup\=+(qd!<jCELiE^dlhS3Xg*0j*L6TD\GF`55.3jt]'Bc;eGIkj(NhA09]0&*XTjP8Qh`r+HL0H'N:YWG+?bd.LHj&njSX#J78qHXQ,rt9fLX8JINJ?o*uCJ>+Y/Se#jk:T;d2h9HPAK9;baO]^tQ5Udt9g/.@C'.k0$<AoenmE\nTiS>NtBLU42HIg9A?L%gQ\E:]51]p?:pMAm)-Eh:M$(4L9@u'"q&CjE=m/450FI0I<3JNm%Z;GX%<GCL+UY&,ZSYjLfU9Pr6TT\27Qtq52?t2r2rE*phZ:_^X]OVlt;D+aFJskN>oPMOnrqaR1so(c@VoY-uukIH=mTbtZpKs*=$Rl2tPIDr"SF3X=9i'YaJ1/DMt**R(s/A#HTa>)$mqf6/2,NN^p"I0,nG/U-U(9l'?CO[=o[E\6eSnBg.Grr.Ys-tqRkq_@O\NjirpIB.9CD]uIcCJS-60Z1GX+hX_qX>R4;[O%KiDoZ]NJ_N+fYC;=7qT/.X9%0c%Yq0)]Xi,>k^A!DcHp[>T(722>a1dmA5-pg:J$ALj5-!<)k<I3=s2i$EaX?3H'/1<EQu'TN*as*li%onbc3lU*$F#eMfLEbZ<Phf?pFS`$4>uqmkE^Uir:8,`a1X:A=(uA^nYOe_N+Q[,N,>3cIjrcg/PGZcS]Z15n)Yajo_p?PB,HXV;mVFZj!d".??)4bANr/s/i&Mn:/;a$N27=.-,rlAPg\S@GfiHpApDHfb#Wc0\B8u>Q!@lU),@=
icP]C%9J78Z)+lP"Q].`l.>*SVEKp=[42Zp8r;L]DrH=XoPY2qfNiZN`j7X6SWSS8KVt_tE6`$"0`u_/Vi!Z>6NhBbCeeb*8>^;CoUP:E)n0Qd)3%ao'LQ18=iQK'("aGab"^VhSX3<4e>4=E.:8)tn,iKDe=7HfgK"J&)ks%**bP%KVQjf5?2INit0Y)bY!lO.2_^>^l9+4oHJ?12-3)&6dQ%mQl_c@,K>X!+P)Sm3q@0RFU9n[=GP"KLd(1B!"0KFm3I)4q]i<oWfMZd-mKe*O=S-EpIfth;-_,_2Qc4;mL,2iob_Nt@,NJFSX`i]WPf)o0)In:*ckMm/\$Ts:[#5;fO`dc<)0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E2qZ,)L]@B!m..!]qp(2mbK`-%0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?!WTW94UGD]
bcs0 --- HW Status = 0x00000000 ffffd000
:c0_$G+92a'.)>uss1pl)_T(*?RG-TiUm4M`SY!K,Xo?2]!!"t?!HV';DZIMn
bcs0 --- NULL context = 0xffffffff ffffffff
:fBo)Q:r?&%Zf-WU+HhCi$"B^=b8B%K@U`dL',)#c#fOXe@KZrO0elN,5_1Xe8Vd[l!b?n#%d]1k4hm6.#m:;_2=H?"SE0]GAu,'iPbfhSV(##7rCTtPpYQKf?K"bjV*R9..8!L&8lCnMC6"AR#AHB)5'_6&5OsG_VM?7A1,h:!>L,2XN+][eRl3&[E&J<PB)&Q^T=tf<TC]O![FYMA?ae#<\G(OJ\<dW@[&8g9B.ql(`IGP4n)\c=q_%^+07RfA]V#)pA`apRAm=0999CQ-iB'3hO1@k@@5,tUBB-=H0imu!@B;i7@VcL,iTqSDJ8t93(uZH%(QN_f-5jc^GW1V,_n:&K`4-Tts1N47mm!i?Pbfg6V(##7"T%$*$8f-:
vcs0 --- HW Status = 0x00000000 ffffc000
:c0_$G0E;G'G/+1gs6MtSR$k'0hUEC@&g@mT:H+Il^U%"j!#tG$;tp=m;ZM"T
vcs0 --- NULL context = 0xffffffff ffffffff
:dd<QL@004Js7+,L"#^0',hNoL_&UN9C^AfjK*2GmK(FMD(a*5&"i:BJ9AmX<^5Ws`6%V#r\!_kUkKj,N`o,3Z^AKZsmE]k8s*$W:8lD(J;O"",e>4TdCZ6B'^?65FW&[o;.8!L&8lCnM:=9*@B#CXfY?N;`D.H5MT#'$nI:eOUdVQ,=)4s*KgeGaqCDc@uHtUC-n)Y@*r;B^[%`mI])7KD>2;m<a3Q!O'4+c85:\WJcP;Rh[p[^F?noNu4hZ_j3NT[C04j:AhrC^Wp<TOTM,9CDYrlS^jD6,76'#[m*@g-@['-1M_pD[OWls57K3!UO6_)u5G`A*Ab9>J2]:)2]Wb]AMYs+g<^k&^=#Pbfg$V(##7*!>Lb!"UjQ
vecs0 --- HW Status = 0x00000000 ffffb000
:c0_$G0E;G'G/+1gs6MtSR$k'0hUEC@&g@mT:H+Il^U%"j!#tG$;tp=m;ZM"T
vecs0 --- NULL context = 0xffffffff ffffffff
:g?kDT:r?&%;rNHJLaB;]6>3>N!hoh:A?djD5a[p3W(&*V'+c9OMM[1G&0Q[a)e(?X,SP-MKtr?VIi(*1_#YA$HqZmYRW`t[;uO+f=gpHDXFGsGDu;M;8lCnM;O"",LIZ2R#$Y)PJbXGCPbfg$V(##7p5[jk0CMtiX*%K+\`R[-Yr.Qo\_Oc5gUp[l2p9Gfc1F0s%^?n+?Q@!<estRdDSMcln:#lMhgWUN2grh$bYsLG=c(*O/hgpccBklgX^mDUj8/.<^G\hrSBoX\k:\l=8LKplcashJ(c'gS%KVVHoRC:(:DB/:iiElk?l8_mE&J;a^cZ*KNNfi<+(=MA#8lrkcNM0GE%`4#`]0<<I@-l<]:UbF/",kK8lCnM>*Pj4/_2SY!!):`
Num Pipes: 3
Pipe [0]:
Power: on
SRC: 077f0437
STAT: 00000000
Plane [0]:
CNTR: c4042400
STRIDE: 0000000f
SURF: 03080000
TILEOFF: 00000000
Cursor [0]:
CNTR: 04000027
POS: 031f01bc
BASE: 00840000
Pipe [1]:
Power: off
SRC: 00000000
STAT: 00000000
Plane [1]:
CNTR: 00000000
STRIDE: 00000000
SURF: 00000000
TILEOFF: 00000000
Cursor [1]:
CNTR: 00000000
POS: 00000000
BASE: 00000000
Pipe [2]:
Power: off
SRC: 00000000
STAT: 00000000
Plane [2]:
CNTR: 00000000
STRIDE: 00000000
SURF: 00000000
TILEOFF: 00000000
Cursor [2]:
CNTR: 00000000
POS: 00000000
BASE: 00000000
CPU transcoder: A
Power: off
CONF: 00000000
HTOTAL: 00000000
HBLANK: 00000000
HSYNC: 00000000
VTOTAL: 00000000
VBLANK: 00000000
VSYNC: 00000000
CPU transcoder: A
Power: off
CONF: 00000000
HTOTAL: 00000000
HBLANK: 00000000
HSYNC: 00000000
VTOTAL: 00000000
VBLANK: 00000000
VSYNC: 00000000
CPU transcoder: A
Power: off
CONF: 00000000
HTOTAL: 00000000
HBLANK: 00000000
HSYNC: 00000000
VTOTAL: 00000000
VBLANK: 00000000
VSYNC: 00000000
CPU transcoder: EDP
Power: on
CONF: c0000000
HTOTAL: 0f9f0eff
HBLANK: 0f9f0eff
HSYNC: 0f4f0f2f
VTOTAL: 0888086f
VBLANK: 0888086f
VSYNC: 08770871
is_mobile: no
is_lp: no
require_force_probe: no
has_64bit_reloc: yes
gpu_reset_clobbers_display: no
has_reset_engine: yes
has_fpga_dbg: yes
has_guc: yes
has_l3_dpf: no
has_llc: yes
has_logical_ring_contexts: yes
has_logical_ring_elsq: no
has_logical_ring_preemption: yes
has_pooled_eu: no
has_rc6: yes
has_rc6p: no
has_rps: yes
has_runtime_pm: yes
has_snoop: no
has_coherent_ggtt: yes
unfenced_needs_alignment: no
hws_needs_physical: no
cursor_needs_physical: no
has_csr: yes
has_ddi: yes
has_dp_mst: yes
has_fbc: yes
has_gmch: no
has_hotplug: yes
has_ipc: yes
has_overlay: no
has_psr: yes
overlay_needs_physical: no
supports_tv: no
Has logical contexts? yes
scheduler: f
slice0: 3 subslice(s) (0x7):
subslice0: 8 EUs (0xff)
subslice1: 8 EUs (0xff)
subslice2: 8 EUs (0xff)
subslice3: 0 EUs (0x0)
slice1: 0 subslice(s) (0x0):
subslice0: 0 EUs (0x0)
subslice1: 0 EUs (0x0)
subslice2: 0 EUs (0x0)
subslice3: 0 EUs (0x0)
slice2: 0 subslice(s) (0x0):
subslice0: 0 EUs (0x0)
subslice1: 0 EUs (0x0)
subslice2: 0 EUs (0x0)
subslice3: 0 EUs (0x0)
i915.vbt_firmware=(null)
i915.modeset=-1
i915.lvds_channel_mode=0
i915.panel_use_ssc=-1
i915.vbt_sdvo_panel_type=-1
i915.enable_dc=-1
i915.enable_fbc=1
i915.enable_psr=-1
i915.disable_power_well=1
i915.enable_ips=1
i915.invert_brightness=0
i915.enable_guc=0
i915.guc_log_level=0
i915.guc_firmware_path=(null)
i915.huc_firmware_path=(null)
i915.dmc_firmware_path=(null)
i915.mmio_debug=1
i915.edp_vswing=0
i915.reset=2
i915.inject_load_failure=0
i915.fastboot=-1
i915.force_probe=*
i915.alpha_support=yes
i915.enable_hangcheck=yes
i915.prefault_disable=no
i915.load_detect_test=no
i915.force_reset_modeset_test=no
i915.error_capture=yes
i915.disable_display=no
i915.verbose_state_checks=yes
i915.nuclear_pageflip=no
i915.enable_dp_mst=yes
i915.enable_dpcd_backlight=no
i915.enable_gvt=no
GuC firmware: i915/skl_guc_32.0.3.bin
status: fetch NONE, load NONE
version: wanted 32.0, found 0.0
header: offset 0, size 0
uCode: offset 0, size 0
RSA: offset 0, size 0
HuC firmware: i915/skl_huc_ver01_07_1398.bin
status: fetch NONE, load NONE
version: wanted 1.7, found 0.0
header: offset 0, size 0
uCode: offset 0, size 0
RSA: offset 0, size 0
Last edited by plp (2019-11-17 17:13:56)
Offline
Perhaps it's this bug:
Offline
Looks like this might be the issue. Judging by the massive list of marked duplicates at the bottom, this is a big issue for a lot of people.
Offline
Yes that's the issue :
https://bugs.freedesktop.org/show_bug.cgi?id=111970
For now updating kernel to either linux-lts or linux-drm-tip-git resolves the issue.
Offline
5.4 should fix it. Have you tried?
Offline
Yes, it is fixed in 5.4.
Offline
Weird. For me this problem starts with 5.4.1.
Last edited by wioo (2019-12-04 10:47:22)
Offline
The problem still persists with 5.4.x.
I had already noticed with 5.3.x that the system hangs for a few seconds but then recovers.
With 5.4.x it freezes completely, only a hard reset helps, and it happens very frequently. This is a very much alive and very serious bug.
I also have the linux-lts kernel, 4.19.87-1.
With that there is no problem. So it's a kernel-related issue, and it is not fixed in 5.4.1!
Compiling linux-drm-tip-git takes forever, and that is a very bleeding-edge kernel. I'm not sure it's a good idea to use it. It's safer to use linux-lts.
Last edited by Archanfel80HUN (2019-12-04 11:14:18)
Offline
I have noticed some strange freezes for some weeks as well (on my XPS 13). 5.4.x made it worse; now i915 hangs completely. Logging in via SSH and rebooting, or powering down using the power button, works though...
Offline
With 5.4.x it freezes completely, only a hard reset helps, and it happens very frequently. This is a very much alive and very serious bug.
I also have the linux-lts kernel, 4.19.87-1.
With that there is no problem. So it's a kernel-related issue, and it is not fixed in 5.4.1!
Compiling linux-drm-tip-git takes forever, and that is a very bleeding-edge kernel. I'm not sure it's a good idea to use it. It's safer to use linux-lts.
What are you going to do when linux-lts is rebased to 5.4+?
Are you assuming it will have been resolved by then without the need to report the issue upstream?
Offline
I confirm having the i915 soft reset with kernel 5.3 and a hard freeze with 5.4.1. Sorry to see such a critical issue in the mainstream kernel, waiting for the fix / workaround now...
Offline
I have the same issue with my machine, landed on this post when I searched with the dmesg log line, originally opened this post for it,
https://bbs.archlinux.org/viewtopic.php … 2#p1876042
Is there any workaround we can do until it's fixed?
Offline
@blazeboy the upstream bug report you linked to in your other thread reported the issue in 4.18.0-10.
Which kernel versions is the issue present for on your system?
Offline
Yes I have the same problem. The system totally freezes with 5.4 and I need to reboot it.
Offline
I have the same problem - there were several few-second Xorg freezes on 5.4.0, and now a total system hang (and loss of work) on 5.4.1. I'm now using linux-drm-tip-git from the AUR (that's several hours I'm not getting back) and have had no hang for a day (but the nvidia driver is incompatible, so no multiscreen for me for a while).
Anyway the bugtracker (incidentally moved to https://gitlab.freedesktop.org/drm/intel/issues at same time with many similar bug reports) doesn't say which specific patches address the issue, just that it should be fixed in drm-tip. Anyone know how long it usually takes for drm-tip to make it into mainline kernel, or which patches to apply?
Offline
@alu1 The fix for 111970 was the three commits from https://cgit.freedesktop.org/~ickle/lin … =bug111970
however they are already in 5.4 as:
https://git.kernel.org/pub/scm/linux/ke … dd4c87ab56
https://git.kernel.org/pub/scm/linux/ke … 63ee5ea68f
https://git.kernel.org/pub/scm/linux/ke … 82b9a55253
Are you referring to a different bug report?
Offline
I can confirm this problem, too. I only noticed it after updating to 5.4.1 .
On 5.3.13 (I downgraded to that after the freeze occurred twice) and on lts I have never experienced this (or at least I think so; it might be that there was a hang very seldom).
Offline
On 5.3.13 (I downgraded to that after the freeze occurred twice) and on lts I have never experienced this (or at least I think so; it might be that there was a hang very seldom).
So you confirmed you do not have the same issue as the topic starter.
Edit:
There seem to be three issues on this thread which are being mixed together.
Two of which were present in 5.3. The first issue was fixed with 5.4. (111970)
The second issue is still present in 5.4.
The third issue was introduced in 5.4.
Last edited by loqs (2019-12-05 18:56:12)
Offline
Yes I have the same problem. The system totally freezes with 5.4 and I need to reboot it.
@loqs : You are right, of course.
I can confirm that I have problems like chrpinedo .
My log says:
kernel: Asynchronous wait on fence i915:Xorg[775]:fd34 timed out (hint:intel_atomic_commit_ready+>
Offline
I don't think it's just us; the kernel devs are also conflating the different issues, where in my opinion it went from severe in <=5.4.0 (recoverable freezes) to critical in 5.4.1 (irrecoverable hangs). Empirically, however, it is fixed in drm-tip. The root cause of all three seems to be messing with C-states on idle, which goes back to 5.3.12.
What fix to apply thereafter is beyond me and I must continue with drm-tip, not sure what to look out for to know if/when mainline is updated though.
The bug reports I'm referring to are:
https://gitlab.freedesktop.org/drm/intel/issues/713
https://gitlab.freedesktop.org/drm/intel/issues/712
https://gitlab.freedesktop.org/drm/intel/issues/674
https://gitlab.freedesktop.org/drm/intel/issues/673
https://gitlab.freedesktop.org/drm/intel/issues/670
https://gitlab.freedesktop.org/drm/intel/issues/3 (from deprecated bugtracker)
There's no useful info there to work out what is fixed in drm-tip, and there's nothing new to add in terms of available information, other than noise.
Offline
What about testing linux-git? The drm pull for 5.5 is already present so you could check if the fix for your issue is in that pull.
Alternatively, you could reverse bisect drm-tip to try and locate the commit containing the fix.
For the reverse bisect the last sync of drm-tip with mainline af42d3466bdc8f39806b26f593604fdc54140bcb (5.4-rc8) should be bad.
Edit:
https://gitlab.freedesktop.org/drm/intel/issues/674 the 5.4 only bug has been bisected (673 is marked as a duplicate of 674)
Last edited by loqs (2019-12-07 00:13:46)
Offline
https://bbs.archlinux.org/viewtopic.php?id=246841&p=2
systool -vm i915
Randomly try "i915.enable_dc=0 i915.enable_fbc=0 i915.enable_psr=0" as kernel parameters
On kernel 5.4.2-arch1-1 I need them all to eliminate micro freezes.
Offline
https://bbs.archlinux.org/viewtopic.php?id=246841&p=2
seth wrote:systool -vm i915
Randomly try "i915.enable_dc=0 i915.enable_fbc=0 i915.enable_psr=0" as kernel parameters
On kernel 5.4.2-arch1-1 I need them all to eliminate micro freezes.
None of them help. I disabled them all and it still freezes.
Offline
Archanfel80HUN wrote: With 5.4.x it freezes completely, only a hard reset helps, and it happens very frequently. This is a very much alive and very serious bug.
I also have the linux-lts kernel, 4.19.87-1.
With that there is no problem. So it's a kernel-related issue, and it is not fixed in 5.4.1!
Compiling linux-drm-tip-git takes forever, and that is a very bleeding-edge kernel. I'm not sure it's a good idea to use it. It's safer to use linux-lts.
What are you going to do when linux-lts is rebased to 5.4+?
Are you assuming it will have been resolved by then without the need to report the issue upstream?
A kernel with a serious bug will never be LTS. And the first few releases of the 5.x kernel were perfect. The rest is not my problem; I'm not a kernel developer anymore. I did this back then, only for Android kernels, never again... Worst thing ever. Bug, bug, bug, bug.
Offline
673 / 674 has been marked fixed https://patchwork.freedesktop.org/patch/344105/
670, 713 and 712 were also marked as duplicates of 673.
Edit:
attempt to rebase patch onto 5.4.2
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 06a506c29463..b70a59cdcdf2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -471,12 +471,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
return desc;
}
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -495,7 +489,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
/*
* Push the request back into the queue for later resubmission.
@@ -649,13 +642,29 @@ execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->hw_context;
- u64 desc;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -674,9 +683,7 @@ static u64 execlists_update_context(const struct i915_request *rq)
*/
mb();
- desc = ce->lrc_desc;
ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
return desc;
}
@@ -1149,16 +1156,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (!list_is_last(&last->sched.link,
&engine->active.requests))
return;
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
}
Last edited by loqs (2019-12-11 01:12:58)
Offline