You are not logged in.

#1 2019-11-17 10:17:02

plp
Member
Registered: 2013-01-24
Posts: 46

i915 Skylake GPU hangs with kernel 5.3.11

Hi guys and girls,

Since upgrading to 5.3.11 last night, I've experienced a couple of GPU hangs/crashes that made Xorg unresponsive for a few seconds.

As I don't have a clue about GPU drivers, I thought I'd post this here before filing an actual bug report upstream. Any insights will be appreciated.

Here's the dmesg excerpt:

% dmesg|grep 4798
[ 4798.860641] i915 0000:00:02.0: GPU HANG: ecode 9:0:0x00000000, hang on rcs0
[ 4798.860642] [drm] GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.
[ 4798.860642] [drm] Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel
[ 4798.860643] [drm] drm/i915 developers can then reassign to the right component if it's not a kernel issue.
[ 4798.860643] [drm] The gpu crash dump is required to analyze gpu hangs, so please always attach it.
[ 4798.860644] [drm] GPU crash dump saved to /sys/class/drm/card0/error
[ 4798.861654] i915 0000:00:02.0: Resetting rcs0 for hang on rcs0

And the crash dump:

% sudo cat /sys/class/drm/card0/error                                                                                                                                                                                      :(
GPU HANG: ecode 9:0:0x00000000, hang on rcs0
Kernel: 5.3.11-arch1-1 x86_64
Time: 1573925870 s 58835 us
Boottime: 4798 s 723003 us
Uptime: 4788 s 101907 us
Epoch: 4296315008 jiffies (300 HZ)
Capture: 4296316864 jiffies; 549557 ms ago, 6187 ms after epoch
Reset count: 0
Suspend count: 0
Platform: SKYLAKE
Subplatform: 0x1
PCI ID: 0x1916
PCI Revision: 0x07
PCI Subsystem: 1028:06ff
IOMMU enabled?: 0
DMC loaded: yes
DMC fw version: 1.27
GT awake: yes
RPM wakelock: yes
PM suspended: no
EIR: 0x00000000
IER: 0x08080000
GTIER[0]: 0x01010101
GTIER[1]: 0x01010101
GTIER[2]: 0x00000070
GTIER[3]: 0x00000101
PGTBL_ER: 0x00000000
FORCEWAKE: 0xffff0001
DERRMR: 0x2077efef
CCID: 0x00000000
  fence[0] = 1fe803b01800001
  fence[1] = 17fa03b01012001
  fence[2] = 94300700844003
  fence[3] = 280603b02000001
  fence[4] = 304603b02840001
  fence[5] = 388603b03080001
  fence[6] = 00000000
  fence[7] = 480d03b03fcb001
  fence[8] = 00000000
  fence[9] = 00000000
  fence[10] = 00000000
  fence[11] = 00000000
  fence[12] = 00000000
  fence[13] = 00000000
  fence[14] = 00000000
  fence[15] = 00000000
  fence[16] = 00000000
  fence[17] = 00000000
  fence[18] = 00000000
  fence[19] = 00000000
  fence[20] = 00000000
  fence[21] = 00000000
  fence[22] = 00000000
  fence[23] = 00000000
  fence[24] = 00000000
  fence[25] = 00000000
  fence[26] = 00000000
  fence[27] = 00000000
  fence[28] = 00000000
  fence[29] = 00000000
  fence[30] = 00000000
  fence[31] = 00000000
ERROR: 0x00000000
FAULT_TLB_DATA: 0x0000001b 0xb70f48ae
DONE_REG: 0x07ffffff
rcs0 command stream:
  IDLE?: no
  START: 0x0000d000
  HEAD:  0x000036c0 [0x00000000]
  TAIL:  0x000036c0 [0x00000000, 0x00000000]
  CTL:   0x00003001
  MODE:  0x00000000
  HWS:   0xffffe000
  ACTHD: 0x00000000 000036c0
  IPEIR: 0x00000000
  IPEHR: 0x7a000004
  INSTDONE: 0xffdfffff
  SC_INSTDONE: 0xffffffff
  SAMPLER_INSTDONE[0][0]: 0xffffffff
  SAMPLER_INSTDONE[0][1]: 0xffffffff
  SAMPLER_INSTDONE[0][2]: 0xffffffff
  ROW_INSTDONE[0][0]: 0xffffffff
  ROW_INSTDONE[0][1]: 0xffffffff
  ROW_INSTDONE[0][2]: 0xffffffff
  BBADDR: 0x00000000_00fdf4b0
  BB_STATE: 0x00000020
  INSTPS: 0x00008840
  INSTPM: 0x00000000
  FADDR: 0x00000000 000106c0
  RC PSMI: 0x00000010
  FAULT_REG: 0x00000000
  GFX_MODE: 0x00008000
  PDP0: 0x000000023e137000
  PDP1: 0x0000000000000000
  PDP2: 0x0000000000000000
  PDP3: 0x0000000000000000
  ring->head: 0x00000000
  ring->tail: 0x00000000
  hangcheck timestamp: 0ms (4296315008; epoch)
  engine reset count: 0
  ELSP[0]:  pid 548, seqno       16:0003372c!, prio 3, emitted 1431654459ms, start 0000d000, head 00003620, tail 000036c0
  ELSP[1]:  pid 0, seqno        5:000020f4, prio -4093, emitted 1431654459ms, start 00001000, head 00000890, tail 000008f8
  Active context: [0] hw_id 0, prio 0, guilty 0 active 0
bcs0 command stream:
  IDLE?: yes
  START: 0x00009000
  HEAD:  0x00200d08 [0x00000000]
  TAIL:  0x00000d08 [0x00000000, 0x00000000]
  CTL:   0x00003000
  MODE:  0x00000200
  HWS:   0xffffd000
  ACTHD: 0x00000000 00200d08
  IPEIR: 0x00000000
  IPEHR: 0x04000001
  INSTDONE: 0xfffffffe
  BBADDR: 0x00000000_019f602c
  BB_STATE: 0x00000020
  INSTPS: 0x00000001
  INSTPM: 0x00000000
  FADDR: 0x00000000 00009d08
  RC PSMI: 0x00000010
  FAULT_REG: 0x00000000
  GFX_MODE: 0x00008000
  PDP0: 0x000000023e137000
  PDP1: 0x0000000000000000
  PDP2: 0x0000000000000000
  PDP3: 0x0000000000000000
  ring->head: 0x00000000
  ring->tail: 0x00000000
  hangcheck timestamp: -4492373ms (0)
  engine reset count: 0
  Active context: [0] hw_id 0, prio 0, guilty 0 active 0
vcs0 command stream:
  IDLE?: yes
  START: 0x00005000
  HEAD:  0x00000058 [0x00000000]
  TAIL:  0x00000058 [0x00000000, 0x00000000]
  CTL:   0x00000000
  MODE:  0x00000200
  HWS:   0xffffc000
  ACTHD: 0x00000000 00000058
  IPEIR: 0x00000000
  IPEHR: 0x00000000
  INSTDONE: 0xfffffffe
  BBADDR: 0x00000000_00000000
  BB_STATE: 0x00000020
  INSTPS: 0x00000001
  INSTPM: 0x00000000
  FADDR: 0x00000000 00000000
  RC PSMI: 0x00000010
  FAULT_REG: 0x00000000
  GFX_MODE: 0x00008000
  PDP0: 0x0000000000000000
  PDP1: 0x0000000000000000
  PDP2: 0x0000000000000000
  PDP3: 0x0000000000000000
  ring->head: 0x00000000
  ring->tail: 0x00000000
  hangcheck timestamp: -4492373ms (0)
  engine reset count: 0
  Active context: [0] hw_id 0, prio 0, guilty 0 active 0
vecs0 command stream:
  IDLE?: yes
  START: 0x00007000
  HEAD:  0x00000058 [0x00000000]
  TAIL:  0x00000058 [0x00000000, 0x00000000]
  CTL:   0x00000000
  MODE:  0x00000200
  HWS:   0xffffb000
  ACTHD: 0x00000000 00000058
  IPEIR: 0x00000000
  IPEHR: 0x00000000
  INSTDONE: 0xfffffffe
  BBADDR: 0x00000000_00000000
  BB_STATE: 0x00000020
  INSTPS: 0x00000001
  INSTPM: 0x00000000
  FADDR: 0x00000000 00000000
  RC PSMI: 0x00000010
  FAULT_REG: 0x00000000
  GFX_MODE: 0x00008000
  PDP0: 0x0000000000000000
  PDP1: 0x0000000000000000
  PDP2: 0x0000000000000000
  PDP3: 0x0000000000000000
  ring->head: 0x00000000
  ring->tail: 0x00000000
  hangcheck timestamp: -4492373ms (0)
  engine reset count: 0
  Active context: [0] hw_id 0, prio 0, guilty 0 active 0
Pinned (global) [32]:
    00000000_fffff000     4096 41 00 LLC
    00000000_ffffe000     4096 01 01 purgeable LLC
    00000000_ffffd000     4096 01 01 purgeable LLC
    00000000_ffffc000     4096 01 01 purgeable LLC
    00000000_ffffb000     4096 01 01 purgeable LLC
    00000000_ffffa000     4096 01 01 purgeable LLC
    00000000_00001000     4096 41 00 LLC
    00000000_fffe3000    94208 01 01 dirty LLC
    00000000_00002000     4096 41 00 LLC
    00000000_fffcc000    94208 01 01 dirty LLC
    00000000_fffcb000     4096 01 01 LLC
    00000000_00003000     4096 41 00 LLC
    00000000_fffc8000    12288 01 01 dirty LLC
    00000000_00004000     4096 41 00 LLC
    00000000_fffc5000    12288 01 01 dirty LLC
    00000000_00005000     4096 41 00 LLC
    00000000_fffc2000    12288 01 01 dirty LLC
    00000000_00006000     4096 41 00 LLC
    00000000_fffbf000    12288 01 01 dirty LLC
    00000000_00007000     4096 41 00 LLC
    00000000_fffbc000    12288 01 01 dirty LLC
    00000000_00008000     4096 41 00 LLC
    00000000_fffb9000    12288 01 01 dirty LLC
    00000000_00009000    16384 41 00 LLC
    00000000_fffb6000    12288 01 01 dirty LLC
    00000000_0000d000    16384 41 00 LLC
    00000000_fff9f000    94208 01 01 dirty LLC
    00000000_00040000  8294400 7f 00 uncached (name: 1)
    00000000_00019000    16384 41 00 LLC
    00000000_fff5a000    94208 01 01 dirty LLC
    00000000_00840000    16384 40 00 dirty uncached
    00000000_03080000  8388608 7e 00 X dirty uncached (fence: 5)
rcs0 --- HW Status = 0x00000000 ffffe000
:amGUC+TMtV!3cP:rqmuM6?tn]2edY]c/b$bZbDV^?B.47!Oh:nrr<$!!-4Z8!!!"8
rcs0 --- WA context = 0x00000000 fffcb000
:bjCpF+Fjp+?f?YP$5u'Z'OaI(!d:e<"*%C7"9NUF<q_Jg\M"]lecG2D#AM3iV9>CSk%16=3"9;0ItnHhnZr"/$='W)TklZqIU%5s&JNLE&J5Te`VC)X$jZ2(!!!#j
rcs0 --- NULL context = 0xffffffff ffffffff
:h!LVVoosD+-h56JB[B?p)N@4=Brl%l.@n[Oi^..N`FrmT2<&eC42F9ID@1?[J[7=2gs_79j`m<:OKf@7fQs-B_C2X@H0:>_Dc+uR@`MAGKr^/#4Hf(Ka<!l[^%9RV$2_'d;u$TN]:<Kr[Gb?9B:ihO$,?<1b=si^/h5IDdm8E*l6=-QdnME<g;YM[0oZOQN+3^E@i33?D7f9:N$$JASajtl.(p]10oTTkN+3^E)8=*FNNJ[UVS-uA`?K&.Qr][OYXN9"eubo>`;J&j$YT^i;_C_H_I3;8S7?)c/mr`*VS34]QPSIPj,K<_nc+&5^RtJiR"!LMY,c9>$!U/#X<V,B<4G5LC=<rtrC6>Tqe&.eG^#eY9&FJa>%onf]BB.4qUJC>O8J&UocuF`GW<NGe\?-1b,t+_G1<P>S(AU^1Aui(7!glI^V?$`0WNSi?3+A^1.g6uK$5QLjp*Tn&"#JrS0;8J0jonJYbcn%1P!aP)`TD-==V0gq8)5'"C**h(fK:-o:tu38+n=nre>TN6iZOFb"\M\c[OA*c8d&^%ktPFVujm!$().;S0;:H0jonJYbcn%Dh-+7)h!+n)F"4NoP_K-]0oOU1BHeR+nLG^/Ac!$?uldE7>K)>^m5hA<rlE$^t*pD4:(FD)R/6//1(8lKPWFDkgaWHUG]sTE-:tck6TiFLX#j-/DRKd_H%U8KC2j<jS/\%\XbP&5E?2l+2<c!TA??IdDrDeO,p&-r,g).]9D9ljlnD!!F?!bY*_ndice[62;7:m:M\u>)Y^9NN;M2$o_HGW[!oTr^WeVp9.4F)s%8]DEJ9in\Uhs__r_I`-O<fU$s4^kX=7OM9].^WL<^/d<XQ'r5&beb_&!A^"r+QcKPWe[>U=>`28`K!!8JT6iFg"dg^A)m07mO8$M[B6$,-jrK>3/u^`S?Hn0d.T*ZY(X&`AFt9fZ0trc,sr\`E2>%XnCsn3b&'Su:Dc+2$l'Nh]id9jcmGr8Uj/d@BK=AHK5r&?#AeYXO:A;S\S8^h3[MS/-(3'k&U%i:D])/eGO?.<%.rfQtCC9&Q0EL9:ln\2rP`?WYujS85$8h!daU4SlbS4Qa?>SFXAUAZHj7g2A[;D=u+S/2M@kBkUNSCUIZ?14B#6K\RY>^V_CDY*bdLm'N>F3PoVm)nNAZi*5&H1L;D8Ndi!HbT65WQJMd(2N3)ee)cha-\BOL5!L]]^\,gr`YSMP\R,Nlc]Pul`\ljAbrJ"%*cbP48^7.eT?6S>OP]8..0#t7:-@5*[lirf?_>@h&Dd&.5H/n_QC'MCD)aI.UlSO7PSJ[`,5GKZbf>&M5,t0R%gR\P6M,EV^,(B.ltmTcKu8pbhc!4HQCf-=m5dt8Iaa<mFPG)gid[8V?Y>A2s-UFX]F\1Gp\h?AWQN`oVtTT@qs0bRl"R!k)<\cTS^li&s$'h(`iV],Top[NM!qPF%@gm<7ItnJQEY/qCX3Qr@9qAU8Uaj>l%E;^dh=FGesYI&I=HG'B-/B/7;bX90NlCBD8I&-89cHMaJf-,pW3N";Y7dUJBe>.3%X#EMhE])!:VAoNE.%sK0KatnZ,60YF[V!YcIB8s)'35NW$7e2lA"sPC%ntGf6`1XS+Q8]79Lo';FkN]#*FWAPBJlH.V)*nc!OaoUSt]8rY$SqJ3bDM"e[6W>8I73L"=D)h.d#1V']M:<("G`_n=G=5t`hUAjm^II2N\4gsE5'Ap<=ESe&MK-J0e+Ocm-?-t[l>G31fH9OfZOJ/":-(deBr7!p-oZqdB&^q=%AiQ8;\j`mZ4/8Z?m2pSaDJp:m4#DhQAummg3QD%Q&S(Y!_n-@Z_]A9\.Bq\%_R1>k@/ET'l:+HsE>@HFPPD6Lh;#pZNo_NA_'P!'K1::[_n-q8%B3TPflad:[69_oB;h[SG&C51;a1"ffC(TjBsPnm<k<_m:Ef2A:jnsHO#Dd+DfC#L`Q6-(9D=`ZgW]91I\FZ\-2Sp,jg"VYM#1J\WN(doXH(IYOOa4>*P:G
c!TeAi`V9C&G42g\&cTeemeAd[X/?H'@TudI#k"!*/TA+sdUUcahq"6+G5\Z$cXVE*B=f0AhlBhfCN]"/Fqd#DpQ3it;<<hOcZlS$NQ8AGijZ'p'?t%OeMBG43=-])s%C>Yf2dt"?H@*Bik(pL^3-n#>spM^60"VZBT\S7(WbcQ'(tY^X^Cs*Sm#WX*uUZ[42Q&P?"7W\CO@1e,"K!gUA6k/7df!NHgHf&]dOmpB=4j/CbNH@`QutZM]^,C:RSIl/)&\3Ca#VOGt;_/GVV:o>);2488kONI*]ALe>[)jHRJ.Tg\rGr'DflGrp_a6C-&$QD_&aVKRmc;n)i8n5@(G8lMT1TS?T2i#ONa1;7lPb>32.C,"?p1g(mnQldU8(NbDJJ%qjb+btU!n&FS5dnCc/I#9O]+(ZAm#SSCor5NS0o[#(]%qM(Mp.+D9XYjW$^M7Di&-'XY/I-lQHW[*#&FE$Eo[EQ-r*sj!Y_oAaJhTU06b[3nNX)G(1DWY8Ad%7*pfQU2&LUU@q4YP1]&KYKJ[Fg%SS%GT\qn`[E3P'?,oPOX,`p2F93-;+CdMKfEUrHup1J<5BeOMfC6BC%>Y.2)ef(DfICNU\*raVG:dX.gCeScHS39o6uLLbI#=RuF^/FOoF$*CTgo!ii:pO<^[@a6dbj/S/Ei;>SQaR3P5Vd\N)k0*X?hYQno_q+(h!p7#=D+[!7\fe>[%Q\^UM%99\PHc35YH3<L%B(@u8l(h]l!rp]on[\uc%XK)(H[WIe"h*S*/__`T6kk]H4BObF`gp9)BFjo%E6(qp\O$#cemOY?F[FQT.TGs=IA>./+VJ1gMF.NMYQg%[p])>MSaYD%$8k2CgNP(f&pmA!MR2$WmM%N\8^r\fscddNl-L_1-3upG.jf'kb/1WY5-OQhVfp2Ru6Y4^\uDlVBeci67[].j*sHd[<[7%DQ*OnbH7WEh0NGa>)^@=2iiu]n'^kG>:W.lG+_[fW;P&R*'Sdb-.(*R?<YT7Q78?pN)Q0^HVIsTo]pk3h89I(VO_5EX[<1C]@Z(?bM+tMS#a!l7RC'[3R5"tMJhgQ%dSk=[Q!#'+C+<c30DaWWGC#bNiSs[["$7l`tHuqE*k>6g8N`,[X=r:eE\a&h;*YB>LADp3JQj:<\\Ge/[lc);lRJeFGX&"JbebNCEj+ra^d1jHL3kuf<3o`'?24bDS9/5GHR=OL^sshXLjB6\(@h^4+3&UFUW7@4bHsECtOd51Yf9>oneup\=+(qd!<jCELiE^dlhS3Xg*0j*L6TD\GF`55.3jt]'Bc;eGIkj(NhA09]0&*XTjP8Qh`r+HL0H'N:YWG+?bd.LHj&njSX#J78qHXQ,rt9fLX8JINJ?o*uCJ>+Y/Se#jk:T;d2h9HPAK9;baO]^tQ5Udt9g/.@C'.k0$<AoenmE\nTiS>NtBLU42HIg9A?L%gQ\E:]51]p?:pMAm)-Eh:M$(4L9@u'"q&CjE=m/450FI0I<3JNm%Z;GX%<GCL+UY&,ZSYjLfU9Pr6TT\27Qtq52?t2r2rE*phZ:_^X]OVlt;D+aFJskN>oPMOnrqaR1so(c@VoY-uukIH=mTbtZpKs*=$Rl2tPIDr"SF3X=9i'YaJ1/DMt**R(s/A#HTa>)$mqf6/2,NN^p"I0,nG/U-U(9l'?CO[=o[E\6eSnBg.Grr.Ys-tqRkq_@O\NjirpIB.9CD]uIcCJS-60Z1GX+hX_qX>R4;[O%KiDoZ]NJ_N+fYC;=7qT/.X9%0c%Yq0)]Xi,>k^A!DcHp[>T(722>a1dmA5-pg:J$ALj5-!<)k<I3=s2i$EaX?3H'/1<EQu'TN*as*li%onbc3lU*$F#eMfLEbZ<Phf?pFS`$4>uqmkE^Uir:8,`a1X:A=(uA^nYOe_N+Q[,N,>3cIjrcg/PGZcS]Z15n)Yajo_p?PB,HXV;mVFZj!d".??)4bANr/s/i&Mn:/;a$N27=.-,rlAPg\S@GfiHpApDHfb#Wc0\B8u>Q!@lU),@=
icP]C%9J78Z)+lP"Q].`l.>*SVEKp=[42Zp8r;L]DrH=XoPY2qfNiZN`j7X6SWSS8KVt_tE6`$"0`u_/Vi!Z>6NhBbCeeb*8>^;CoUP:E)n0Qd)3%ao'LQ18=iQK'("aGab"^VhSX3<4e>4=E.:8)tn,iKDe=7HfgK"J&)ks%**bP%KVQjf5?2INit0Y)bY!lO.2_^>^l9+4oHJ?12-3)&6dQ%mQl_c@,K>X!+P)Sm3q@0RFU9n[=GP"KLd(1B!"0KFm3I)4q]i<oWfMZd-mKe*O=S-EpIfth;-_,_2Qc4;mL,2iob_Nt@,NJFSX`i]WPf)o0)In:*ckMm/\$Ts:[#5;fO`dc<)0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E2qZ,)L]@B!m..!]qp(2mbK`-%0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?(re:h`\EE^0oTTZN+3^E@i33?!WTW94UGD]
bcs0 --- HW Status = 0x00000000 ffffd000
:c0_$G+92a'.)>uss1pl)_T(*?RG-TiUm4M`SY!K,Xo?2]!!"t?!HV';DZIMn
bcs0 --- NULL context = 0xffffffff ffffffff
:fBo)Q:r?&%Zf-WU+HhCi$"B^=b8B%K@U`dL',)#c#fOXe@KZrO0elN,5_1Xe8Vd[l!b?n#%d]1k4hm6.#m:;_2=H?"SE0]GAu,'iPbfhSV(##7rCTtPpYQKf?K"bjV*R9..8!L&8lCnMC6"AR#AHB)5'_6&5OsG_VM?7A1,h:!>L,2XN+][eRl3&[E&J<PB)&Q^T=tf<TC]O![FYMA?ae#<\G(OJ\<dW@[&8g9B.ql(`IGP4n)\c=q_%^+07RfA]V#)pA`apRAm=0999CQ-iB'3hO1@k@@5,tUBB-=H0imu!@B;i7@VcL,iTqSDJ8t93(uZH%(QN_f-5jc^GW1V,_n:&K`4-Tts1N47mm!i?Pbfg6V(##7"T%$*$8f-:
vcs0 --- HW Status = 0x00000000 ffffc000
:c0_$G0E;G'G/+1gs6MtSR$k'0hUEC@&g@mT:H+Il^U%"j!#tG$;tp=m;ZM"T
vcs0 --- NULL context = 0xffffffff ffffffff
:dd<QL@004Js7+,L"#^0',hNoL_&UN9C^AfjK*2GmK(FMD(a*5&"i:BJ9AmX<^5Ws`6%V#r\!_kUkKj,N`o,3Z^AKZsmE]k8s*$W:8lD(J;O"",e>4TdCZ6B'^?65FW&[o;.8!L&8lCnM:=9*@B#CXfY?N;`D.H5MT#'$nI:eOUdVQ,=)4s*KgeGaqCDc@uHtUC-n)Y@*r;B^[%`mI])7KD>2;m<a3Q!O'4+c85:\WJcP;Rh[p[^F?noNu4hZ_j3NT[C04j:AhrC^Wp<TOTM,9CDYrlS^jD6,76'#[m*@g-@['-1M_pD[OWls57K3!UO6_)u5G`A*Ab9>J2]:)2]Wb]AMYs+g<^k&^=#Pbfg$V(##7*!>Lb!"UjQ
vecs0 --- HW Status = 0x00000000 ffffb000
:c0_$G0E;G'G/+1gs6MtSR$k'0hUEC@&g@mT:H+Il^U%"j!#tG$;tp=m;ZM"T
vecs0 --- NULL context = 0xffffffff ffffffff
:g?kDT:r?&%;rNHJLaB;]6>3>N!hoh:A?djD5a[p3W(&*V'+c9OMM[1G&0Q[a)e(?X,SP-MKtr?VIi(*1_#YA$HqZmYRW`t[;uO+f=gpHDXFGsGDu;M;8lCnM;O"",LIZ2R#$Y)PJbXGCPbfg$V(##7p5[jk0CMtiX*%K+\`R[-Yr.Qo\_Oc5gUp[l2p9Gfc1F0s%^?n+?Q@!<estRdDSMcln:#lMhgWUN2grh$bYsLG=c(*O/hgpccBklgX^mDUj8/.<^G\hrSBoX\k:\l=8LKplcashJ(c'gS%KVVHoRC:(:DB/:iiElk?l8_mE&J;a^cZ*KNNfi<+(=MA#8lrkcNM0GE%`4#`]0<<I@-l<]:UbF/",kK8lCnM>*Pj4/_2SY!!):`
Num Pipes: 3
Pipe [0]:
  Power: on
  SRC: 077f0437
  STAT: 00000000
Plane [0]:
  CNTR: c4042400
  STRIDE: 0000000f
  SURF: 03080000
  TILEOFF: 00000000
Cursor [0]:
  CNTR: 04000027
  POS: 031f01bc
  BASE: 00840000
Pipe [1]:
  Power: off
  SRC: 00000000
  STAT: 00000000
Plane [1]:
  CNTR: 00000000
  STRIDE: 00000000
  SURF: 00000000
  TILEOFF: 00000000
Cursor [1]:
  CNTR: 00000000
  POS: 00000000
  BASE: 00000000
Pipe [2]:
  Power: off
  SRC: 00000000
  STAT: 00000000
Plane [2]:
  CNTR: 00000000
  STRIDE: 00000000
  SURF: 00000000
  TILEOFF: 00000000
Cursor [2]:
  CNTR: 00000000
  POS: 00000000
  BASE: 00000000
CPU transcoder: A
  Power: off
  CONF: 00000000
  HTOTAL: 00000000
  HBLANK: 00000000
  HSYNC: 00000000
  VTOTAL: 00000000
  VBLANK: 00000000
  VSYNC: 00000000
CPU transcoder: A
  Power: off
  CONF: 00000000
  HTOTAL: 00000000
  HBLANK: 00000000
  HSYNC: 00000000
  VTOTAL: 00000000
  VBLANK: 00000000
  VSYNC: 00000000
CPU transcoder: A
  Power: off
  CONF: 00000000
  HTOTAL: 00000000
  HBLANK: 00000000
  HSYNC: 00000000
  VTOTAL: 00000000
  VBLANK: 00000000
  VSYNC: 00000000
CPU transcoder: EDP
  Power: on
  CONF: c0000000
  HTOTAL: 0f9f0eff
  HBLANK: 0f9f0eff
  HSYNC: 0f4f0f2f
  VTOTAL: 0888086f
  VBLANK: 0888086f
  VSYNC: 08770871
is_mobile: no
is_lp: no
require_force_probe: no
has_64bit_reloc: yes
gpu_reset_clobbers_display: no
has_reset_engine: yes
has_fpga_dbg: yes
has_guc: yes
has_l3_dpf: no
has_llc: yes
has_logical_ring_contexts: yes
has_logical_ring_elsq: no
has_logical_ring_preemption: yes
has_pooled_eu: no
has_rc6: yes
has_rc6p: no
has_rps: yes
has_runtime_pm: yes
has_snoop: no
has_coherent_ggtt: yes
unfenced_needs_alignment: no
hws_needs_physical: no
cursor_needs_physical: no
has_csr: yes
has_ddi: yes
has_dp_mst: yes
has_fbc: yes
has_gmch: no
has_hotplug: yes
has_ipc: yes
has_overlay: no
has_psr: yes
overlay_needs_physical: no
supports_tv: no
Has logical contexts? yes
scheduler: f
slice0: 3 subslice(s) (0x7):
	subslice0: 8 EUs (0xff)
	subslice1: 8 EUs (0xff)
	subslice2: 8 EUs (0xff)
	subslice3: 0 EUs (0x0)
slice1: 0 subslice(s) (0x0):
	subslice0: 0 EUs (0x0)
	subslice1: 0 EUs (0x0)
	subslice2: 0 EUs (0x0)
	subslice3: 0 EUs (0x0)
slice2: 0 subslice(s) (0x0):
	subslice0: 0 EUs (0x0)
	subslice1: 0 EUs (0x0)
	subslice2: 0 EUs (0x0)
	subslice3: 0 EUs (0x0)
i915.vbt_firmware=(null)
i915.modeset=-1
i915.lvds_channel_mode=0
i915.panel_use_ssc=-1
i915.vbt_sdvo_panel_type=-1
i915.enable_dc=-1
i915.enable_fbc=1
i915.enable_psr=-1
i915.disable_power_well=1
i915.enable_ips=1
i915.invert_brightness=0
i915.enable_guc=0
i915.guc_log_level=0
i915.guc_firmware_path=(null)
i915.huc_firmware_path=(null)
i915.dmc_firmware_path=(null)
i915.mmio_debug=1
i915.edp_vswing=0
i915.reset=2
i915.inject_load_failure=0
i915.fastboot=-1
i915.force_probe=*
i915.alpha_support=yes
i915.enable_hangcheck=yes
i915.prefault_disable=no
i915.load_detect_test=no
i915.force_reset_modeset_test=no
i915.error_capture=yes
i915.disable_display=no
i915.verbose_state_checks=yes
i915.nuclear_pageflip=no
i915.enable_dp_mst=yes
i915.enable_dpcd_backlight=no
i915.enable_gvt=no
GuC firmware: i915/skl_guc_32.0.3.bin
	status: fetch NONE, load NONE
	version: wanted 32.0, found 0.0
	header: offset 0, size 0
	uCode: offset 0, size 0
	RSA: offset 0, size 0
HuC firmware: i915/skl_huc_ver01_07_1398.bin
	status: fetch NONE, load NONE
	version: wanted 1.7, found 0.0
	header: offset 0, size 0
	uCode: offset 0, size 0
	RSA: offset 0, size 0

Last edited by plp (2019-11-17 17:13:56)

Offline

#2 2019-11-17 17:36:19

plp
Member
Registered: 2013-01-24
Posts: 46

Re: i915 Skylake GPU hangs with kernel 5.3.11

Offline

#3 2019-11-18 23:25:33

Rotech
Member
Registered: 2009-06-02
Posts: 11

Re: i915 Skylake GPU hangs with kernel 5.3.11

Looks like this might be the issue. Judging by the massive list of marked duplicates at the bottom, this is a big issue for a lot of people.

https://bugs.freedesktop.org/show_bug.cgi?id=111970

Offline

#4 2019-11-19 23:52:16

made_in_nz
Member
From: Wellington, New Zealand
Registered: 2010-01-04
Posts: 53

Re: i915 Skylake GPU hangs with kernel 5.3.11

Yes that's the issue :
https://bugs.freedesktop.org/show_bug.cgi?id=111970

For now updating kernel to either linux-lts or linux-drm-tip-git resolves the issue.

Offline

#5 2019-11-27 14:35:47

samurai
Member
From: Turkey
Registered: 2010-04-03
Posts: 28

Re: i915 Skylake GPU hangs with kernel 5.3.11

5.4 should fix it. Have you tried?

Offline

#6 2019-12-04 01:46:13

made_in_nz
Member
From: Wellington, New Zealand
Registered: 2010-01-04
Posts: 53

Re: i915 Skylake GPU hangs with kernel 5.3.11

Yes, it is fixed in 5.4.

Offline

#7 2019-12-04 10:41:33

wioo
Member
Registered: 2017-05-18
Posts: 27

Re: i915 Skylake GPU hangs with kernel 5.3.11

Weird. For me this problem starts with 5.4.1.

Last edited by wioo (2019-12-04 10:47:22)

Offline

#8 2019-12-04 11:10:17

Archanfel80HUN
Member
Registered: 2017-04-27
Posts: 6

Re: i915 Skylake GPU hangs with kernel 5.3.11

The problem still persists with 5.4.x.
I had already noticed it with 5.3.x: the system hangs for a few seconds but then recovers.
With 5.4.x it freezes completely, only a hard reset helps, and it happens very frequently. This is a very much alive and very serious bug.
I have the linux-lts kernel too, 4.19.87-1.
With that there is no problem. So it's a kernel-related issue, and it is not fixed in 5.4.1!
Linux-drm-tip-git compilation takes forever, and that is a very bleeding-edge kernel. I'm not sure it's a good idea to use it. It's safer to use linux-lts.

Last edited by Archanfel80HUN (2019-12-04 11:14:18)

Offline

#9 2019-12-04 17:10:13

opthomasprime
Member
Registered: 2017-06-22
Posts: 4

Re: i915 Skylake GPU hangs with kernel 5.3.11

I have also noticed some strange freezes for a few weeks now (on my XPS 13). 5.4.x made it worse; now i915 hangs completely. Logging in via SSH and rebooting, or powering down using the power button, still works though...

Offline

#10 2019-12-04 17:19:16

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

Archanfel80HUN wrote:

With 5.4.x its completely freezed only hard reset help and its happening very frequently. This is a very much alive and a very serious bug.
I have the linux-lts kernel too 4.19.87-1
With that no problem. So its a kernel related issue, and not fixed in 5.4.1!
Linux-drm-tip-git compilation takes forever, and that is a very bleeding edge kernel. Im not sure its a good idea to use it. Its safer use linux-lts.

What are you going to do when linux-lts is rebased to 5.4+?
Are you assuming it will have been resolved by then without the need to report the issue upstream?

Offline

#11 2019-12-04 20:39:39

kjozsa
Member
Registered: 2008-06-05
Posts: 153

Re: i915 Skylake GPU hangs with kernel 5.3.11

I confirm having the i915 soft reset with kernel 5.3 and a hard freeze with 5.4.1. Sorry to see such a critical issue in the mainstream kernel, waiting for the fix / workaround now...

Offline

#12 2019-12-04 21:47:32

blazeeboy
Member
Registered: 2017-05-13
Posts: 3
Website

Re: i915 Skylake GPU hangs with kernel 5.3.11

I have the same issue on my machine and landed on this thread when I searched for the dmesg log line. I originally opened this thread for it:
https://bbs.archlinux.org/viewtopic.php … 2#p1876042

Is there any workaround we can do until it's fixed?

Offline

#13 2019-12-04 23:36:28

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

@blazeeboy the upstream bug report you linked to in your other thread reported the issue in 4.18.0-10.
Which kernel versions is the issue present for on your system?

Offline

#14 2019-12-05 09:44:46

chrpinedo
Member
Registered: 2017-10-12
Posts: 22

Re: i915 Skylake GPU hangs with kernel 5.3.11

Yes I have the same problem. The system totally freezes with 5.4 and I need to reboot it.

Offline

#15 2019-12-05 16:42:48

alu1
Member
Registered: 2014-05-16
Posts: 19

Re: i915 Skylake GPU hangs with kernel 5.3.11

I have the same problem - there were several few-second Xorg freezes on 5.4.0, and now a total system hang (and loss of work) on 5.4.1.  I am now using linux-drm-tip-git from the AUR (that's several hours I'm not getting back) and have had no hang for a day (but the nvidia driver is incompatible, so no multiscreen for me for a while).

Anyway the bugtracker (incidentally moved to https://gitlab.freedesktop.org/drm/intel/issues at same time with many similar bug reports) doesn't say which specific patches address the issue, just that it should be fixed in drm-tip.  Anyone know how long it usually takes for drm-tip to make it into mainline kernel, or which patches to apply?

Offline

#16 2019-12-05 18:13:30

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

Offline

#17 2019-12-05 18:18:24

Ancalagon123
Member
Registered: 2019-03-14
Posts: 7

Re: i915 Skylake GPU hangs with kernel 5.3.11

I can confirm this problem, too. I only noticed it after updating to 5.4.1 .

On 5.3.13 (I downgraded to that after the freeze occurred twice) and on lts I have never experienced this (or at least I think so; it might be that there was a hang very rarely).

Offline

#18 2019-12-05 18:38:52

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

Ancalagon123 wrote:

On 5.3.13 (I downgraded to that after the freeze occured twice) and on lts I have never experienced this (or at least I think so, might be that there was a hang very seldomly).

So you confirmed you do not have the same issue as the topic starter.
Edit:
There seem to be three issues on this thread which are being mixed together.
Two of which were present in 5.3.  The first issue was fixed with 5.4. (111970)
The second issue is still present in 5.4.
The third issue was introduced in 5.4.

Last edited by loqs (2019-12-05 18:56:12)

Offline

#19 2019-12-06 05:47:53

Ancalagon123
Member
Registered: 2019-03-14
Posts: 7

Re: i915 Skylake GPU hangs with kernel 5.3.11

chrpinedo wrote:

Yes I have the same problem. The system totally freezes with 5.4 and I need to reboot it.

@loqs : You are right, of course.

I can confirm that I have problems like chrpinedo .

My log says:

kernel: Asynchronous wait on fence i915:Xorg[775]:fd34 timed out (hint:intel_atomic_commit_ready+>

Offline

#20 2019-12-06 11:36:30

alu1
Member
Registered: 2014-05-16
Posts: 19

Re: i915 Skylake GPU hangs with kernel 5.3.11

I don't think it's just us, but the kernel devs are also conflating the different issues, where in my opinion it went from severe <=5.4.0 (recoverable freezes) to critical in 5.4.1 (irrecoverable hangs).  Empirically, however, it is fixed in drm-tip.  The root cause of all three seems to be messing with c-states on idle, which goes back to 5.3.12.

What fix to apply thereafter is beyond me and I must continue with drm-tip, not sure what to look out for to know if/when mainline is updated though.

The bug reports I'm referring to are:
https://gitlab.freedesktop.org/drm/intel/issues/713
https://gitlab.freedesktop.org/drm/intel/issues/712
https://gitlab.freedesktop.org/drm/intel/issues/674
https://gitlab.freedesktop.org/drm/intel/issues/673
https://gitlab.freedesktop.org/drm/intel/issues/670
https://gitlab.freedesktop.org/drm/intel/issues/3 (from deprecated bugtracker)

There's no useful info there to work out what is fixed in drm-tip, and there's nothing new to add in terms of available information, other than noise.

Offline

#21 2019-12-06 12:07:13

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

What about testing linux-git?  The drm pull for 5.5 is already present so you could check if the fix for your issue is in that pull.
Alternatively you could reverse bisect drm-tip to try and locate the commit containing the fix.
For the reverse bisect the last sync of drm-tip with mainline af42d3466bdc8f39806b26f593604fdc54140bcb (5.4-rc8) should be bad.
Edit:
https://gitlab.freedesktop.org/drm/intel/issues/674 the 5.4 only bug has been bisected (673 is marked as a duplicate of 674)

Last edited by loqs (2019-12-07 00:13:46)

Offline

#22 2019-12-07 21:18:23

janek
Member
Registered: 2012-02-18
Posts: 3

Re: i915 Skylake GPU hangs with kernel 5.3.11

https://bbs.archlinux.org/viewtopic.php?id=246841&p=2

seth wrote:
systool -vm i915

Randomly try "i915.enable_dc=0 i915.enable_fbc=0 i915.enable_psr=0" as kernel parameters

On kernel 5.4.2-arch1-1 I need them all to eliminate micro freezes.

Offline

#23 2019-12-08 20:08:51

Archanfel80HUN
Member
Registered: 2017-04-27
Posts: 6

Re: i915 Skylake GPU hangs with kernel 5.3.11

janek wrote:

https://bbs.archlinux.org/viewtopic.php?id=246841&p=2

seth wrote:
systool -vm i915

Randomly try "i915.enable_dc=0 i915.enable_fbc=0 i915.enable_psr=0" as kernel parameters

On kernel 5.4.2-arch1-1 I need them all to eliminate micro freezes.


None of them help. I disabled all of them and it still freezes.

Offline

#24 2019-12-08 20:13:14

Archanfel80HUN
Member
Registered: 2017-04-27
Posts: 6

Re: i915 Skylake GPU hangs with kernel 5.3.11

loqs wrote:
Archanfel80HUN wrote:

With 5.4.x its completely freezed only hard reset help and its happening very frequently. This is a very much alive and a very serious bug.
I have the linux-lts kernel too 4.19.87-1
With that no problem. So its a kernel related issue, and not fixed in 5.4.1!
Linux-drm-tip-git compilation takes forever, and that is a very bleeding edge kernel. Im not sure its a good idea to use it. Its safer use linux-lts.

What are you going to do when linux-lts is rebased to 5.4+?
Are you assuming it will have been resolved by then without the need to report the issue upstream?

A kernel with a serious bug will never be LTS. And the first few releases of the 5.x kernel were perfect. The rest is not my problem; I'm not a kernel developer anymore. I did this back then, only for Android kernels; never again... Worst thing ever. Bug, bug, bug, bug.

Offline

#25 2019-12-10 16:05:24

loqs
Member
Registered: 2014-03-06
Posts: 18,078

Re: i915 Skylake GPU hangs with kernel 5.3.11

673 / 674 has been marked fixed https://patchwork.freedesktop.org/patch/344105/
670, 713 and 712 were also marked as duplicates of 673.
Edit:
attempt to rebase patch onto 5.4.2

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 06a506c29463..b70a59cdcdf2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -471,12 +471,6 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	return desc;
 }
 
-static void unwind_wa_tail(struct i915_request *rq)
-{
-	rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
-	assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
 static struct i915_request *
 __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
@@ -495,7 +489,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 			continue; /* XXX */
 
 		__i915_request_unsubmit(rq);
-		unwind_wa_tail(rq);
 
 		/*
 		 * Push the request back into the queue for later resubmission.
@@ -649,13 +642,29 @@ execlists_schedule_out(struct i915_request *rq)
 	i915_request_put(rq);
 }
 
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->hw_context;
-	u64 desc;
+	u64 desc = ce->lrc_desc;
+	u32 tail;
 
-	ce->lrc_reg_state[CTX_RING_TAIL + 1] =
-		intel_ring_set_tail(rq->ring, rq->tail);
+	/*
+	 * WaIdleLiteRestore:bdw,skl
+	 *
+	 * We should never submit the context with the same RING_TAIL twice
+	 * just in case we submit an empty ring, which confuses the HW.
+	 *
+	 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+	 * the normal request to be able to always advance the RING_TAIL on
+	 * subsequent resubmissions (for lite restore). Should that fail us,
+	 * and we try and submit the same tail again, force the context
+	 * reload.
+	 */
+	tail = intel_ring_set_tail(rq->ring, rq->tail);
+	if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+		desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+	rq->tail = rq->wa_tail;
 
 	/*
 	 * Make sure the context image is complete before we submit it to HW.
@@ -674,9 +683,7 @@ static u64 execlists_update_context(const struct i915_request *rq)
 	 */
 	mb();
 
-	desc = ce->lrc_desc;
 	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
 	return desc;
 }
 
@@ -1149,16 +1156,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 			if (!list_is_last(&last->sched.link,
 					  &engine->active.requests))
 				return;
-
-			/*
-			 * WaIdleLiteRestore:bdw,skl
-			 * Apply the wa NOOPs to prevent
-			 * ring:HEAD == rq:TAIL as we resubmit the
-			 * request. See gen8_emit_fini_breadcrumb() for
-			 * where we prepare the padding after the
-			 * end of the request.
-			 */
-			last->tail = last->wa_tail;
 		}
 	}
 

Last edited by loqs (2019-12-11 01:12:58)

Offline

Board footer

Powered by FluxBB