From 068fce4293eb139f54d4825e1dbdcaf35e34da03 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 18 Aug 2023 17:50:59 -0700 Subject: [PATCH] Docx reader: omit "Table NN" from caption. Closes #9002. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Docx/Parse.hs | 42 ++++++++++++--------- test/command/9002.docx | Bin 0 -> 12631 bytes test/command/9002.md | 20 ++++++++++ test/docx/table_captions_with_field.native | 10 ++--- 5 files changed, 50 insertions(+), 23 deletions(-) create mode 100644 test/command/9002.docx create mode 100644 test/command/9002.md diff --git a/pandoc.cabal b/pandoc.cabal index 9a4198b43776..11b8bb1d6f69 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -207,6 +207,7 @@ extra-source-files: test/command/*.md test/command/*.csl test/command/*.svg + test/command/9002.docx test/command/biblio.bib test/command/averroes.bib test/command/A.txt diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index aebc4a5d4e1a..71be50c185b4 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -86,13 +86,15 @@ import Text.Pandoc.XML.Light strContent, showElement, findAttr, + filterChild, filterChildrenName, filterElementName, + lookupAttrBy, parseXMLElement, elChildren, QName(QName, qName), Content(Elem), - Element(elContent, elName), + Element(..), findElements ) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -725,7 +727,25 @@ elemToBodyPart ns element parstyle <- elemToParagraphStyle ns element <$> asks envParStyles <*> asks envNumbering - parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element) + + let hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + let isTableNumberElt el@(Element name attribs _ _) = + (qName name == "fldSimple" && + case lookupAttrBy ((== "instr") . qName) attribs of + Nothing -> False + Just instr -> "Table" `elem` T.words instr) || + (qName name == "instrText" && "Table" `elem` T.words (strContent el)) + + let isTable = hasCaptionStyle && + isJust (filterChild isTableNumberElt element) + + let stripOffLabel = dropWhile (not . isTableNumberElt) + + let children = (if isTable + then stripOffLabel + else id) $ elChildren element + parparts' <- mconcat <$> mapD (elemToParPart ns) children fldCharState <- gets stateFldCharState modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} -- Word uses list enumeration for numbered headings, so we only @@ -734,21 +754,9 @@ elemToBodyPart ns element case pHeading parstyle of Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do mkListItem parstyle numId lvl parparts - _ -> let - hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) - - hasSimpleTableField = fromMaybe False $ do - fldSimple <- findChildByName ns "w" "fldSimple" element - instr <- findAttrByName ns "w" "instr" fldSimple - pure ("Table" `elem` T.words instr) - - hasComplexTableField = fromMaybe False $ do - instrText <- findElementByName ns "w" "instrText" element - pure ("Table" `elem` T.words (strContent instrText)) - - in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) - then return $ TblCaption parstyle parparts - else return $ Paragraph parstyle parparts + _ -> if isTable + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do diff --git a/test/command/9002.docx b/test/command/9002.docx new file mode 100644 index 0000000000000000000000000000000000000000..4722c53e5daf7161d63929eadf2b3b33d84c08e4 GIT binary patch literal 12631 zcmeHtgIWs+Pb@!aAuI{%>MjYZ5Isgjr8UO$g0ygtzEY!dNfOn7p02<&mxVnI~m4l&` zgSLXJjiJ3Jor|T#yPQ|x6j=ap(EI;8{)@jrUEHWu4+FB$UHo^X=tf1o!<-T-@GzbP zTDc==%+KITPcegS&mCysvWj5Quogt5%y%2iN&|k=D=8K*DD}>yCvUkEd}6gs*{PQo zcWJ#0u*kOK7&iEZ*}N@S+S;)s*uWTw>w9Cy+B?^{^5p!ZcX6pH(rAm+;rALk~H2Kbp&ob5E3s zvRn1D#{))SWAY}+C`Uk_IuJOp$Wjjt5EVG<7Kc^3D`>bjSLM-2t>SK7E9R5deP5v! zqz8+hSWC$%SIJ(_c%gtU_yX4UL1%W9X(cT*Z^HH(%rtk0cVMH^qI|-13&40Z>!q10x?nsBePfyR5pQUJ z_0jSk0swe&Mmgdariu|di47HfADlGa zg|puT zHY&1bqein&;W)O5>5CcV4Gp#h13hV4v|br3Hn45_*e$oSusv z`5<&3i3kJ2a*PxHV2=X3Wz-usPYdZt_-<-t2THnzU z6uo{4TYoqV7$|mu^!#@pWig{5g8_}*0ndI(9<*E@GW@$1p9ohMo&ZAlyhSuH0SwnWnhvAi<7pYu|@hi{xkj<6NPw=QPqjl7^ zv@9~F%%sHUZhQoDIOL`ngX-f9IB{7Fd9uXmzeM`&vkWLfS!y|k*~K3?Gf82|WH9ts zC?PxFum+pK-xD-gr)+Hbv0FoM^2ihN+Ps919dUe8!BaPYjAalPj5-if5)Yjs2-9aO zl%Xl<%Iz=i(#$1s991K=Pb4!@yM}p1 zRd5#d-)cts6DlXm`_)@h86DJU&Dsw#TGTPzpY5ddz6G1&M>~|MBNbF^&yn-Roeh4^ zOx+`wUbG9G0rZpf>p%J0KaK7lWb$MRgIyrONF_rdoAPiPKC2*MjS2@wKN3pGJVy!g zkiK88hR!5`TK+~vfJ4p?(aT0Ff6esCJ?$XNI&g+~#1Zl!vo#wMJ^f=61ZN-t(Fm}) zr^*WI2uGmT9AeoS&<$f*iZI`bWu{}JMEmZ5?w!s&|M6zf^}sgN9pSPi$sJFg7S3=h zQZa^<7+05ApuEZ&O3FJZffjaDL2blka=-*~Sc5EXHJnb0A2^iI(I{T{#}YwNx9;)R z712?E!S<|=g!)e(XQN|Y6h`cii76{nY?C7y+0wISkQotg_G)}n;l?D|)SJsW^<4|?cUB{ots4#U#m?Y!14HJF} zp+isMgd>9yw8_7xo1F_ccckHg^j-(Yyl>hul+|&8?FE*e?sLcv)=}ByDRW3EU@aW0 zKxoGngS`q$eRrzW?Z>Xs78nmr^V-^v+TFlLwNEDalF5{VsN^E1kYE@ptv`saTGy&QCSdHTdLo?oWJvOXoBCG|_8Et#&yDunuF>v`V>Ht41 zNZOmJ+pkg;?z;pbGM1KO;$~Rt&zBV)mo1Y*MdT9u;^_B2c;!bC_1|{t!oWriM=z85 z^=3m5?_FB^x1skGFk`Gbp^PsxgHd*q3!5p>PKdgT-V-;d(3ABg#JPX=H@0~HT|A-> zC7jGvD&F~S3OsPv%(a3NrsF*=A*`72i4tzhnfvhkLfV#E2Db)c`ia!iR7yEy6+JJ8 zvNo1n6@?{Mnk=7~r3IAtiKL}5hlj;+Am!)j+QcsaisDA_kL5NF!-l^8!r6(TlM=S~ zo586QGc|2Tbz<%eRSABBLj9LYUsvx!u7oFw-&v#DwJkfGNxAmT(=M}ro_)wN9VRG< zQB!TdbSZ$J9xB2bxsMga61QCDUjPmsq;!{JI9-lPKyiKQYI^ywlfHLtv3NU{5pLk> zsZ}TJ8tA?hC!414*7seIqqRk@N17nhNX<}_ zsyW|42xeKV!z@CwY%!4j!mCee)nB%v+skemQ~7z6x*&k@jj2ouziR0>;m^8s={};Z zEjVx6UA=V2E1GQ;rKi~4PgIa$H$Hc?JN2+ysdn0Q!*?VAbI3pAM02XQMTWW*vB42o zZhbgcxp1wX!?w-#Knhrpt4h7awU=jFBeD^8h@_GP|0=zgK&%)?vADO(*>G)~Gy??y48H>aF#Z_+983%?4e5WCjK74;Q&sUW91-LW=u3jY=25rtL)|jg z8PQAgMYfH4zsPnu^MdjW)9LkOG{V3Pl5pzN+I0Jh-fv3heb=dGiR@%Yd-ukSA z2d&)Jg-S35s~xCuKK&_`lUV=@cDIx~UBQGAEihpTm%>ub;V$*bZNBT^da)6xsqF#Z z29+;q4abWMm5gAYRd-~){iM~p$bMrkvtnVfof>boX^!oJ5K19dpne#-Sp;3WUO|W^ zOs~FexrxJOF76UYb&x|HFKr-O)_;hM zC~CIcmg;u!5RZDG+E_Y`kvAgmHBjMzYUns!+yf?&wkH`C#r2m;N#xlcwmg$NR)h`) z>Rd~}OC27+CUUlTK7WK1cFHE)0FLe%S$m7nYN1m)!a&Wtpur!;bwMlkzXBG-hNYNFS) zfzi4)gx#}*1!Gv@)yQngIPSVr?NFuzXSduf!IC7224(_hL<5&YJgHD9p~gxRBq5he z)OxUlp=qfSYGMpm{zW|ZV59+O9RgL`XN{TPR-*z>!E%iD*PKWgm?w99rH~sa&o7rm zt2L#{rl(VbgiX%%B&Hd$^-tG2^m=QPb+5v&=U>;17+TbhD;f-x9FwbjEYAzY3DXZ@ z;CCVjD)XM)&?CN}kwGQa%6s@e2cHb4;aJV8b!1|eC)YEdT_4*-sHg80pk^&R^qPA9 z?X8QZO|RFv-N48F4d|h*y9A%U=ZS{*ALmrw!6iYwQVgVB_YK~DmJq}vr)6s#ksK39 zZO(3!z3wA%HHWN|eY_{Ay!C1JaUQPe=s_t@h;P&%Ldg(iX4m+S`41Bf54cCyZ=`Jfpu?sNlJ`E`R3`%{Xqk zNV95AT~WAW>L*CICV|?_5WaP}gv(>7-N>AXeH};HV9KIS(7Q+#VzCRAlEi(0Iiax* z%}UI2(u8(D{A%0<>bw7lLU5qiAl`6(HZMu-SW}b-Ne+5J;mp&Zpx&7bszBMn`p4LC z_4);c&p!l*>!~v{_O6vThFzGtiB%b59Chgw_0qWsRUDUaMbAZc^bevY9%x?$UZyG< zb}^#ZS&lPqH*ZapHAPn>h)la|#^A{c8l$LfP&^o9^f-Q&ByU6-cd%DB@@h`*m3e>E z*Lb!XFegZ7`bOj3@j~4}WTf-(fr_E)rcQjp*|^4{)?Q1F`q$mJ{1IvmNqmxw4Kky` zl4gY}$h!&ZK}OSfwkwU@Msp=_dWy=&8w+8UPA*79ok;V?X`ix-dn(^V_|8)gv!ESL zD>^mPr@tr1yW{IWcjFsJ`V^nQOX2ERDQEVu?pA#AcyruUsr0>lMSvkR{A)>O1-C!J zqeHd3b>(2=#`<8X+~ZnszhS}2R?VA*^Rv?Wug-UfV51*Q<^sRDwJuWBpRDT+pWLbC zJ53$NfB%xTOmGX)IgiidkWP$n+j7%o4zHFk%=-G1R&LH=<4S&ka;amxQ6np`1u3y1 zFuk%vd4Imm>G@yR0Fe1&I=i4%qZHg<)&TZ~4i2VP#`eDiz&fQ>t5ptU4{e>7?x`!z zmL)sfY^iPWb}^Jjp*_7!%8;61AW@V8ZsU^ICLyWXSpmiA8`tQf_sqtDy~)`dZ5~Dr zK>vIq-^n&06%Z=vAm^Ps`&Eafqi|Qi;eKbAQF{nzR7fZ>xl;-p=Awc^OcKJ3KqdcF|Wf zyBgGuwYmo_xMcd>{pF6_%>Nh2x#D%(&Q(Glpq@Djw5CSd&)8eL; zvK|GJEt)kZ&5LZH%MEEqKJmqwL(%fxuzS`b(Z zU|`SS<(IWUV%5^m!x5dOeqT#oU68kdOc~I2068~M`kvS@gp}i*j5!Vk0a5?P{sUqz zI9-SGCv|p=lsq6#)Q|htVtryaBhJP5gs zILRRK&OJ0wa=&Z43KCr)u7WTGu`0Vd?_G)3_DsbQ)Uf8{s_78HC*6oPUc2OG|GiB) z7RRtN(vsB}2dZtCHC5CKg4gzT^9IGWUm2LCvjX1|Waf>_XJ&ZhYzJGK^S)E3QhIx} zomfXrN+mB3<>GhTQeI&3cH+Y)r+7q3Fq3<#ErJnGgVv&6qA(#^BVHLRC#aS6oJ1ih zryO>>8d-WAuH~l0eyKNiH6gK*;bp!Lgt|$*{JO3A(TTot)r0+-k)6oRjG1U07uQ`i zo^IOpn53|r<$-*a0S-&rZ{P;S2roa|zx|@_+Li>hBU@*&y`+V_!bY5T%S`uHbUUXv z&2%lrQ@y=4#Lwiy3RB_$7(BQQG{UC~x-zVTePjFj3FXJ?jn5-W#}m@O(E^Rhshf77 z8S@Q@Y()W)Ngzf*#?IQto?hSD&hYo$kN+`9panyCjI1<>{rBFK{DCCC+2}NesWKos z2h<^9M9IA4XfTD-f7!6|+XhGKSik9bbZTWw#Ty}7@~h?_rVJ(a1&@-jC0vxh z?nn#LM^_OcEewcZ5Motf8MU@%Dv&RLZ^O>-<^W>@l9|dE6q@X$NijB9K#&TT>oY+=vIjj7ctb}lf1Ow*;vdUZq-k&BtB3SCgA zjI%OAP=j8vV1vPp@%g)s5R#H2hN4*5_XqtS%`UFyUaaUf%NP)G#%cLdP2iXCnF0jM z>vu>Di+Fb-k;!1kal@JO!ccG{N^|H6{G;h+;|q;4$|E)>2!3PwxHQcCTwaOkUN&1t zzG{uY#8%WUg5yzXO(MxepLUZCk&HUQOrt?V=N&MlA2Bml`Gq4Qm-lsPx$c6(Vxj}O zA5>9e$ebw&wa5Mao@=W$6hL0VUOS zu-IGWcce+II#ldeJ9%0L~2}D!f^`t0EOgbsHY?9qSE^wTWV#14oK_ zes{DNN<}DbjKK*u1+BKNV!wbi7b@dO051F3h3}Nt73m*!I_nhnne=8J($MzNRi*}5 z;j)e};;tYnqq5sKSa(o0gf)5ZOx6yeEcF4dAM9xh&y6Yc_$}`6&TN4uRp;f)R8|C0+PItpS}?=-{LIK+@)zu+p>H^Oa)z zd%e4um%AN0!mGWB1o>$1MHS4>;r_=}GX^vmp*=XmX#u+1k9y5b?aAz_ z5GTsP(b94~p@m~d8VZaH$#HqqHlcwkeh1^Ie)7?Uy2RTUx!-romuF zId=6Bu^iEZB4@02Pw5$spu_3Civu-z^_>_YaBTZtE^E=T>U^Dl>q+qQa1JBj#UwrasPYI(9NA1318;>}})SK`0G24+D2m8~MiSdieRdaWGf&m4h zQnQwHBI!efY6!d!JyT6QpTDolqeAn#Ccq_Ud?B+&5(bkNzzVs3wzz&eyGnJ=eMhf? zU9D2^!{U1EYTBCM`5{u9K>F(I`+e6efT?GSzj3ajY?<`n&IgSP|2^xyltQyTIU^YA zxye({w5n6xT2HhQgK^kuE=KfvNt50o4S?ACv@>_o=HV z%vbaYR&vL39LWSS4$KN;n|4Vh@(_%eAhQJhlgIXITuPk^j-&Y~uZDdds6!gP^>Me5 z0Y-#gJ&YTQ;ng*D?}>VBjSxfx-wakVzj+rKY7&rxyRgH#&`_pBolt2(H0XHT)Xh!q z79In<{>~GRrQR2s*M?6q2nkj8qWD<(!t8ZPUaN85bLcn2PtqIdKFnqteUiCssq|Fl znf$^tS~ui3D4!6-zgCP`qJ)}_-Nq?x&nRckF&cKIQMC|H{Umad!a!fv|CR_fx7zB$ z$GBMSDYoA@x7SYgS-Dn6jlOvX$lHDDcw9Myib_Y8%{l z<+^4orF}tk=$z|0Uu;Z{O8&%PoO^d(8epFhOpPM+p`J}bndmF8(ub#vE0fkAcavmd z3UK_*V)L6K7IYS8y+3gC8`R6;*%roo?a zNPAtXRJeBKh+e$*!BXCf)faTv zEX*-rPuC^ZJhCE7PIQThAcWF%@IZOEbct=LiQvw!H#rCdja;&QWc{nP5n?a)&WY7? ziAYd)`gCXV_RkIs#hk|2Yc=8P#un|c*AE9yW$lwoPILy>xNEYwv)HKfZ5#fkht_Eh3o`o1w)dXd6OnoX&PIOetJT+jgq{scYYvr8>IoG z%Q$+^fXQSKRS>Amoj!k-H{`ymg?PS{z^b7{UyEvbM@7O%uaCtumA74bz&fd*ddrY> ztvXQczKeoqt7T9V&Z>SHG(g%;@im{a0r}M{FnAfxePy?g;hy%Mc!U@>6oR1f zI$v?M`r&lca&eTTw}-1{z#Wh(SmV#1EqpkB!qfVp_yKFwqB-OUstPLRiw*(_4nLN4 z3`^mfelPKwZg1{}c25-J2VZ%+z`Jt^L4yef1}bSx%y=C5@DXSz;bZS@=R=jM?ZzXJ zxI2zx$f5Lx%nfCp>E)eP#D^2cFoyceDMA3zp;02nuueF@FV~+IkjFKZ2PwV4q`%CJ zaw$J{j@1n_6XTRRobUNsuwYMe?UkrU1SeUVMIr|k`O6kuO@-+j!_L#6NdfMpJHf^g9(JoGZmu0hx{fLM8fPbtD_3Dn+8EvL^J(82 z8EgI86CArc^cy<68yGVck!tHw)`6lHF)g`3-Oy!OFMYIWu7Mi}CcJ^w~g~b=R-Oe)@LSJmnYc+l5R2l-d z#FWv$iIQ?433${4+zK;|)^4Brb{YnYh9tX(h9!^6so8VHb-aL?`~=I4VzYINYusR$ z9m#@sBWa!12Cf^P2fp$zK`j;YeU#&_QzP$>y|&HBN1Fz#om*^)-45{BbDY8Lh}{f{ z3g1(aHdD6S$Kp(Li8Z4fT0lm=ZnA*V}x zoG3#GNvjb25e|cBil6nW9}I-x$mahHLWD;CwJhLuoeUxPFR#L2Bz=a#kelNF{n6Z| ztbi8?k|vYg4ubTH6h-;|+O$6^`lUUB|7c=KvkJ$@qA7fG1(S1P1s=Pnj_8SD$ zRElhHd-^|H@6d1T7&7CS`t-_cVb*of^F>@)vGBQyV6Q<|O{FCaqTg&qp1FhSW?T-UNLLf}xXJ)pkgIY-!`L>m^Pb5la)gbt zQ3L!oz;NAyy=ZF1uYKMO%J8`r%e+<*0Uloh^efU5bVTEw5!E!3v4;$!kJaj6;%8~1A*N6H&m3jBP)qZ zGq%nT5M-kwkb+m)j|%JCFA!|hs*1o@^vV$7ouz~$tMkb7duH};30XfhjiN=cns{0!<2dkYMO7bm03V2i8?QY)xc z1B&976}N9oQb9FWsM)n(E7LmuM>K1GDNR_>_*Hz%Oj`$4zf+Pg^@kvBEVZ4NOZ|(0 z#*a$J^!k67+s?<#&NrDA|3?geGt;P_+y9%`bpW;G_&=gWmdtkxL*~lgQ0U`bM-ynW z1@rxijn5?nl$r1NAtY_O|@$TeGoioT>7`_?%0GDvT)$7d+O? z;+cUKa=x4NO0()C8^`7bi)qZBf}VIY>aq>ZJ(ChQXv-_)cvpyTqk(wzC9HVMP&)07 zta+;!RE?`xf^z!@TA(em$*GguxOeGh@3y@8Oacq zE)y)iN&FCDt3S8^PVqx@0;}JnqrFi0D^rzH?E2sK$!lljcvKKUOSC);I zySpE@cXxC#=A8sJhG@PfPrDplT&la7y(^`A$w%bak7R{&y-Mbc@1GR5kRM8S0oGXR z^^dkhkkIQ#moR=+?j3u%W~TRR!FuLl?1*(rRcR^XvASf6IgZSz3y8+!EA|wy7`;pw zDzU;g6=Pae*uZc(VLH7oI>ed6mtK}Lbb3-#V1A3muCB0IgTBY0y}$3p?9J-J$(=DV zA-O+w&dqQIo6k~Dzl9bTYc^o$G(SCuR#R}D3egl<7SA?Kg$Xj$jj?`7XmY%q%9T7Vqe+C{L{kee z+a~XJS7>t+f5A|cLVW&e?{S?sm_4w86CK41A6ptRIeA278a-t6PDtfg3HA7=3vmw@TY{^F} +

This is my table!

+ + + + + + +a +b + + + + + +

See Table 1 This is my table!

+``` diff --git a/test/docx/table_captions_with_field.native b/test/docx/table_captions_with_field.native index deb8afc6bb1c..4f81ce477e5a 100644 --- a/test/docx/table_captions_with_field.native +++ b/test/docx/table_captions_with_field.native @@ -1,7 +1,6 @@ [Para [Str "See",Space,Str "Table",Space,Str "1."] -,Para [] -,Table ("",[],[]) (Caption Nothing - [Para [Str "Table",Space,Str "1"]]) +,Para [Str "Table",Space,Str "1"] +,Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.7605739372523825) ,(AlignDefault,ColWidth 0.11971303137380876) ,(AlignDefault,ColWidth 0.11971303137380876)] @@ -32,8 +31,7 @@ (TableFoot ("",[],[]) []) ,Header 2 ("section", [], []) [] -,Table ("",[],[]) (Caption Nothing - [Para [Str "Table",Space,Str "2"]]) +,Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.3332963620230701) ,(AlignDefault,ColWidth 0.3332963620230701) ,(AlignDefault,ColWidth 0.3334072759538598)] @@ -50,5 +48,5 @@ [])] (TableFoot ("",[],[]) []) -,Para [] +,Para [Str "Table",Space,Str "2"] ,Para [Str "See",Space,Str "Table",Space,Str "2."]]