Skip to content

Commit

Permalink
Docx reader: omit "Table NN" from caption.
Browse files: browse the repository at this point in the history
Closes #9002.
  • Loading branch information
jgm committed Aug 19, 2023
1 parent bd4de14 commit 068fce4
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 23 deletions.
1 change: 1 addition & 0 deletions pandoc.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ extra-source-files:
test/command/*.md
test/command/*.csl
test/command/*.svg
test/command/9002.docx
test/command/biblio.bib
test/command/averroes.bib
test/command/A.txt
Expand Down
42 changes: 25 additions & 17 deletions src/Text/Pandoc/Readers/Docx/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,15 @@ import Text.Pandoc.XML.Light
strContent,
showElement,
findAttr,
filterChild,
filterChildrenName,
filterElementName,
lookupAttrBy,
parseXMLElement,
elChildren,
QName(QName, qName),
Content(Elem),
Element(elContent, elName),
Element(..),
findElements )

data ReaderEnv = ReaderEnv { envNotes :: Notes
Expand Down Expand Up @@ -725,7 +727,25 @@ elemToBodyPart ns element
parstyle <- elemToParagraphStyle ns element
<$> asks envParStyles
<*> asks envNumbering
parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element)

let hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)

let isTableNumberElt el@(Element name attribs _ _) =
(qName name == "fldSimple" &&
case lookupAttrBy ((== "instr") . qName) attribs of
Nothing -> False
Just instr -> "Table" `elem` T.words instr) ||
(qName name == "instrText" && "Table" `elem` T.words (strContent el))

let isTable = hasCaptionStyle &&
isJust (filterChild isTableNumberElt element)

let stripOffLabel = dropWhile (not . isTableNumberElt)

let children = (if isTable
then stripOffLabel
else id) $ elChildren element
parparts' <- mconcat <$> mapD (elemToParPart ns) children
fldCharState <- gets stateFldCharState
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
-- Word uses list enumeration for numbered headings, so we only
Expand All @@ -734,21 +754,9 @@ elemToBodyPart ns element
case pHeading parstyle of
Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
mkListItem parstyle numId lvl parparts
_ -> let
hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)

hasSimpleTableField = fromMaybe False $ do
fldSimple <- findChildByName ns "w" "fldSimple" element
instr <- findAttrByName ns "w" "instr" fldSimple
pure ("Table" `elem` T.words instr)

hasComplexTableField = fromMaybe False $ do
instrText <- findElementByName ns "w" "instrText" element
pure ("Table" `elem` T.words (strContent instrText))

in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
then return $ TblCaption parstyle parparts
else return $ Paragraph parstyle parparts
_ -> if isTable
then return $ TblCaption parstyle parparts
else return $ Paragraph parstyle parparts

elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
Expand Down
Binary file added test/command/9002.docx
Binary file not shown.
20 changes: 20 additions & 0 deletions test/command/9002.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
```
% pandoc command/9002.docx -t html
^D
<table>
<caption><p>This is my table!</p></caption>
<colgroup>
<col style="width: 50%" />
<col style="width: 50%" />
</colgroup>
<thead>
<tr class="header">
<th>a</th>
<th>b</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
<p>See Table 1 This is my table!</p>
```
10 changes: 4 additions & 6 deletions test/docx/table_captions_with_field.native
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[Para [Str "See",Space,Str "Table",Space,Str "1."]
,Para []
,Table ("",[],[]) (Caption Nothing
[Para [Str "Table",Space,Str "1"]])
,Para [Str "Table",Space,Str "1"]
,Table ("",[],[]) (Caption Nothing [])
[(AlignDefault,ColWidth 0.7605739372523825)
,(AlignDefault,ColWidth 0.11971303137380876)
,(AlignDefault,ColWidth 0.11971303137380876)]
Expand Down Expand Up @@ -32,8 +31,7 @@
(TableFoot ("",[],[])
[])
,Header 2 ("section", [], []) []
,Table ("",[],[]) (Caption Nothing
[Para [Str "Table",Space,Str "2"]])
,Table ("",[],[]) (Caption Nothing [])
[(AlignDefault,ColWidth 0.3332963620230701)
,(AlignDefault,ColWidth 0.3332963620230701)
,(AlignDefault,ColWidth 0.3334072759538598)]
Expand All @@ -50,5 +48,5 @@
[])]
(TableFoot ("",[],[])
[])
,Para []
,Para [Str "Table",Space,Str "2"]
,Para [Str "See",Space,Str "Table",Space,Str "2."]]

0 comments on commit 068fce4

Please sign in to comment.