使用 POI 读取 Word docx 中的书签、替换书签内容(汉字或合并外部文档内容)
生活随笔
收集整理的這篇文章主要介紹了
使用 POI 读取 Word docx 中的书签、替换书签内容(汉字或合并外部文档内容)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
通過操作書簽可以實現 word 模板替換變量的功能場景。本文以下代碼內容可以直接復制使用,并能正常編譯運行。
添加 maven 依賴
<dependencies>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-scratchpad</artifactId>
        <version>5.2.2</version>
    </dependency>
    <dependency>
        <groupId>com.deepoove</groupId>
        <artifactId>poi-tl</artifactId>
        <version>1.12.0</version>
    </dependency>
</dependencies>
操作書簽的代碼
1、ShanhyXWPFDocumentMerge.java
package org.example;import com.deepoove.poi.xwpf.NiceXWPFDocument; import com.deepoove.poi.xwpf.XmlXWPFDocumentMerge; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.impl.store.DomImpl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.w3c.dom.Node;import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Iterator; import java.util.List;/*** 文檔合并** @author shanhy*/ public class ShanhyXWPFDocumentMerge extends XmlXWPFDocumentMerge {/*** 將一個文檔合并到另外一個文檔指定段落的相對位置** @param source 當前文檔* @param mergeIterator 被合入的文檔* @param targetParagraphNode 目標段落Node* @param deleteTargetParagraph 是否刪除目標段落自身* @return* @throws Exception*/public ShanhyXWPFDocument mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception {CTBody body = source.getDocument().getBody();List<String> addParts = createMergeableStrings(source, mergeIterator);String[] startEnd = truncatedStartEndXmlFragment(body);// CTP mergedContainer = paragraph.getCTP();XmlOptions options = new XmlOptions();options.setUseSameLocale(((DomImpl.Dom) targetParagraphNode).locale());CTP mergedBody = CTP.Factory.parse(startEnd[0] + String.join("", addParts) + startEnd[1], options);// instead insert xml-fragment?// new XWPFParagraph(CTP.Factory.parse(mergedBody.getDomNode(), options), source);Node mergedContainerParentNode = targetParagraphNode.getParentNode();// 將引入文檔的整體插入到目標段落之前mergedContainerParentNode.insertBefore(mergedBody.getDomNode(), targetParagraphNode);if(deleteTargetParagraph) {// 刪除掉目標段落自身mergedContainerParentNode.removeChild(targetParagraphNode);}// mergedContainer.getDomNode().appendChild(CTP.Factory.parse(mergedBody.getDomNode(), options).getDomNode()); // mergedContainer.set(mergedBody); // String xmlText = 
truncatedOverlapWP(body); // body.set(CTBody.Factory.parse(xmlText)); // return source.generate(true);return source;}/*** 反射調用父類方法** @param methodName* @param params* @throws NoSuchMethodException* @throws InvocationTargetException* @throws IllegalAccessException*/private Object invokeSuperMethod(String methodName, Class<?>[] paramClasses, Object[] params) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {Method method = this.getClass().getSuperclass().getDeclaredMethod(methodName, paramClasses);method.setAccessible(true);return method.invoke(this, params);}@SuppressWarnings("unchecked")private List<String> createMergeableStrings(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator) {try {Object obj = invokeSuperMethod("createMergeableStrings", new Class[]{NiceXWPFDocument.class, Iterator.class}, new Object[]{source, mergeIterator});if (obj instanceof List<?>) {return (List<String>) obj;}} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {throw new RuntimeException(e);}return new ArrayList<>();}private String[] truncatedStartEndXmlFragment(CTBody body) {try {Object obj = invokeSuperMethod("truncatedStartEndXmlFragment", new Class[]{CTBody.class}, new Object[]{body});return (String[]) obj;} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {throw new RuntimeException(e);}}private String truncatedOverlapWP(CTBody body) {try {Object obj = invokeSuperMethod("truncatedOverlapWP", new Class[]{CTBody.class}, new Object[]{body});return (String) obj;} catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {throw new RuntimeException(e);}} }2、ShanhyXWPFDocument.java
package org.example;

import com.deepoove.poi.xwpf.NiceXWPFDocument;
import org.w3c.dom.Node;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

/**
 * Docx document type that adds "merge before a paragraph" support on top of
 * {@link NiceXWPFDocument}.
 *
 * @author shanhy
 */
public class ShanhyXWPFDocument extends NiceXWPFDocument {

    /**
     * Loads a docx document from the given stream.
     *
     * @param in stream containing the docx content
     * @throws IOException if the document cannot be read
     */
    public ShanhyXWPFDocument(InputStream in) throws IOException {
        super(in);
    }

    /**
     * Merges the given documents into this document, in front of the target paragraph.
     *
     * @param source                not used by this method; the merge is always applied to
     *                              {@code this}
     * @param mergeIterator         the documents to merge in
     * @param targetParagraphNode   DOM node of the target paragraph
     * @param deleteTargetParagraph whether the target paragraph itself is removed afterwards
     * @throws Exception if the merge fails
     */
    public void mergeToParagraphBefore(ShanhyXWPFDocument source, Iterator<ShanhyXWPFDocument> mergeIterator, Node targetParagraphNode, boolean deleteTargetParagraph) throws Exception {
        ShanhyXWPFDocumentMerge merger = new ShanhyXWPFDocumentMerge();
        merger.mergeToParagraphBefore(this, mergeIterator, targetParagraphNode, deleteTargetParagraph);
    }
}
3、ShanhyDocxBookmarkService.java
package org.example;import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.impl.store.DomImpl; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.w3c.dom.Node; import org.w3c.dom.NodeList;import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map;/*** 書簽替換處理類* * @author shanhy*/ public class ShanhyDocxBookmarkService {/*** 因為docx為xml格式的結構,一下為docx中定義的部分常量引用**/public static final String RUN_NODE_NAME = "w:r";public static final String TEXT_NODE_NAME = "w:t";public static final String BOOKMARK_START_TAG = "w:bookmarkStart";public static final String BOOKMARK_END_TAG = "w:bookmarkEnd";public static final String BOOKMARK_ID_ATTR_ID = "w:id";public static final String NODENAME_BODY = "w:body";public static final String NODENAME_PARAGRAPH = "w:p";public static final String BOOKMARK_ID_ATTR_NAME = "w:name";public static final String STYLE_NODE_NAME = "w:rPr";public static final String PARAGRAPH_PROPERTIES_NAME = "w:pPr";/*** 讀取 docx 文件中的所有書簽(注意不支持書簽嵌套,書簽嵌套書簽的情況只識別最外層書簽)** @param docx*/public List<Node> getBookmarksFromDocx(XWPFDocument docx) {Node bodyNode = docx.getDocument().getBody().getDomNode();// 遞歸讀取 bookmarkStart 節點,返回bookmarkNode集合(<w:bookmarkStart w:id="1" w:name="書簽名稱"/>)List<Node> bookmarkNodeList = new ArrayList<>();getBookmarksFromNode(bodyNode, bookmarkNodeList);return bookmarkNodeList;}/*** 遞歸解析所有Node節點,將bookmark記錄到集合中** @param node* @param bookmarkNodeList*/public void getBookmarksFromNode(Node node, List<Node> bookmarkNodeList) {if (node.getNodeName().equals(BOOKMARK_START_TAG)) {bookmarkNodeList.add(node);} else if (node.getNodeName().equals(BOOKMARK_END_TAG) || node.getNodeName().equals(PARAGRAPH_PROPERTIES_NAME)) {// Nothing} else 
{NodeList childNodes = node.getChildNodes();for (int i = 0, j = childNodes.getLength(); i < j; i++) {Node childNode = childNodes.item(i);getBookmarksFromNode(childNode, bookmarkNodeList);}}}/*** docx 文件中書簽的替換** @param docx* @param outputStream* @param dataMap* @throws IOException*/public void replaceDocxBookmarks(ShanhyXWPFDocument docx, OutputStream outputStream, Map<String, Object> dataMap) throws Exception {// 獲取所有書簽List<Node> startBookmarkList = getBookmarksFromDocx(docx);// 替換書簽內容for (Node startBookmarkNode : startBookmarkList) {String bookmarkName = startBookmarkNode.getAttributes().getNamedItem("w:name").getNodeValue();if (dataMap.containsKey(bookmarkName)) {Object data = dataMap.get(bookmarkName);if (data instanceof String) {// 內容是文本replaceDocxBookmarkFromString(getFirstParentParagraphByNode(startBookmarkNode, docx), startBookmarkNode, (String) data);} else if (data instanceof ShanhyXWPFDocument) { // 內容是外部 docx 文檔replaceDocxBookmarkFromDocx(docx, getFirstParentNodeByNode(startBookmarkNode, docx), startBookmarkNode, (ShanhyXWPFDocument) data);} else {throw new RuntimeException("替換書簽的內容源數據格式暫不支持");}}}docx.write(outputStream);docx.close();}/*** 將一個docx文檔替換到docx的書簽中* 1.要求書簽盡量設置在文檔的換行起點* 2.被合入的docx會作為換行起點開始合入,如果bookmark沒有設置在一個換行的起點,程序會自動從該bookmark節點開始尋找,* 定位到父節點為body的對應p節點的下一個節點,然后將需要合入的文檔所有內容追加到該p節點的下一個** @param bookmarkParentParagraph* @param startBookmarkNode* @param content*/public void replaceDocxBookmarkFromString(XWPFParagraph bookmarkParentParagraph, Node startBookmarkNode, String content) {Node nextNode = startBookmarkNode.getNextSibling();boolean contentReplaced = false;while (nextNode != null) {// 節點是結束符if (nextNode.getNodeName().equals(BOOKMARK_END_TAG)) {break;}// 1.尋找startBookmark的下一個 w:r 節點,然后將該節點中的 w:t 節點的真正文字內容替換掉(這樣可以保留原來的bookmark的文字樣式)if (!contentReplaced && nextNode.getNodeName().equals("w:r")) {NodeList runChildNodes = nextNode.getChildNodes();for (int i = 0, j = runChildNodes.getLength(); i < j; i++) {if 
(runChildNodes.item(i).getNodeName().equals("w:t")) {// w:t 是真正的文本內容runChildNodes.item(i).getFirstChild().setNodeValue(content);}}contentReplaced = true;} else {// 2.然后繼續向下刪除 endBookmark 之前的所有節點startBookmarkNode.getParentNode().removeChild(nextNode);}nextNode = nextNode.getNextSibling();}//1-endif (!contentReplaced) {XWPFRun run = bookmarkParentParagraph.createRun();run.setText(content);Node newChildNode = run.getCTR().getDomNode();Node startBookmarkNextNode = startBookmarkNode.getNextSibling();if (startBookmarkNextNode == null) {startBookmarkNode.getParentNode().appendChild(newChildNode);} else {startBookmarkNode.getParentNode().insertBefore(newChildNode, startBookmarkNextNode);}}}/*** 將一個docx文檔替換到docx的書簽中* 1.要求書簽盡量設置在文檔的換行起點* 2.被合入的docx會作為換行起點開始合入,如果bookmark沒有設置在一個換行的起點,程序會自動從該bookmark節點開始尋找,* 定位到父節點為body的對應p節點的下一個節點,然后將需要合入的文檔所有內容追加到該p節點的下一個** @param startBookmarkNode* @param contentDocx*/public void replaceDocxBookmarkFromDocx(ShanhyXWPFDocument docx, Node bookmarkParentNode, Node startBookmarkNode, ShanhyXWPFDocument contentDocx) throws Exception {//1-start.刪除bookmark原始內容Node nextNode = startBookmarkNode.getNextSibling();while (nextNode != null) {// 循環查找結束符String nodeName = nextNode.getNodeName();if (nodeName.equals(BOOKMARK_END_TAG)) {break;}// 刪除中間的非結束節點,即刪除原書簽內容Node delNode = nextNode;nextNode = nextNode.getNextSibling();startBookmarkNode.getParentNode().removeChild(delNode);}//1-enddocx.mergeToParagraphBefore(docx, Collections.singletonList(contentDocx).iterator(), bookmarkParentNode, true);}/*** 向上遞歸獲得指定node節點的上一個p節點** @param node* @return*/public XWPFParagraph getFirstParentParagraphByNode(Node node, XWPFDocument docx) throws XmlException {XmlOptions options = new XmlOptions();options.setUseSameLocale(((DomImpl.Dom) node).locale());return new XWPFParagraph(CTP.Factory.parse(getFirstParentNodeByNode(node, docx), options), docx);}/*** 向上遞歸獲得指定node節點的上一個p節點** @param node* @return*/public Node getFirstParentNodeByNode(Node node, XWPFDocument docx) throws 
XmlException {Node parentNode = node.getParentNode();if (NODENAME_PARAGRAPH.equals(parentNode.getNodeName())) {return parentNode;}return getFirstParentNodeByNode(parentNode, docx);}}4、BookmarkDemo.java
package org.example;

import org.apache.poi.xwpf.usermodel.TableWidthType;

import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

/**
 * Demo entry point: fills the bookmarks of a template docx with text and with the content of
 * another docx, then writes the result to a new file.
 */
public class BookmarkDemo {

    /**
     * Runs the bookmark replacement against hard-coded sample files and prints the elapsed
     * time.
     *
     * @param args unused
     * @throws Exception if any file cannot be read or written, or the replacement fails
     */
    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        ShanhyDocxBookmarkService bookmarkService = new ShanhyDocxBookmarkService();

        // Example: list every bookmark of a docx file (read by walking the XML DOM).
        // List<Node> bookmarkNodeList = bookmarkService.getBookmarksFromDocx(new XWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\模板_Password_Removed.docx"))));
        // bookmarkNodeList.forEach(bookmarkNode -> {
        //     NamedNodeMap attrs = bookmarkNode.getAttributes();
        //     System.out.println("書簽id=" + attrs.getNamedItem(ShanhyDocxBookmarkService.BOOKMARK_ID_ATTR_ID).getNodeValue() + ", 書簽name=" + attrs.getNamedItem(ShanhyDocxBookmarkService.BOOKMARK_ID_ATTR_NAME).getNodeValue() + "");
        // });

        // Replace bookmark content in the template.
        Map<String, Object> dataMap = new HashMap<>();
        // Further sample entries:
        // dataMap.put("strong", "單紅宇");
        // dataMap.put("footnotes", "李小雨");
        // dataMap.put("替換的書簽", new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx"))));
        // dataMap.put("Hello", new ShanhyXWPFDocument(Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx"))));
        // dataMap.put("測試docx書簽", "內容內容內容");

        try (InputStream contentIn = Files.newInputStream(Paths.get("D:\\Desktop\\docx\\content_table2.docx"));
             ShanhyXWPFDocument mergeContentDocx = new ShanhyXWPFDocument(contentIn)) {
            // Stretch every table of the merged-in document to the full page width.
            mergeContentDocx.getTables().forEach(tbl -> {
                tbl.setWidthType(TableWidthType.PCT);
                tbl.setWidth("100%");
            });
            dataMap.put("AAA", mergeContentDocx);
            dataMap.put("String書簽", "單紅宇");
            dataMap.put("沒有內容的書簽", "書簽內容Hello");

            // The template is loaded before the output file is opened, so a broken template
            // does not leave behind an empty output file.
            try (InputStream templateIn = Files.newInputStream(Paths.get("D:\\Desktop\\docx\\template.docx"));
                 ShanhyXWPFDocument templateDocx = new ShanhyXWPFDocument(templateIn);
                 OutputStream out = Files.newOutputStream(Paths.get("D:\\Desktop\\docx\\out-2.docx"))) {
                bookmarkService.replaceDocxBookmarks(templateDocx, out, dataMap);
            }
        }

        System.out.println("耗時=" + (System.currentTimeMillis() - start) + "ms");
    }
}
測試的合并效果如下:
在編輯 word 書簽時,可以開啟顯示書簽模式,這樣可以直觀地在 word 頁面中看到書簽標記,如下圖所示:
如果你有獲取 docx 的 xml 內容的需求,則代碼為:
new XWPFDocument(inputStream).getDocument().xmlText();
new XWPFDocument(inputStream).getDocument().getBody().xmlText();
其他:
1、本文因為業務需求需要使用書簽方式,如果你使用變量 {{username}}、{{快遞地址}} 這樣的方式,則直接使用 poi-tl 庫操作會更簡單。
2、poi-tl 是一個方便操作 word 的開源項目: https://github.com/Sayi/poi-tl
(END)
總結
以上是生活随笔為你收集整理的使用 POI 读取 Word docx 中的书签、替换书签内容(汉字或合并外部文档内容)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 外贸客户如何跟进?外贸客户类型及跟进策略
- 下一篇: 手机归属地查询接口,手机号查3大运营商的