word 转 html cms,Java 将Word文件转换为HTML格式文件
前言:在很多時候我們都需要在項目中導入word文檔,但是後期再次在前端顯示這個文檔的時候,前端往往需要的是html格式的,所以這個時候就會提出一個需求:「你們存文檔的時候能不能存成html格式的?」於是這篇文章的內容就可以滿足這個需求。
我是通過MultipartFile類來實現的,上代碼:
一、首先導入需要的依賴包:
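<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.converter.docx.xwpf</artifactId>
    <version>2.0.1</version>
</dependency>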
二、編寫代碼:
package com.lmt.service.file;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.UUID;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;
import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
@Component
public class WordToHtml {
private static final Logger logger = LoggerFactory.getLogger(WordToHtml.class);
//轉(zhuǎn)換的方法
public File convert(MultipartFile file) {
//獲得文件的名字
String filename = file.getOriginalFilename();
//獲得文件的擴(kuò)展名
String suffix=filename.substring(filename.lastIndexOf("."));
String newName=UUID.randomUUID().toString();
// TODO 需要保存在一個(gè)新的位置
//將文件保存在D:/test/文件下
File convFile = new File("D:/test/" + newName +suffix);
FileOutputStream fos = null;
try {
//創(chuàng)建文件
convFile.createNewFile();
fos = new FileOutputStream(convFile);
fos.write(file.getBytes());
} catch (IOException ex) {
logger.error("上傳文件出錯(cuò)!", ex);
return null;
} finally {
IOUtils.closeQuietly(fos);
}
// 輸入文件名的所在文件夾
// 加上反斜杠
String parentDirectory = convFile.getParent();
if (!parentDirectory.endsWith("\\")) {
parentDirectory = parentDirectory + "\\";
}
if (filename.endsWith(".docx")) {
return docxConvert(parentDirectory, convFile.getAbsolutePath(),newName);
} else if (filename.endsWith(".doc")) {
return docConvert(parentDirectory, convFile.getAbsolutePath(),newName);
} else {
logger.error("不支持的文件格式!");
return null;
}
}
private File docxConvert(String parentDirectory, String filename,String newName) {
try {
XWPFDocument document = new XWPFDocument(new FileInputStream(filename));
XHTMLOptions options = XHTMLOptions.create().setImageManager(new ImageManager(new File(parentDirectory), UUID.randomUUID().toString())).indent(4);
FileOutputStream out = new FileOutputStream(new File(parentDirectory + newName+ ".html"));
XHTMLConverter.getInstance().convert(document, out, options);
return new File(parentDirectory + newName+ ".html");
} catch (IOException ex) {
logger.error("word轉(zhuǎn)化出錯(cuò)!", ex);
return null;
}
}
private File docConvert(String parentDirectory, String filename,String newName) {
try {
HWPFDocument document = new HWPFDocument(new FileInputStream(filename));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
// converter默認(rèn)對(duì)圖片不作處理,需要手動(dòng)下載圖片并嵌入到html中
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
@Override
public String savePicture(byte[] bytes, PictureType pictureType, String s, float v, float v1) {
String imageFilename = parentDirectory + "";
String identity=UUID.randomUUID().toString();
File imageFile = new File(imageFilename, identity+s);
imageFile.getParentFile().mkdirs();
InputStream in = null;
FileOutputStream out = null;
try {
in = new ByteArrayInputStream(bytes);
out = new FileOutputStream(imageFile);
IOUtils.copy(in, out);
} catch (IOException ex) {
logger.error("word轉(zhuǎn)化出錯(cuò)!", ex);
} finally {
if (in != null) {
IOUtils.closeQuietly(in);
}
if (out != null) {
IOUtils.closeQuietly(out);
}
}
return imageFile.getName();
}
});
wordToHtmlConverter.processDocument(document);
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
String result = new String(out.toByteArray());
FileWriter writer = new FileWriter(parentDirectory + newName + ".html");
writer.write(result);
writer.close();
} catch (IOException | TransformerException | ParserConfigurationException ex) {
logger.error("word轉(zhuǎn)化出錯(cuò)!", ex);
}
return new File(parentDirectory + newName + ".html");
}
/**
* 將上傳的Word文檔轉(zhuǎn)化成HTML字符串
* @param attachfile
* @return
*/
public String convertToHtml(MultipartFile attachfile) {
String wordContent = "";
// 將Word文件轉(zhuǎn)換為html
File file = convert(attachfile);
// 讀取html文件
if (file != null) {
return "文件轉(zhuǎn)換成功"
}
return "文件轉(zhuǎn)換失敗";
}
代碼的含義已經在代碼行的注釋上有了,哪裡有問題,歡迎大家隨時在評論下方留言!
總結
以上是生活随笔為你收集整理的word 转 html cms,Java 将Word文件转换为HTML格式文件的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: rdd.foreach(print)报错
- 下一篇: go run main.go 参数_介绍