日期:2014-05-20  浏览次数:20831 次

怎么修改才可以读取中文?
package com.goldcell.word;

import java.io.File;  
import java.io.FileInputStream;  
import java.io.FileNotFoundException;  
  
import org.apache.poi.hwpf.HWPFDocument;  
import org.apache.poi.hwpf.usermodel.Paragraph;  
import org.apache.poi.hwpf.usermodel.Range;  
import org.apache.poi.hwpf.usermodel.Table;  
import org.apache.poi.hwpf.usermodel.TableCell;  
import org.apache.poi.hwpf.usermodel.TableIterator;  
import org.apache.poi.hwpf.usermodel.TableRow;  
  
import java.io.File;  
import java.io.FileInputStream;  
import java.io.InputStream;  
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
   
import org.apache.poi.POIXMLDocument;  
import org.apache.poi.POIXMLTextExtractor;  
import org.apache.poi.hwpf.extractor.WordExtractor;  
import org.apache.poi.openxml4j.opc.OPCPackage;  
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;  
  
  
import org.apache.poi.poifs.filesystem.POIFSFileSystem;  

public class AnalyticWord {
/**
 * Command-line entry point: extracts the text of a Word document via
 * {@code getWordText} and prints each returned entry on its own line.
 *
 * @param args optional command-line arguments; when {@code args[0]} is
 *             present it is used as the path of the Word file, otherwise
 *             the default {@code "D://word.doc"} is read
 */
public static void main(String[] args) {
    // Path of the Word file: first argument if supplied, else the original default.
    String file = (args != null && args.length > 0) ? args[0] : "D://word.doc";

    // To inspect table contents instead, getWordValues(file) can be called here.
    List<String> list = new AnalyticWord().getWordText(file);
    for (String string : list) {
        System.out.println(string);
    }
}

public List<String[][]> getWordValues(String file){
List<String[][]>word = new ArrayList<String[][]>();
try {
List<String[][]> wordValue = new ArrayList<String[][]>();
//表格数据
POIFSFileSystem pfs = new POIFSFileSystem(new FileInputStream(file));  
HWPFDocument hwpf = new HWPFDocument(pfs); 
Range range = hwpf.getRange();//得到文档的读取范围
 
//迭代文档中的表格  
TableIterator it = new TableIterator(range);  
while (it.hasNext()) {  
Table tb = (Table) it.next();  
String[][] tables = null;
if(tb.numRows() > 0) tables = new String[tb.numRows()][tb.getRow(0).numCells()];  
for (int i = 0; i < tb.numRows(); i++) {  
TableRow tr = tb.getRow(i);  
//迭代列,默认从0开始  
for (int j = 0; j < tr.numCells(); j++) {  
TableCell td = tr.getCell(j);//取得单元格  
//取得单元格的内容  
for(int k=0;k<td.numParagraphs();k++){  
Paragraph para =td.getParagraph(k);  
String s = para.text();  
//System.out.print()+"\t"); //输出单元格数据
if( s.indexOf("") < 1) {
tables[i][j] = " ";
continue;
}
tables[i][j] = s.substring(0, s.indexOf(""));

// System.out.print(tables[i][j]);
}  
// System.out.println();//没一行完后换行

// System.out.println();//第一个表格完后换一行
wordValue.add(tables);
}
List<String> wordTest = new AnalyticWord().getWordText(file);
for (int i = 0,k=0; i < wordTest.size(); i++) {
if("tables".equals(wordTest.get(i))){
word.add(wordValue.get(k++));
}else{