Excel2003Reader.java 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. package com.kmall.common.utils.excel;
  2. import org.apache.poi.hssf.eventusermodel.*;
  3. import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
  4. import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
  5. import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
  6. import org.apache.poi.hssf.model.HSSFFormulaParser;
  7. import org.apache.poi.hssf.record.*;
  8. import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  9. import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  10. import java.io.IOException;
  11. import java.io.InputStream;
  12. import java.util.ArrayList;
  13. import java.util.HashMap;
  14. import java.util.List;
  15. import java.util.Map;
  16. /**
  17. * Excel2003格式解析
  18. *
  19. * @author Scott
  20. * @email
  21. * @date 2017年10月28日 13:11:27
  22. */
  23. public class Excel2003Reader implements HSSFListener {
  24. private int minColumns = -1;
  25. private POIFSFileSystem fs;
  26. /**
  27. * 最后一行行号
  28. */
  29. private int lastRowNumber;
  30. /**
  31. * 最后一列列号
  32. */
  33. private int lastColumnNumber;
  34. /**
  35. * Should we output the formula, or the value it has?
  36. */
  37. private boolean outputFormulaValues = true;
  38. /**
  39. * For parsing Formulas
  40. */
  41. private SheetRecordCollectingListener workbookBuildingListener;
  42. // 工作薄
  43. private HSSFWorkbook stubWorkbook;
  44. // Records we pick up as we process
  45. private SSTRecord sstRecord;
  46. private FormatTrackingHSSFListener formatListener;
  47. // 表索引
  48. private int sheetIndex = -1;
  49. private BoundSheetRecord[] orderedBSRs;
  50. @SuppressWarnings("rawtypes")
  51. private ArrayList boundSheetRecords = new ArrayList();
  52. // For handling formulas with string results
  53. private int nextRow;
  54. private int nextColumn;
  55. private boolean outputNextStringRecord;
  56. // 存储行记录的容器
  57. private List<String> rowlist = new ArrayList<String>();
  58. ;
  59. // 单Sheet数据
  60. private List<String[]> sheetData = new ArrayList<String[]>();
  61. // 多Sheet数据
  62. private Map<Integer, List<String[]>> workData = new HashMap<Integer, List<String[]>>();
  63. /**
  64. * 遍历excel下所有的sheet
  65. *
  66. * @param fileStream 处理文件流
  67. * @throws IOException 抛出IO异常
  68. */
  69. public void process(InputStream fileStream)
  70. throws IOException {
  71. this.fs = new POIFSFileSystem(fileStream);
  72. MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
  73. this);
  74. formatListener = new FormatTrackingHSSFListener(listener);
  75. HSSFEventFactory factory = new HSSFEventFactory();
  76. HSSFRequest request = new HSSFRequest();
  77. if (outputFormulaValues) {
  78. request.addListenerForAllRecords(formatListener);
  79. } else {
  80. workbookBuildingListener = new SheetRecordCollectingListener(
  81. formatListener);
  82. request.addListenerForAllRecords(workbookBuildingListener);
  83. }
  84. factory.processWorkbookEvents(request, fs);
  85. }
  86. /**
  87. * HSSFListener 监听方法,处理 Record
  88. *
  89. * @param record 行记录
  90. */
  91. @SuppressWarnings("unchecked")
  92. @Override
  93. public void processRecord(Record record) {
  94. int thisRow = -1;
  95. int thisColumn = -1;
  96. String thisStr = null;
  97. String value = null;
  98. switch (record.getSid()) {
  99. case BoundSheetRecord.sid:
  100. boundSheetRecords.add(record);
  101. break;
  102. case BOFRecord.sid:
  103. BOFRecord br = (BOFRecord) record;
  104. if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
  105. // 如果有需要,则建立子工作薄
  106. if (workbookBuildingListener != null && stubWorkbook == null) {
  107. stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
  108. }
  109. if (sheetIndex >= 0) {
  110. List<String[]> data = new ArrayList<String[]>();
  111. data.addAll(sheetData);
  112. workData.put(sheetIndex, data);
  113. sheetData.clear();
  114. }
  115. sheetIndex++;
  116. if (orderedBSRs == null) {
  117. orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
  118. }
  119. }
  120. break;
  121. case SSTRecord.sid:
  122. sstRecord = (SSTRecord) record;
  123. break;
  124. case BlankRecord.sid:
  125. BlankRecord brec = (BlankRecord) record;
  126. thisRow = brec.getRow();
  127. thisColumn = brec.getColumn();
  128. thisStr = "";
  129. rowlist.add(thisColumn, thisStr);
  130. break;
  131. case BoolErrRecord.sid: // 单元格为布尔类型
  132. BoolErrRecord berec = (BoolErrRecord) record;
  133. thisRow = berec.getRow();
  134. thisColumn = berec.getColumn();
  135. thisStr = berec.getBooleanValue() + "";
  136. rowlist.add(thisColumn, thisStr);
  137. break;
  138. case FormulaRecord.sid: // 单元格为公式类型
  139. FormulaRecord frec = (FormulaRecord) record;
  140. thisRow = frec.getRow();
  141. thisColumn = frec.getColumn();
  142. if (outputFormulaValues) {
  143. if (Double.isNaN(frec.getValue())) {
  144. // Formula result is a string
  145. // This is stored in the next record
  146. outputNextStringRecord = true;
  147. nextRow = frec.getRow();
  148. nextColumn = frec.getColumn();
  149. } else {
  150. thisStr = formatListener.formatNumberDateCell(frec);
  151. }
  152. } else {
  153. thisStr = '"' + HSSFFormulaParser.toFormulaString(
  154. stubWorkbook, frec.getParsedExpression()) + '"';
  155. }
  156. rowlist.add(thisColumn, thisStr);
  157. break;
  158. case StringRecord.sid:// 单元格中公式的字符串
  159. if (outputNextStringRecord) {
  160. // String for formula
  161. StringRecord srec = (StringRecord) record;
  162. thisStr = srec.getString();
  163. thisRow = nextRow;
  164. thisColumn = nextColumn;
  165. outputNextStringRecord = false;
  166. }
  167. break;
  168. case LabelRecord.sid:
  169. LabelRecord lrec = (LabelRecord) record;
  170. thisColumn = lrec.getColumn();
  171. value = lrec.getValue().trim();
  172. value = value.equals("") ? " " : value;
  173. this.rowlist.add(thisColumn, value);
  174. break;
  175. case LabelSSTRecord.sid: // 单元格为字符串类型
  176. LabelSSTRecord lsrec = (LabelSSTRecord) record;
  177. thisColumn = lsrec.getColumn();
  178. if (sstRecord == null) {
  179. rowlist.add(thisColumn, " ");
  180. } else {
  181. value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
  182. value = value.equals("") ? " " : value;
  183. rowlist.add(thisColumn, value);
  184. }
  185. break;
  186. case NumberRecord.sid: // 单元格为数字类型
  187. NumberRecord numrec = (NumberRecord) record;
  188. thisColumn = numrec.getColumn();
  189. value = formatListener.formatNumberDateCell(numrec).trim();
  190. value = value.equals("") ? " " : value;
  191. // 向容器加入列值
  192. rowlist.add(thisColumn, value);
  193. break;
  194. default:
  195. break;
  196. }
  197. // 遇到新行的操作
  198. if (thisRow != -1 && thisRow != lastRowNumber) {
  199. lastColumnNumber = -1;
  200. }
  201. // 空值的操作
  202. if (record instanceof MissingCellDummyRecord) {
  203. MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
  204. thisColumn = mc.getColumn();
  205. rowlist.add(thisColumn, "");
  206. }
  207. // 更新行和列的值
  208. if (thisRow > -1) {
  209. lastRowNumber = thisRow;
  210. }
  211. if (thisColumn > -1) {
  212. lastColumnNumber = thisColumn;
  213. }
  214. // 行结束时的操作
  215. if (record instanceof LastCellOfRowDummyRecord) {
  216. if (minColumns > 0) {
  217. // 列值重新置空
  218. if (lastColumnNumber == -1) {
  219. lastColumnNumber = 0;
  220. }
  221. }
  222. lastColumnNumber = -1;
  223. // 每行结束时, 数据写入集合
  224. sheetData.add(rowlist.toArray(new String[]{}));
  225. // 清空容器
  226. rowlist.clear();
  227. }
  228. }
  229. /**
  230. * 获取数据(单Sheet)
  231. *
  232. * @param sheetIndex sheet下标
  233. * @return List<String[]> 数据
  234. */
  235. public List<String[]> getSheetData(Integer sheetIndex) {
  236. return workData.get(sheetIndex);
  237. }
  238. /**
  239. * 获取数据(多Sheet)
  240. *
  241. * @return Map<Integer, List<String[]>> 多sheet的数据
  242. */
  243. public Map<Integer, List<String[]>> getSheetData() {
  244. return workData;
  245. }
  246. }