Files
AG-ONE/IlogDynamicModule/src/main/java/com/ag/demo/PdfSplitService.java
2024-07-02 11:40:01 +08:00

86 lines
2.8 KiB
Java

package com.ag.demo;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.convert.Convert;
import com.ag.util.StrUtil;
import com.alibaba.fastjson.JSON;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Splits one PDF that contains several bills into one PDF file per bill.
 *
 * <p>A page whose text contains a row made up only of digits is treated as the
 * first page of a bill, and that row is used as the bill number. Pages without
 * such a row are treated as continuation pages of the bill that precedes them.
 */
public class PdfSplitService {
    /** Path of the PDF file to split. */
    String sourcePath = "F:\\pdfsplit\\MDB1R981_678692.pdf";
    /** Prefix (directory plus trailing separator) for the generated per-bill PDFs. */
    String targetPath = "F:\\pdfsplit\\";

    /**
     * Loads {@link #sourcePath}, groups its pages by bill number and writes one
     * {@code <billNo>.pdf} per bill under {@link #targetPath}.
     *
     * @throws IOException if the source cannot be read or a part cannot be written
     */
    @Test
    public void getPdfTextByPages() throws IOException {
        // try-with-resources: the source document must stay open until every
        // split part has been saved, and must be closed afterwards.
        try (PDDocument document = PDDocument.load(new File(sourcePath))) {
            document.setAllSecurityToBeRemoved(true);
            List<List<String>> pages = readPageRows(document);
            Map<String, String> ranges = groupPagesByBill(pages);
            System.out.println(JSON.toJSONString(ranges));
            saveBills(document, ranges);
        }
    }

    /**
     * Extracts the non-blank text rows of every page.
     *
     * @param document the opened source PDF
     * @return one list per page, in page order; a page without text yields an
     *         empty list so that list index always equals the 0-based page index
     * @throws IOException if text extraction fails
     */
    private List<List<String>> readPageRows(PDDocument document) throws IOException {
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setSortByPosition(true);
        List<List<String>> pages = CollUtil.list(false);
        int pageCount = document.getNumberOfPages();
        for (int pageNo = 1; pageNo <= pageCount; pageNo++) {
            stripper.setStartPage(pageNo);
            stripper.setEndPage(pageNo);
            String text = stripper.getText(document);
            List<String> rows = CollUtil.list(false);
            if (!StrUtil.isEmpty(text)) {
                // Accept both CRLF and LF so the result does not depend on the
                // platform line separator used by the stripper.
                for (String row : text.split("\\r?\\n")) {
                    if (!StrUtil.isEmpty(row.trim())) {
                        rows.add(row);
                    }
                }
            }
            // Always add, even when empty, to keep indexes aligned with pages.
            pages.add(rows);
        }
        return pages;
    }

    /**
     * Maps every bill number to the page range it covers.
     *
     * @param pages text rows per page, index = 0-based page index
     * @return insertion-ordered map of bill number to {@code "first_last"},
     *         both 0-based and inclusive
     */
    private Map<String, String> groupPagesByBill(List<List<String>> pages) {
        Map<String, String> ranges = new LinkedHashMap<>();
        String billNo = null;
        int firstPage = -1;
        for (int i = 0; i < pages.size(); i++) {
            String found = findBillNo(pages.get(i));
            // A different number starts a new bill; the same number repeated on
            // the next page just continues the current one.
            if (found != null && !found.equals(billNo)) {
                billNo = found;
                firstPage = i;
            }
            if (billNo == null) {
                // No bill has started yet, so there is nothing to attach this page to.
                System.out.println("skip page " + (i + 1) + ": no bill number found yet");
                continue;
            }
            // Re-putting grows the range to the current page while keeping its start.
            ranges.put(billNo, firstPage + "_" + i);
        }
        return ranges;
    }

    /**
     * Returns the first all-numeric row of a page, trimmed, or {@code null} if
     * the page has none.
     */
    private String findBillNo(List<String> rows) {
        for (String row : rows) {
            if (StrUtil.isNumeric(row.trim())) {
                System.out.println(row);
                return row.trim();
            }
        }
        return null;
    }

    /**
     * Writes one PDF per entry of {@code ranges}.
     *
     * @param document the opened source PDF
     * @param ranges   bill number to 0-based inclusive {@code "first_last"} range
     * @throws IOException if splitting or saving fails
     */
    private void saveBills(PDDocument document, Map<String, String> ranges) throws IOException {
        Splitter splitter = new Splitter();
        for (Map.Entry<String, String> entry : ranges.entrySet()) {
            String[] bounds = entry.getValue().split("_");
            // Splitter page numbers are 1-based.
            int start = Convert.toInt(bounds[0]) + 1;
            int end = Convert.toInt(bounds[1]) + 1;
            splitter.setStartPage(start);
            splitter.setEndPage(end);
            // Chunk size equal to the range length keeps the range in one part.
            splitter.setSplitAtPage(end - start + 1);
            String pdfName = targetPath + entry.getKey() + ".pdf";
            for (PDDocument part : splitter.split(document)) {
                try (PDDocument closing = part) {
                    closing.save(pdfName);
                }
            }
        }
    }
}