当我已知文本的坐标时,pdfbox 是否提供了用于高亮显示该文本的工具?
文本的界限是已知的.
我知道还有其他库提供相同的功能,如pdfclown等.但pdfbox提供类似的东西吗?
小智 10
这是对本页第 1 个回答的扩展,代码与上面的基本相同。
改进之处:坐标点现在根据当前文档的页面大小进行换算;同时调整了黄色——原来的黄色非常浅,当单词较短、字号较小时很难看清。
此外,取字符串中第一个字符和最后一个字符的 X、Y 坐标(从左上角到右上角),从而高亮显示整个单词。
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
public class MainSource extends PDFTextStripper {
public MainSource() throws IOException {
super();
}
public static void main(String[] args) throws IOException {
PDDocument document = null;
String fileName = "C:/AnyPDFFile.pdf";
try {
document = PDDocument.load( new File(fileName) );
PDFTextStripper stripper = new MainSource();
stripper.setSortByPosition( true );
stripper.setStartPage( 0 );
stripper.setEndPage( document.getNumberOfPages() );
Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
stripper.writeText(document, dummy);
File file1 = new File("C:/AnyPDFFile-New.pdf");
document.save(file1);
}
finally {
if( document != null ) {
document.close();
}
}
}
/**
* Override the default functionality of PDFTextStripper.writeString()
*/
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
boolean isFound = false;
float posXInit = 0,
posXEnd = 0,
posYInit = 0,
posYEnd = 0,
width = 0,
height = 0,
fontHeight = 0;
String[] criteria = {"Word1", "Word2", "Word3", ....};
for (int i = 0; i < criteria.length; i++) {
if (string.contains(criteria[i])) {
isFound = true;
}
}
if (isFound) {
posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth();
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj();
width = textPositions.get(0).getWidthDirAdj();
height = textPositions.get(0).getHeightDir();
System.out.println(string + "X-Init = " + posXInit + "; Y-Init = " + posYInit + "; X-End = " + posXEnd + "; Y-End = " + posYEnd + "; Font-Height = " + fontHeight);
/* numeration is index-based. Starts from 0 */
float quadPoints[] = {posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};
List<PDAnnotation> annotations = document.getPage(this.getCurrentPageNo() - 1).getAnnotations();
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
PDRectangle position = new PDRectangle();
position.setLowerLeftX(posXInit);
position.setLowerLeftY(posYEnd);
position.setUpperRightX(posXEnd);
position.setUpperRightY(posYEnd + height);
highlight.setRectangle(position);
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
// of the area to be highlighted
highlight.setQuadPoints(quadPoints);
PDColor yellow = new PDColor(new float[]{1, 1, 1 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
annotations.add(highlight);
}
}
}
Run Code Online (Sandbox Code Playgroud)
好吧,我发现了这一点.很简单.
// Sketch (PDFBox 1.x-style API): add a highlight annotation to page i of a document.
// Arguments shown as comments are placeholders to be filled in by the caller.
PDDocument doc = PDDocument.load(/*path to the file*/);
PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(i);
List annots = page.getAnnotations();
PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
markup.setRectangle(/*your PDRectangle*/);
// NOTE(review): some viewers expect the quad in Z-like order (top-left, top-right,
// bottom-left, bottom-right) rather than anticlockwise - verify in the target viewer.
markup.setQuadPoints(/*float array of size eight with all the vertices of the PDRectangle in anticlockwise order*/);
annots.add(markup);
doc.save(/*path to the output file*/);
// Release the resources held by the document once it has been saved.
doc.close();
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
6536 次 |
| 最近记录: |