This program reads a PDF file and writes its content to text files
using the iText 5.3.5 library. Each page of the PDF is written to a
separate text file: for example, the first page is written to the first
text file, the second page to the second text file, and so on. First
download the library and add it to your project. Here is
the source code. Enjoy programming!!!
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
/**
 * Reads an existing PDF file with the iText library and writes the text
 * of every page to its own text file.
 *
 * <p>The input is the file {@code test.pdf}; page {@code i} is written to
 * {@code newfile<i>.txt} (page numbers start at 1). Both paths are
 * relative to the working directory.
 *
 * @author javawithease
 */
public class PDFReadExample {

    /**
     * Extracts the text of each page of {@code test.pdf}, echoes it to the
     * console and stores it in a per-page text file.
     *
     * <p>Any exception is reported by printing its stack trace; it is not
     * rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        try {
            // Create PdfReader instance.
            PdfReader pdfReader = new PdfReader("test.pdf");
            try {
                // Get the number of pages in pdf.
                int pages = pdfReader.getNumberOfPages();

                // Iterate the pdf through pages (iText pages are 1-based).
                for (int i = 1; i <= pages; i++) {
                    // Extract the page content using PdfTextExtractor.
                    String pageContent =
                            PdfTextExtractor.getTextFromPage(pdfReader, i);

                    // Print the page content on console.
                    System.out.println("Content on Page "
                            + i + ": " + pageContent);

                    File file = new File("newfile" + i + ".txt");
                    if (file.createNewFile()) {
                        System.out.println("File is created!");
                    } else {
                        // An existing file is overwritten by the writer below.
                        System.out.println("File already exists.");
                    }

                    // try-with-resources flushes and closes the writer even
                    // if write() fails, so no file handle leaks per page.
                    try (BufferedWriter writer =
                            new BufferedWriter(new FileWriter(file))) {
                        writer.write(pageContent);
                    }
                }
            } finally {
                // Close the PdfReader even when a page fails to process.
                pdfReader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
/**
 * Converts a PDF file to plain text, one text file per page, using the
 * iText library.
 *
 * <p>The program reads {@code test.pdf} and writes the text of page
 * {@code i} to {@code newfile<i>.txt} (page numbers start at 1). Both
 * paths are relative to the working directory.
 *
 * @author javawithease
 */
public class PDFReadExample {

    /**
     * Walks through every page of {@code test.pdf}, prints its text to the
     * console and saves it to a per-page text file.
     *
     * <p>Any exception is reported by printing its stack trace; it is not
     * rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        try {
            PdfReader pdfReader = new PdfReader("test.pdf");
            try {
                int pages = pdfReader.getNumberOfPages();
                // iText page numbers are 1-based.
                for (int i = 1; i <= pages; i++) {
                    String pageContent =
                            PdfTextExtractor.getTextFromPage(pdfReader, i);
                    System.out.println("Content on Page "
                            + i + ": " + pageContent);
                    writePage(new File("newfile" + i + ".txt"), pageContent);
                }
            } finally {
                // Release the reader whether or not every page succeeded.
                pdfReader.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the text of one page to the given file, reporting on the
     * console whether the file had to be created.
     *
     * @param file        destination file; overwritten if it already exists
     * @param pageContent text to store in the file
     * @throws IOException if the file cannot be created or written
     */
    private static void writePage(File file, String pageContent)
            throws IOException {
        if (file.createNewFile()) {
            System.out.println("File is created!");
        } else {
            System.out.println("File already exists.");
        }
        // The writer is closed (and thereby flushed) on every path, so the
        // file handle is not leaked when write() throws.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            writer.write(pageContent);
        }
    }
}
This can read an entire PDF book and write its contents to text files, page by page.
ReplyDelete