import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
 

/**
 * PDFTextParser -- extracts text from PDF documents using Apache PDFBox
 *
 * @author Alex Mousavi and Kyle Jorgensen
 * @version Choice Points Project1 for CS56, Spring 2011
 * @see PDFTextParserTest
 */

//class PDFTextParser adapted from http://thottingal.in/blog/2009/06/24/pdfbox-extract-text-from-pdf/ 

public class PDFTextParser {

    /** First page (1-based) handed to the text stripper. */
    private static final int FIRST_PAGE = 1;

    /** Last page (inclusive) handed to the text stripper. */
    private static final int LAST_PAGE = 5;

    /**
     * Extracts the text of pages {@link #FIRST_PAGE} through {@link #LAST_PAGE}
     * of the PDF file at the given path.
     *
     * <p>Failures are reported on {@code System.err} and signalled by a
     * {@code null} return value; no exception is propagated to the caller.
     *
     * @param fileName path of the PDF file to read; may be {@code null}
     * @return the extracted text, or {@code null} if {@code fileName} is
     *         {@code null}, does not name a regular file, cannot be opened,
     *         or cannot be parsed
     */
    static String pdftoText(String fileName) {
        // new File(null) would throw NullPointerException; treat a missing
        // name like a missing file so callers only ever have to check for null.
        if (fileName == null) {
            System.err.println("File " + fileName + " does not exist.");
            return null;
        }
        File file = new File(fileName);
        if (!file.isFile()) {
            System.err.println("File " + fileName + " does not exist.");
            return null;
        }

        InputStream in;
        try {
            in = new FileInputStream(file);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return null;
        }

        try {
            return pdftoText(in);
        } finally {
            // This method opened the stream, so it is responsible for closing
            // it on every path (including parser construction failures).
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Extracts the text of pages {@link #FIRST_PAGE} through {@link #LAST_PAGE}
     * of the PDF document read from the given stream.
     *
     * <p>Failures are reported on {@code System.err} and signalled by a
     * {@code null} return value; no exception is propagated to the caller.
     * This method does not explicitly close {@code is}.
     * NOTE(review): some PDFBox versions close the source stream themselves
     * while parsing -- confirm against the PDFBox version in use.
     *
     * @param is stream positioned at the start of a PDF document; may be
     *           {@code null}
     * @return the extracted text, or {@code null} if {@code is} is
     *         {@code null}, the parser cannot be created, or parsing or
     *         text extraction fails
     */
    static String pdftoText(InputStream is) {
        if (is == null) {
            System.err.println("Unable to open PDF Parser. " + "Input stream is null.");
            return null;
        }

        PDFParser parser;
        try {
            parser = new PDFParser(is);
        } catch (IOException e) {
            System.err.println("Unable to open PDF Parser. " + e.getMessage());
            return null;
        }

        COSDocument cosDoc = null;
        PDDocument pdDoc = null;
        try {
            parser.parse();
            cosDoc = parser.getDocument();
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdDoc = new PDDocument(cosDoc);
            pdfStripper.setStartPage(FIRST_PAGE);
            pdfStripper.setEndPage(LAST_PAGE);
            return pdfStripper.getText(pdDoc);
        } catch (Exception e) {
            System.err
                .println("An exception occured in parsing the PDF Document."
                         + e.getMessage());
            return null;
        } finally {
            closeDocuments(pdDoc, cosDoc);
        }
    }

    /**
     * Releases the PDFBox document objects created during extraction.
     *
     * <p>Closing the {@code PDDocument} also closes the {@code COSDocument} it
     * wraps, so the low-level document is closed directly only when the
     * wrapper was never created. Close failures are printed, not rethrown,
     * so they cannot mask the extraction result.
     *
     * @param pdDoc  high-level document, or {@code null} if never created
     * @param cosDoc low-level document, or {@code null} if never created
     */
    private static void closeDocuments(PDDocument pdDoc, COSDocument cosDoc) {
        try {
            if (pdDoc != null) {
                pdDoc.close();
            } else if (cosDoc != null) {
                cosDoc.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}