import java.io.FileInputStream;
import java.io.BufferedReader ;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.StringWriter;
import java.io.FileNotFoundException;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.hssf.extractor.ExcelExtractor ;
import org.apache.poi.hwpf.extractor.WordExtractor ; 
import org.apache.poi.hslf.extractor.PowerPointExtractor ; 

import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.WriteOutContentHandler;
import org.apache.tika.Tika;


/**
 * Static helpers that extract plain text from documents.
 *
 * <p>PDF files are delegated to the project's {@code PDFTextParser}; OLE2 Excel, Word and
 * PowerPoint files are read through the Apache POI extractors; {@link #rtfToTxt(String)}
 * hands the file to Apache Tika's parser. {@link #word2txt(String, String, String)} shells
 * out to a {@code DocumentConverter.py} script to turn a document into a PDF first and then
 * extracts the text of that PDF.
 *
 * <p>The extraction methods are best-effort: on any failure they print a one-line diagnostic
 * on {@code System.err} and return an empty string instead of throwing.
 */
public class Pdftotext{

	/** Value returned by {@link #word2txt} when the external converter does not exit with 0. */
	private static final String ERROR_OPENOFFICE = "ERROR_OPENOFFICE";

	/**
	 * Extracts the text of a PDF file through {@code PDFTextParser}.
	 *
	 * @param pdf path of the PDF file
	 * @return whatever {@code PDFTextParser.pdftoText} returns, or {@code ""} if it throws
	 */
	public static String pdfToTxt(String pdf){
		String txt = "";
		try {
			PDFTextParser parser = new PDFTextParser();
			txt = parser.pdftoText(pdf);
		} catch (Exception e) {
			reportFailure("PDF text extraction", pdf, e);
		}
		return txt;
	}

	/**
	 * Extracts the text of an Excel workbook with POI's {@code ExcelExtractor}; sheet names
	 * are excluded from the output.
	 *
	 * @param filename path of the workbook
	 * @return the extracted text, or {@code ""} if the file cannot be opened or parsed
	 */
	public static String xlsToTxt(String filename){
		String text = "";
		// try-with-resources guarantees the file handle is released even when parsing fails.
		try (FileInputStream input = new FileInputStream(filename)) {
			ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(input));
			extractor.setIncludeSheetNames(false);
			text = extractor.getText();
		} catch (Exception e) {
			reportFailure("Excel text extraction", filename, e);
		}
		return text;
	}

	/**
	 * Extracts the text of a Word document with POI's {@code WordExtractor}.
	 *
	 * @param filename path of the document
	 * @return the extracted text, or {@code ""} if the file cannot be opened or parsed
	 */
	public static String wordToTxt(String filename){
		String text = "";
		try (FileInputStream input = new FileInputStream(filename)) {
			WordExtractor extractor = new WordExtractor(new POIFSFileSystem(input));
			text = extractor.getText();
		} catch (Exception e) {
			reportFailure("Word text extraction", filename, e);
		}
		return text;
	}

	/**
	 * Extracts the text of a PowerPoint presentation with POI's {@code PowerPointExtractor}.
	 *
	 * @param filename path of the presentation
	 * @return the extracted text, or {@code ""} if the file cannot be opened or parsed
	 */
	public static String pptToTxt(String filename){
		String text = "";
		try (FileInputStream input = new FileInputStream(filename)) {
			PowerPointExtractor extractor = new PowerPointExtractor(new POIFSFileSystem(input));
			text = extractor.getText();
		} catch (Exception e) {
			reportFailure("PowerPoint text extraction", filename, e);
		}
		return text;
	}

	/**
	 * Extracts the text of a file through the parser returned by {@code Tika.getParser()} and
	 * collapses every run of whitespace into a single space.
	 *
	 * @param filename path of the file to parse
	 * @return the whitespace-normalised text, or {@code ""} if the file cannot be opened or parsed
	 */
	public static String rtfToTxt(String filename){
		String content = "";
		try (FileInputStream input = new FileInputStream(filename)) {
			StringWriter writer = new StringWriter();
			Metadata metadata = new Metadata();
			new Tika().getParser().parse(
					input,
					new WriteOutContentHandler(writer),
					metadata,
					new ParseContext());
			content = writer.toString().replaceAll("\\s+", " ");
		} catch (Exception e) {
			reportFailure("Tika text extraction", filename, e);
		}
		return content;
	}

	/**
	 * Runs an external command, echoes its output line by line on {@code System.out} and
	 * waits for it to finish.
	 *
	 * <p>The child's standard error is merged into its standard output, and the output is
	 * fully consumed <em>before</em> waiting for the exit code. Waiting first can block
	 * forever once the child has filled the pipe buffer of a stream nobody is reading.
	 *
	 * @param command the program to run followed by its arguments
	 * @return the exit code of the process, or {@code -1} if it could not be started, its
	 *         output could not be read, or the wait was interrupted
	 */
	public static int exec(String[] command){
		int code = -1;
		try {
			ProcessBuilder builder = new ProcessBuilder(command);
			builder.redirectErrorStream(true);
			Process process = builder.start();
			BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
			try {
				String line;
				while ((line = reader.readLine()) != null) {
					System.out.println(line);
				}
			} finally {
				reader.close();
			}
			// Output has reached EOF, so the child can no longer be blocked on a full pipe.
			code = process.waitFor();
		} catch (InterruptedException e) {
			// Restore the interrupt flag so callers further up can still observe it.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} catch (Exception e) {
			e.printStackTrace();
		}
		return code;
	}

	/**
	 * Replaces a trailing {@code .doc}, {@code .docx}, {@code .xls} or {@code .xlsx}
	 * extension (matched case-insensitively) with {@code .pdf}.
	 *
	 * <p>The extension is everything from the last {@code '.'} of the string. The dot is
	 * located on the original string rather than on a lower-cased copy, so the cut position
	 * is always valid for the string that is actually sliced.
	 *
	 * @param str file name or path; may be {@code null}
	 * @return the name with a {@code .pdf} extension, or {@code str} unchanged when it is
	 *         {@code null}, contains no dot, or has any other extension
	 */
	public static String renameExtensionFiletoPdf(String str)
	{
		if (str == null) {
			return null;
		}
		int dot = str.lastIndexOf('.');
		if (dot < 0) {
			// No extension at all: nothing to rename.
			return str;
		}
		String extension = str.substring(dot);
		boolean wordDocument = ".doc".equalsIgnoreCase(extension) || ".docx".equalsIgnoreCase(extension);
		boolean spreadsheet = ".xls".equalsIgnoreCase(extension) || ".xlsx".equalsIgnoreCase(extension);
		if (wordDocument || spreadsheet) {
			return str.substring(0, dot) + ".pdf";
		}
		return str;
	}

	/**
	 * Converts a document to PDF by running {@code DocumentConverter.py} and returns the text
	 * of the resulting PDF.
	 *
	 * <p>The command executed is
	 * {@code dirPython dirProgramOffice/DocumentConverter.py infile outfile}, where
	 * {@code outfile} is {@link #renameExtensionFiletoPdf(String) renameExtensionFiletoPdf(infile)}.
	 * For extensions that method does not map, {@code outfile} is identical to {@code infile}.
	 *
	 * @param dirPython        path of the Python interpreter to launch
	 * @param dirProgramOffice directory that contains {@code DocumentConverter.py}
	 * @param infile           path of the document to convert
	 * @return the text extracted from the generated PDF, or {@code "ERROR_OPENOFFICE"} when
	 *         the converter does not exit with code 0
	 */
	public static String word2txt(String dirPython, String dirProgramOffice, String infile){
		String outfile = renameExtensionFiletoPdf(infile);
		String[] command = new String[]{dirPython, dirProgramOffice+"/DocumentConverter.py", infile, outfile};
		if (exec(command) != 0) {
			return ERROR_OPENOFFICE;
		}
		return pdfToTxt(outfile);
	}

	/**
	 * Manual smoke test: converts one document and prints its text.
	 *
	 * <p>With at least three arguments they are used as {@code dirPython},
	 * {@code dirProgramOffice} and {@code infile} (in that order); otherwise the built-in
	 * Windows sample paths are used.
	 *
	 * @param args optional {@code dirPython dirProgramOffice infile}
	 */
	public static void main(String[] args) throws Exception{
		String dirProgramOffice = "C:/Program Files/OpenOffice.org 3/program";
		String dirPython = "C:/Program Files/OpenOffice.org 3/program/python";
		String infile = "D:/bureau/Nouveau dossier/Test.doc";
		if (args != null && args.length >= 3) {
			dirPython = args[0];
			dirProgramOffice = args[1];
			infile = args[2];
		}
		String ret = Pdftotext.word2txt(dirPython, dirProgramOffice, infile);
		System.out.println(ret);
	}

	/** Prints a one-line diagnostic for a best-effort extraction that failed. */
	private static void reportFailure(String operation, String file, Exception e){
		System.err.println("Pdftotext: " + operation + " failed for '" + file + "': " + e);
	}
}