# converter1.py (1.1 KB)
  1. # -*- coding: utf-8 -*-
  2. import PyPDF2
  3. import os
  4. if(os.path.isdir("temp") == False):
  5. os.mkdir("temp")
  6. txtpath = ""
  7. pdfpath = ""
  8. pdfpath = input("Enter the name of your pdf file - please use backslash when typing in directory path: ") #Provide the path for your pdf here
  9. txtpath = input("Enter the name of your txt file - please use backslash when typing in directory path: ") #Provide the path for the output text file
  10. BASEDIR = os.path.realpath("temp") # This is the sample base directory where all your text files will be stored if you do not give a specific path
  11. print(BASEDIR)
  12. if(len(txtpath) == 0):
  13. txtpath = os.path.join(BASEDIR,os.path.basename(os.path.normpath(pdfpath)).replace(".pdf", "")+".txt")
  14. pdfobj = open(pdfpath, 'rb')
  15. pdfread = PyPDF2.PdfFileReader(pdfobj)
  16. x = pdfread.numPages
  17. for i in range(x):
  18. pageObj = pdfread.getPage(i)
  19. with open(txtpath, 'a+') as f:
  20. f.write((pageObj.extractText()))
  21. print(pageObj.extractText()) #This just provides the overview of what is being added to your output, you can remove it if want
  22. pdfobj.close()