# python-extract-text-from-pdf.py (521 B)

  1. #Importing PDF reader PyPDF2
  2. import PyPDF2
  3. #Open file Path
  4. pdf_File = open('simple.pdf', 'rb')
  5. #Create PDF Reader Object
  6. pdf_Reader = PyPDF2.PdfFileReader(pdf_File)
  7. count = pdf_Reader.numPages # counts number of pages in pdf
  8. TextList = []
  9. #Extracting text data from each page of the pdf file
  10. for i in range(count):
  11. try:
  12. page = pdf_Reader.getPage(i)
  13. TextList.append(page.extractText())
  14. except:
  15. pass
  16. #Converting multiline text to single line text
  17. TextString = " ".join(TextList)
  18. print(TextString)