# python-pdf-text-to-audio.py (773 B)
  1. #Importing Libraries
  2. #Importing Google Text to Speech library
  3. from gtts import gTTS
  4. #Importing PDF reader PyPDF2
  5. import PyPDF2
  6. #Open file Path
  7. pdf_File = open('simple.pdf', 'rb')
  8. #Create PDF Reader Object
  9. pdf_Reader = PyPDF2.PdfFileReader(pdf_File)
  10. count = pdf_Reader.numPages # counts number of pages in pdf
  11. textList = []
  12. #Extracting text data from each page of the pdf file
  13. for i in range(count):
  14. try:
  15. page = pdf_Reader.getPage(i)
  16. textList.append(page.extractText())
  17. except:
  18. pass
  19. #Converting multiline text to single line text
  20. textString = " ".join(textList)
  21. print(textString)
  22. #Set language to english (en)
  23. language = 'en'
  24. #Call GTTS
  25. myAudio = gTTS(text=textString, lang=language, slow=False)
  26. #Save as mp3 file
  27. myAudio.save("Audio.mp3")