project_creator.py 3.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. ############################################################################
  2. ##### Transposon Annotator reasonaTE - part of Transposon Ultimate #########
  3. ##### Kevin Riehl (kevin.riehl.de@gmail.com, 2021) #########################
  4. ############################################################################
  5. # Imports
  6. from Bio.SeqRecord import SeqRecord
  7. from Bio import SeqIO
  8. from os import path
  9. import os.path
  10. # Methods
  11. def make_rc_record(record):
  12. return SeqRecord(seq = record.seq.reverse_complement(), id = record.id, description="")
  13. def copySequenceClean(fromFile,projectFolderPath):
  14. # Copy sequence and clean heads
  15. f1 = open(fromFile,"r")
  16. f2 = open(os.path.join(projectFolderPath,"sequence.fasta"),"w+")
  17. f3 = open(os.path.join(projectFolderPath,"sequence_heads.txt"),"w+")
  18. line = f1.readline()
  19. counter = 0
  20. while line!="":
  21. if(line.startswith(">")):
  22. counter += 1
  23. f3.write(">seq"+str(counter)+"\t"+line)
  24. f2.write(">seq"+str(counter)+"\n")
  25. else:
  26. f2.write(line.upper())
  27. line = f1.readline()
  28. f1.close()
  29. f2.close()
  30. f3.close()
  31. # Create reverse complement Fasta file
  32. records = map(make_rc_record, SeqIO.parse(os.path.join(projectFolderPath,"sequence.fasta"), "fasta"))
  33. SeqIO.write(records, os.path.join(projectFolderPath,"sequence_rc.fasta"), "fasta")
  34. records = map(make_rc_record, SeqIO.parse(os.path.join(projectFolderPath,"sequence_rc.fasta"), "fasta"))
  35. SeqIO.write(records, os.path.join(projectFolderPath,"sequence.fasta"), "fasta")
  36. def createProject(projectFolder, projectName, inputFasta):
  37. # Check if project folder exists
  38. if(not path.isdir(projectFolder)):
  39. os.mkdir(projectFolder)
  40. # Check if given project already exits
  41. projectFolderPath = os.path.join(projectFolder,projectName)
  42. if(path.isdir(projectFolderPath)):
  43. print("Project already exists, process aborted")
  44. return "EXIT"
  45. os.mkdir(projectFolderPath)
  46. # Create folder structure for annotation softwares
  47. os.mkdir(os.path.join(projectFolderPath,"tirvish"))
  48. os.mkdir(os.path.join(projectFolderPath,"tirvish_rc"))
  49. os.mkdir(os.path.join(projectFolderPath,"sinescan"))
  50. os.mkdir(os.path.join(projectFolderPath,"sinefind"))
  51. os.mkdir(os.path.join(projectFolderPath,"sinefind_rc"))
  52. os.mkdir(os.path.join(projectFolderPath,"repMasker"))
  53. os.mkdir(os.path.join(projectFolderPath,"repeatmodel"))
  54. os.mkdir(os.path.join(projectFolderPath,"must"))
  55. os.mkdir(os.path.join(projectFolderPath,"mitetracker"))
  56. os.mkdir(os.path.join(projectFolderPath,"mitetracker_rc"))
  57. os.mkdir(os.path.join(projectFolderPath,"mitefind"))
  58. os.mkdir(os.path.join(projectFolderPath,"mitefind_rc"))
  59. os.mkdir(os.path.join(projectFolderPath,"ltrPred"))
  60. os.mkdir(os.path.join(projectFolderPath,"ltrHarvest"))
  61. os.mkdir(os.path.join(projectFolderPath,"helitronScanner"))
  62. os.mkdir(os.path.join(projectFolderPath,"helitronScanner_rc"))
  63. os.mkdir(os.path.join(projectFolderPath,"transposonPSI"))
  64. os.mkdir(os.path.join(projectFolderPath,"NCBICDD1000"))
  65. os.mkdir(os.path.join(projectFolderPath,"parsedAnnotations"))
  66. os.mkdir(os.path.join(projectFolderPath,"transposonCandA"))
  67. os.mkdir(os.path.join(projectFolderPath,"transposonCandB"))
  68. os.mkdir(os.path.join(projectFolderPath,"transposonCandC"))
  69. os.mkdir(os.path.join(projectFolderPath,"transposonCandD"))
  70. os.mkdir(os.path.join(projectFolderPath,"transposonCandE"))
  71. os.mkdir(os.path.join(projectFolderPath,"transposonCandF"))
  72. os.mkdir(os.path.join(projectFolderPath,"finalResults"))
  73. # Copy DNA into folder
  74. copySequenceClean(inputFasta,projectFolderPath)
  75. #createProject("projects", "testProject", "G:/CambridgeGenData/GenSeq/RHIZIPHAGUS_IRR/rir17contigs.fasta")