# test_extension_strobemer_obj.py (1.6 KB)
  1. from src.extension_strobemer_obj import *
  2. import pytest
  3. print("Pipe start")
  4. def test_object_performance():
  5. input_file = "test_data/toy_data.fna"
  6. raw_seq_dict = generate_string_dict_from_genome(input_file)
  7. temp_obj = ext_strobemer_obj(k=30, n=2, l=15, smin=10, label="test", filename="none")
  8. temp_obj.load_seq_dict(raw_seq_dict)
  9. temp_obj.build_kmer_dict_with_start_pos()
  10. temp_obj.build_both_randstrobe_dict()
  11. temp_obj.print_info()
  12. #temp_obj.export_to_pkl()
  13. # check 2 version of randstrobe
  14. test_ext = ext_strobemer_obj(k=30, n=2, l=15, smin=15, label="test")
  15. test_ext.load_seq_dict(raw_seq_dict)
  16. test_ext.build_kmer_dict_with_start_pos()
  17. test_ext.build_both_randstrobe_dict()
  18. # smin = l means a regular strobemer
  19. assert test_ext.ext_randstrobe_dict == temp_obj.regu_randstrobe_dict
  20. assert test_ext.ext_randstrobe_dict == test_ext.regu_randstrobe_dict
  21. del test_ext
  22. # trie practice
  23. some_manual_prefix = 'CTAATGGAGAAACTCAT'
  24. assert len(temp_obj.kmer_tst.keys(some_manual_prefix)) > 0
  25. # find kmer in trie that is a prefix of a given seq
  26. some_unique_kmer = 'GTCTTGCAATAATGGCAAAACTAAATGTAC'
  27. assert temp_obj.kmer_tst.prefixes(some_unique_kmer + 'aaaaa') == [some_unique_kmer]
  28. # kmer and prefix lookup
  29. temp_obj.kmer_query('GTCTTGCAATAATGGCAAAACTAAATGTAC', kmer_type='kmer')
  30. temp_obj.kmer_query('GTCTTGCAATAATGGCAAAACTAAATGTAC', kmer_type='rs')
  31. temp_obj.kmer_query('GTCTTGCAATAATGGCAAAACTAAATGTAC', kmer_type='ers')
  32. temp_obj.prefix_query('GTCTTA', kmer_type='kmer')
  33. temp_obj.prefix_query('GTCTTA', kmer_type='ers')
  34. temp_obj.prefix_query('GTCTTGCAATAATGGCAAAACTAAATGTAC', kmer_type='kmer')