123456789101112131415161718192021222324252627282930313233343536373839 |
- from src.extension_strobemer_obj import *
- import pytest
# Module-level statement: it executes when this file is imported (for a
# pytest module, presumably at collection time), not inside any test.
- print("Pipe start")
def test_object_performance():
    """Smoke-test building, comparing and querying ``ext_strobemer_obj``.

    Steps, in order:
      1. Build a reference object (``smin=10``) from the toy genome file.
      2. Build a second object with ``smin=15`` (equal to ``l``) and check
         that its extended randstrobe dict equals the regular randstrobe
         dict of both objects.
      3. Check prefix lookups on the reference object's ``kmer_tst``.
      4. Call ``kmer_query`` / ``prefix_query`` for several ``kmer_type``
         values; their results are not asserted, they only have to run.
    """
    genome_path = "test_data/toy_data.fna"
    seq_by_record = generate_string_dict_from_genome(genome_path)

    # Constructor arguments shared by both objects under test.
    shared_params = dict(k=30, n=2, l=15, label="test")

    reference = ext_strobemer_obj(smin=10, filename="none", **shared_params)
    reference.load_seq_dict(seq_by_record)
    reference.build_kmer_dict_with_start_pos()
    reference.build_both_randstrobe_dict()
    reference.print_info()
    # reference.export_to_pkl()  # left disabled

    # Compare the two randstrobe variants.
    # Author's note: smin == l means a regular strobemer.
    regular_like = ext_strobemer_obj(smin=15, **shared_params)
    regular_like.load_seq_dict(seq_by_record)
    regular_like.build_kmer_dict_with_start_pos()
    regular_like.build_both_randstrobe_dict()
    extended = regular_like.ext_randstrobe_dict
    assert extended == reference.regu_randstrobe_dict
    assert regular_like.ext_randstrobe_dict == regular_like.regu_randstrobe_dict
    del extended
    del regular_like

    # Trie lookups: at least one stored key must start with this prefix.
    manual_prefix = 'CTAATGGAGAAACTCAT'
    matching_keys = reference.kmer_tst.keys(manual_prefix)
    assert len(matching_keys) > 0

    # The only stored k-mer that is a prefix of the padded sequence must be
    # the k-mer itself.
    unique_kmer = 'GTCTTGCAATAATGGCAAAACTAAATGTAC'
    padded_seq = unique_kmer + 'aaaaa'
    assert reference.kmer_tst.prefixes(padded_seq) == [unique_kmer]

    # Exact k-mer lookup under each supported kmer_type.
    for kind in ('kmer', 'rs', 'ers'):
        reference.kmer_query(unique_kmer, kmer_type=kind)

    # Prefix lookup: a short prefix under two types, then the full k-mer.
    prefix_cases = (
        ('GTCTTA', 'kmer'),
        ('GTCTTA', 'ers'),
        (unique_kmer, 'kmer'),
    )
    for prefix, kind in prefix_cases:
        reference.prefix_query(prefix, kmer_type=kind)
|