run.py 1.5 KB

1234567891011121314151617181920212223242526272829303132333435
  1. from datetime import datetime
  2. import pathlib
  3. from datetime import datetime
  4. import text_guide
  5. import importlib.util
  6. import time
  7. from os import listdir
  8. from os.path import isfile, join
  9. if __name__ == '__main__':
  10. # iterating over configuration files found in ./configs
  11. config_path = './configs'
  12. config_file_names = sorted([f for f in listdir(config_path) if isfile(join(config_path, f))])
  13. experiment_start = datetime.utcnow()
  14. time_list = list()
  15. for config_file_name in config_file_names:
  16. spec = importlib.util.spec_from_file_location('configs', join(config_path, config_file_name))
  17. config = importlib.util.module_from_spec(spec)
  18. spec.loader.exec_module(config)
  19. config = config.ExperimentConfig()
  20. print(f"\nThe analyzed config file name is: {config.config_name}\n")
  21. # the experiment begins
  22. print("Reading source data.")
  23. config = text_guide.utils.read_data(data_folder=config.data_folder, filename=config.filename,
  24. nrows=config.nrows, text_column=config.text_column, config=config)
  25. # text guide
  26. stime = datetime.utcnow()
  27. config = text_guide.utils.return_selected_window_of_tokens(config=config)
  28. config.df.reset_index(drop=True).to_feather(f"./{config.data_folder}/{config.config_name}.ftr")
  29. time_list.append((config.config_name, round(((datetime.utcnow() - stime).total_seconds() / 60), 2)))
  30. print(f"Time list: {time_list}")