123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- from datetime import datetime
- from collections import OrderedDict
- import pathlib
- """
- Configuration file for Text Guide.
- How to use:
- Define the folder containing the data file, the filename of the data file, and the name of the column in the data
- file that holds the unstructured textual data. Select the truncation method and the name of the file (a dictionary)
- containing "key: value" pairs, where each key is an important token and each value is its feature importance.
- Select the sorting method for the important-token dictionary.
- """
class ExperimentConfig(OrderedDict):
    """Container for the settings of a single Text Guide experiment.

    Every setting is stored as a plain instance attribute (``cfg.filename``);
    the constructor does not insert any dictionary items, so the underlying
    ``OrderedDict`` starts out empty.
    """

    def __init__(self):
        """Populate the configuration with its hard-coded default values."""
        super().__init__()

        # Bookkeeping.
        # NOTE(review): ``datetime.utcnow()`` returns a *naive* datetime and is
        # deprecated since Python 3.12. It is kept here because switching to an
        # aware datetime could break callers that subtract naive timestamps.
        self.start_time = datetime.utcnow()
        # Name of this configuration file without its extension.
        self.config_name = pathlib.Path(__file__).stem
        # Placeholder, initially empty.
        # NOTE(review): presumably filled with the loaded data frame later - confirm.
        self.df = None

        # Input data: folder, data file name and the column holding the
        # unstructured text.
        self.data_folder = 'data'
        self.filename = "dmoz_100_instances.ftr"
        self.text_column = "Text"
        # NOTE(review): presumably the number of rows to process - confirm.
        self.nrows = 7

        # Truncation method and the file (dictionary) with
        # "important token: feature importance" pairs.
        self.truncation_method = 'text_guide'
        self.feature_importance_file = "dmoz_30_1500_sITFL.p"

        # Sorting method applied to the important-token dictionary.
        self.feature_importance_sort = 'descending'

        # NOTE(review): the settings below are not described in this file; the
        # remarks are assumptions derived from the names only - confirm
        # against the code that consumes them.
        # Presumably the target length of a truncated text instance.
        self.desired_length = 300
        # Presumably how many neighbouring tokens are kept on each side of an
        # important token.
        self.one_side_neighbours = 3

        # Presumably the fractions of the result taken from the beginning and
        # from the end of the original text.
        self.beg_part = 0.1
        self.end_part = 0.2

        self.over_length = 1

        self.number_of_important_token_occurrences = 1

        self.fill_up_to_limit = True

        # Presumably the number of CPU threads/workers to use.
        self.cpu_threads = 14
|