def _pseudonymize_identifier_and_measure(
    input_csv_path: str,
    identifier_unit_id_type: str,
    measure_unit_id_type: str,
    job_id: str,
) -> str:
    """Write a copy of a CSV with identifier and measure columns pseudonymized.

    The input is read as ``;``-separated lines whose columns are, by position:
    line number, unit id, measure value, start date, stop date. Columns 1
    (unit id) and 2 (measure value) are replaced by the pseudonyms returned
    from ``pseudonym_service.pseudonymize``; the other three columns are
    copied unchanged. Any columns beyond the fifth are dropped.

    The file is read twice: once to collect the distinct values so each
    pseudonymization call is made a single time per column, and once to
    write the translated rows.

    Args:
        input_csv_path: Path of the CSV file to read.
        identifier_unit_id_type: Unit id type passed to the pseudonym service
            for the unit id column.
        measure_unit_id_type: Unit id type passed to the pseudonym service
            for the measure value column.
        job_id: Job id forwarded to the pseudonym service.

    Returns:
        Path of the written file: the input path with its last ``.csv``
        replaced by ``_pseudonymized.csv``, or with ``_pseudonymized.csv``
        appended when the input path contains no ``.csv``.

    Raises:
        IndexError: If a line has fewer columns than are read from it
            (three in the first pass, five in the second).
        KeyError: If the pseudonym service result lacks a value that is
            present in the file.
    """
    # Pass 1: collect the distinct values of both columns.
    unique_idents = set()
    unique_measure_values = set()
    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
        for line in csv_file:
            row = line.strip().split(';')
            unique_idents.add(row[1])
            unique_measure_values.add(row[2])

    identifier_to_pseudonym = pseudonym_service.pseudonymize(
        list(unique_idents), identifier_unit_id_type, job_id
    )
    value_to_pseudonym = pseudonym_service.pseudonymize(
        list(unique_measure_values), measure_unit_id_type, job_id
    )

    # Only the last '.csv' is rewritten, so a '.csv' earlier in the path
    # (e.g. in a directory name) is left alone. Without any '.csv' the suffix
    # is appended: the output must never be the input file itself, because
    # opening it for writing would truncate the data before it is re-read.
    head, extension, tail = input_csv_path.rpartition('.csv')
    if extension:
        output_csv_path = f'{head}_pseudonymized.csv{tail}'
    else:
        output_csv_path = f'{input_csv_path}_pseudonymized.csv'

    # Pass 2: both files are context-managed so the output is flushed and
    # closed even when a row fails to translate.
    with open(
        input_csv_path, newline='', encoding='utf-8'
    ) as csv_file, open(
        output_csv_path, 'w', newline='', encoding='utf-8'
    ) as target_file:
        for line in csv_file:
            row = line.strip().split(';')
            line_number = row[0]
            unit_id = row[1]
            value = row[2]
            start_date = row[3]
            stop_date = row[4]
            target_file.write(
                ';'.join([
                    line_number,
                    str(identifier_to_pseudonym[unit_id]),
                    str(value_to_pseudonym[value]),
                    start_date,
                    stop_date,
                ]) + '\n'
            )

    return output_csv_path