123456789101112131415161718192021222324252627282930313233 |
def _pseudonymize_identifier_only(
    input_csv_path: str,
    unit_id_type: str,
    job_id: str
) -> str:
    """Write a copy of a ';'-separated file with the unit ids pseudonymized.

    The input is read twice: once to collect the distinct values found in
    the second column, and once to rewrite every row with that column
    replaced by the pseudonym returned from
    ``pseudonym_service.pseudonymize``. Only the first five columns
    (line number, unit id, value, start date, stop date) are written;
    any further columns are dropped.

    Args:
        input_csv_path: Path of the ';'-separated input file.
        unit_id_type: Passed through unchanged to the pseudonym service.
        job_id: Passed through unchanged to the pseudonym service.

    Returns:
        Path of the written file: the input path with its ``.csv``
        extension replaced by ``_pseudonymized.csv``.

    Raises:
        ValueError: If the derived output path equals the input path
            (the input path contains no ``.csv``), since writing would
            truncate the input file.
        IndexError: If a row has fewer than five ';'-separated fields.
        KeyError: If the pseudonym service returned no entry for a unit id
            (assumes the service returns a dict-like mapping).
    """
    csv_suffix = '.csv'
    pseudonymized_suffix = '_pseudonymized.csv'
    if input_csv_path.endswith(csv_suffix):
        # Swap the extension only; str.replace would also rewrite any
        # '.csv' that appears earlier in the path (e.g. a directory name).
        output_csv_path = (
            input_csv_path[:-len(csv_suffix)] + pseudonymized_suffix
        )
    else:
        # Keep the historical result for paths not ending in '.csv'.
        output_csv_path = input_csv_path.replace(
            csv_suffix, pseudonymized_suffix
        )
    if output_csv_path == input_csv_path:
        # Opening the output in 'w' mode would wipe the input file.
        raise ValueError(
            f'Cannot derive an output path from {input_csv_path!r}: '
            f'it does not contain {csv_suffix!r}'
        )

    # First pass: collect every distinct unit id so the service is called
    # once for the whole file.
    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
        unique_identifiers = {
            line.strip().split(';')[1] for line in csv_file
        }
    identifier_to_pseudonym = pseudonym_service.pseudonymize(
        list(unique_identifiers), unit_id_type, job_id
    )

    # Second pass: both files are managed by the with-statement so the
    # output is flushed and closed even if a row fails to convert.
    with open(
        input_csv_path, newline='', encoding='utf-8'
    ) as csv_file, open(
        output_csv_path, 'w', newline='', encoding='utf-8'
    ) as target_file:
        for line in csv_file:
            row = line.strip().split(';')
            line_number, unit_id, value, start_date, stop_date = (
                row[0], row[1], row[2], row[3], row[4]
            )
            target_file.write(
                ';'.join([
                    line_number,
                    str(identifier_to_pseudonym[unit_id]),
                    value,
                    start_date,
                    stop_date,
                ]) + '\n'
            )
    return output_csv_path
|