LiuFan
/
PrivacyScanData


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233
							def _pseudonymize_identifier_only(
    input_csv_path: str,
    unit_id_type: str,
    job_id: str
) -> str:
    unique_identifiers = set()
    with open(input_csv_path, newline='', encoding='utf8') as csv_file:
        for line in csv_file:
            unit_id = line.strip().split(';')[1]
            unique_identifiers.add(unit_id)
    identifier_to_pseudonym = pseudonym_service.pseudonymize(
        list(unique_identifiers), unit_id_type, job_id
    )
    output_csv_path = input_csv_path.replace('.csv', '_pseudonymized.csv')
    target_file = open(output_csv_path, 'w', newline='', encoding='utf-8')
    with open(input_csv_path, newline='', encoding='utf-8') as csv_file:
        for line in csv_file:
            row = line.strip().split(';')
            line_number: int = row[0]
            unit_id: str = row[1]
            value: str = row[2]
            start_date: str = row[3]
            stop_date: str = row[4]
            target_file.write(
                ';'.join([
                    str(line_number),
                    str(identifier_to_pseudonym[unit_id]),
                    value,
                    start_date, stop_date
                ]) + '\n'
            )
    target_file.close()
    return output_csv_path