dataFrameAnonymizer_2.py 792 B

123456789101112131415161718192021
  def anonymize(self, df, k, l=0):
      """Validate the inputs, partition ``df`` and build the anonymized dataframe.

      Args:
          df: Dataframe to anonymize. Must not be ``None`` and must have at
              least one row.
          k: Forwarded unchanged as the first argument of
              ``MondrianAnonymizer.partition``.
              NOTE(review): presumably the k-anonymity group size -- confirm.
          l: Forwarded unchanged as the second argument of
              ``MondrianAnonymizer.partition`` (defaults to 0).
              NOTE(review): presumably the l-diversity threshold -- confirm.

      Returns:
          Whatever ``self.build_anonymized_dataframe(df, partitions)`` returns.

      Raises:
          ValueError: If ``df`` is ``None`` or has no rows, if no sensitive
              attribute columns are configured, or if a column listed in
              ``self.avg_columns`` is not numeric. ``ValueError`` subclasses
              ``Exception``, so callers catching ``Exception`` keep working.
      """
      # Check inputs. ``len(df)`` is used instead of truthiness because the
      # truth value of a pandas DataFrame is ambiguous and raises.
      if df is None or len(df) == 0:
          raise ValueError("Dataframe is empty")
      if self.sensitive_attribute_columns is None or len(self.sensitive_attribute_columns) == 0:
          raise ValueError("Provide at least one sensitive attribute column")

      # No feature columns configured yet: let the instance initialise them
      # from the dataframe (assumed to populate ``self.feature_columns``).
      if not self.feature_columns:
          self.init_feature_colums(df)

      # An average can only be computed over numeric columns.
      if self.avg_columns:
          for c in self.avg_columns:
              if not is_numeric_dtype(df[c]):
                  # Format the label instead of concatenating it: column
                  # labels are not necessarily strings (e.g. integer labels).
                  raise ValueError(
                      "Column {} is not numeric and average cannot be calculated.".format(c)
                  )

      mondrian = MondrianAnonymizer(df, self.feature_columns, self.sensitive_attribute_columns)
      partitions = mondrian.partition(k, l)
      return self.build_anonymized_dataframe(df, partitions)