main_47.py 687 B

123456789101112131415161718
  1. def pseudonymize(self, size=None):
  2. """
  3. Return pseudonymized values for this attribute, which is used to
  4. substitute identifiable data with a reversible, consistent value.
  5. """
  6. size = size or self.size
  7. if size != self.size:
  8. attr = Series(np.random.choice(self.bins, size=size, p=self.prs))
  9. else:
  10. attr = self
  11. if self.categorical:
  12. mapping = {b: utils.pseudonymise_string(b) for b in self.bins}
  13. return attr.map(lambda x: mapping[x])
  14. if self.type == 'string':
  15. return attr.map(utils.pseudonymise_string)
  16. elif self.is_numerical or self.type == 'datetime':
  17. return attr.map(str).map(utils.pseudonymise_string)