json_handler_4.py 1.4 KB

1234567891011121314151617181920212223242526272829303132
  1. def delete_matches_from_json_file(input_file, to_delete, compressed=False):
  2. deleted_rows = 0
  3. with BufferOutputStream() as out_stream:
  4. input_file, writer = initialize(input_file, out_stream, compressed)
  5. content = input_file.read().decode("utf-8")
  6. total_rows = 0
  7. for parsed, line in json_lines_iterator(content, include_unparsed=True):
  8. total_rows += 1
  9. should_delete = False
  10. for column in to_delete:
  11. if column["Type"] == "Simple":
  12. record = get_value(column["Column"], parsed)
  13. if record and record in column["MatchIds"]:
  14. should_delete = True
  15. break
  16. else:
  17. matched = []
  18. for col in column["Columns"]:
  19. record = get_value(col, parsed)
  20. if record:
  21. matched.append(record)
  22. if matched in column["MatchIds"]:
  23. should_delete = True
  24. break
  25. if should_delete:
  26. deleted_rows += 1
  27. else:
  28. writer.write(bytes(line + "\n", "utf-8"))
  29. if compressed:
  30. writer.close()
  31. stats = Counter({"ProcessedRows": total_rows, "DeletedRows": deleted_rows})
  32. return out_stream, stats