1234567891011121314151617181920212223242526272829303132 |
def delete_matches_from_json_file(input_file, to_delete, compressed=False):
    """Copy a JSON Lines payload to a new buffer, omitting rows that match.

    The whole of ``input_file`` is read and decoded as UTF-8, then walked row
    by row with ``json_lines_iterator``. A row is dropped as soon as one entry
    of ``to_delete`` matches it; every other row is written to the writer as
    its unparsed text followed by a newline.

    Args:
        input_file: Source object handed to ``initialize``. The object that
            ``initialize`` returns in its place is read in full and the
            result decoded as UTF-8.
        to_delete: Iterable of match descriptors. An entry whose ``"Type"`` is
            ``"Simple"`` is matched on the single path in ``"Column"``; any
            other entry is matched on the list of paths in ``"Columns"``.
            Both kinds carry their candidate values in ``"MatchIds"``.
        compressed: Forwarded to ``initialize``; when truthy the writer is
            closed explicitly once all rows have been processed.

    Returns:
        A ``(out_stream, stats)`` tuple where ``stats`` is a ``Counter`` with
        the keys ``"ProcessedRows"`` and ``"DeletedRows"``.
    """

    def row_matches(spec, row):
        # Single-column descriptor: the extracted value must be truthy and
        # listed among the match ids.
        if spec["Type"] == "Simple":
            value = get_value(spec["Column"], row)
            return bool(value) and value in spec["MatchIds"]
        # Multi-column descriptor: collect the truthy values in column order
        # and look the resulting list up as a single item.
        found = list(
            filter(None, (get_value(path, row) for path in spec["Columns"]))
        )
        return found in spec["MatchIds"]

    rows_seen = 0
    rows_removed = 0
    with BufferOutputStream() as out_stream:
        input_file, writer = initialize(input_file, out_stream, compressed)
        text = input_file.read().decode("utf-8")
        for parsed, raw_line in json_lines_iterator(text, include_unparsed=True):
            rows_seen += 1
            # any() stops at the first matching descriptor.
            if any(row_matches(spec, parsed) for spec in to_delete):
                rows_removed += 1
                continue
            writer.write((raw_line + "\n").encode("utf-8"))
        if compressed:
            writer.close()
        stats = Counter({"ProcessedRows": rows_seen, "DeletedRows": rows_removed})
        # The return sits inside the ``with`` block, so the stream's context
        # exit runs as the function returns.
        return out_stream, stats
|