def delete_matches_from_json_file(input_file, to_delete, compressed=False):
    """Copy a JSON-lines file to a new buffer, dropping rows that match.

    Each row yielded by ``json_lines_iterator`` is checked against every
    entry of ``to_delete``. A row is dropped as soon as one entry matches;
    otherwise its original (unparsed) line is written to the output
    followed by a newline.

    Args:
        input_file: Readable object whose ``read()`` returns UTF-8 bytes
            once passed through ``initialize``.
        to_delete: Iterable of match specifications. An entry whose
            ``"Type"`` is ``"Simple"`` is read via ``"Column"``; any other
            type is treated as a multi-column match read via ``"Columns"``.
            Both kinds carry the values to remove in ``"MatchIds"``.
        compressed: Forwarded to ``initialize``; when truthy the writer is
            explicitly closed after the last row is processed.

    Returns:
        A ``(out_stream, stats)`` tuple, where ``stats`` is a ``Counter``
        with the keys ``"ProcessedRows"`` and ``"DeletedRows"``.
    """

    def row_matches(spec, row):
        # Single-column match: falsy values (None, "", 0, ...) never match.
        if spec["Type"] == "Simple":
            value = get_value(spec["Column"], row)
            return bool(value) and value in spec["MatchIds"]
        # Multi-column match: gather the truthy values in column order and
        # look the resulting list up as a whole.
        # NOTE(review): presumably "MatchIds" holds lists here - confirm.
        extracted = (get_value(name, row) for name in spec["Columns"])
        present = [value for value in extracted if value]
        return present in spec["MatchIds"]

    removed = 0
    with BufferOutputStream() as out_stream:
        source, writer = initialize(input_file, out_stream, compressed)
        text = source.read().decode("utf-8")
        seen = 0
        for row, raw_line in json_lines_iterator(text, include_unparsed=True):
            seen += 1
            # any() stops at the first matching spec, like the original break.
            if any(row_matches(spec, row) for spec in to_delete):
                removed += 1
                continue
            writer.write((raw_line + "\n").encode("utf-8"))
        if compressed:
            writer.close()
        stats = Counter({"ProcessedRows": seen, "DeletedRows": removed})
        return out_stream, stats