123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
"""
Generate the basic.parquet file used by acceptance tests.

Builds a three-row pandas DataFrame, writes it to ``basic.parquet`` in the
current working directory via pyarrow, then reads the file back and prints
the resulting DataFrame so the written content can be checked by eye.
"""
from decimal import Decimal

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Output path; relative, so the file lands in the current working directory.
filename = "basic.parquet"

# Three sample rows covering several value kinds: strings, integers,
# Decimals, nested dicts and lists of strings.
df = pd.DataFrame(
    {
        # Identifier as a string.
        "customer_id": ["12345", "23456", "34567"],
        # Same identifiers as integers, under a camelCase column name.
        "customerId": [12345, 23456, 34567],
        # Decimal values with three fractional digits.
        "customer_id_decimal": [
            Decimal("123.450"),
            Decimal("234.560"),
            Decimal("345.670"),
        ],
        # Two-level nested dict per row.
        "user_info": [
            {
                "personal_information": {
                    "email": "12345@test.com",
                    "first_name": "John",
                    "last_name": "Doe",
                }
            },
            {
                "personal_information": {
                    "email": "23456@test.com",
                    "first_name": "Jane",
                    "last_name": "Doe",
                }
            },
            {
                "personal_information": {
                    "email": "34567@test.com",
                    "first_name": "Mary",
                    "last_name": "Smith",
                }
            },
        ],
        # Variable-length list of date strings per row.
        "days_off": [
            ["2020-01-01", "2020-01-02"],
            ["2020-01-01", "2020-01-07"],
            ["2020-01-05"],
        ],
    }
)

# Convert to an Arrow table and write it out as parquet.
table = pa.Table.from_pandas(df)
pq.write_table(table, filename)

# Read the file back from disk and print it, so what is shown is the data
# actually stored in the file rather than the in-memory input frame.
parquet_file = pq.ParquetFile(filename)
written_df = parquet_file.read().to_pandas()
print("File written. Data:\n\n{}".format(written_df))
|