123456789101112131415161718192021222324252627282930313233 |
- #!/bin/env/python
- import pandas as pd
- import numpy as np
- import json
- from datetime import datetime
- from zat.log_to_dataframe import LogToDataFrame
- import glob
- import os
- import sys
- logFile = sys.argv[1]
- baseName = sys.argv[2]
- s3Bucket = sys.argv[3]
- # Zat hangs if the file is not in ascii or if it is a conn-summary file
- if baseName == 'conn-summary':
- print("conn-summary detected, exiting")
- sys.exit()
- current_date = datetime.now()
- dateDay = current_date.strftime('%d')
- dateMonth = current_date.strftime('%m')
- dateYear = current_date.strftime('%Y')
- dateStr = current_date.strftime('%Y-%m-%d-%H')
- print(logFile)
- log_to_df = LogToDataFrame()
- df = log_to_df.create_dataframe(logFile)
- # add a type to make analysis easier in combined dataframes
- df['type']=baseName
- df.to_parquet(s3Bucket + dateYear + '/' + dateMonth + '/' + dateDay + '/' + baseName + '-' + dateStr + '.parquet',engine='fastparquet', compression='gzip')
|