12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
def get_unzip_hdfs_file(hdfs_file_url, save_dir):
    """Download a file from HDFS and, if it is gzip-compressed, decompress it.

    The local copy is named after the current local time
    (``YYYYmmddHHMMSS``), keeping a ``.gz`` suffix when the last path
    component of ``hdfs_file_url`` ends with ``.gz``. A gzip download is
    decompressed next to the archive, into the same name without the
    trailing ``.gz``; the archive itself is left in place.

    Args:
        hdfs_file_url: HDFS path/URL handed to ``hadoop fs -get``.
        save_dir: Local directory to download into; created (including
            missing parents) when it does not exist yet.

    Returns:
        The path of the downloaded file, or of the decompressed file for
        ``.gz`` sources. ``False`` when the download or the decompression
        fails (the error is logged).
    """
    # Function-scope imports: only this function needs these modules.
    import shutil
    import subprocess

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    is_gzip = hdfs_file_url.split("/").pop().endswith(".gz")

    # NOTE: second-resolution timestamps mean two calls within the same
    # second target the same local file name.
    save_filename = time.strftime("%Y%m%d%H%M%S")
    if is_gzip:
        save_filename += ".gz"
    save_file = os.path.join(save_dir, save_filename)

    logger.info("download hdfs file cammond: "
                + 'hadoop fs -get %s %s' % (hdfs_file_url, save_file))

    # Pass an argument list (no shell) so spaces or shell metacharacters in
    # the URL or directory cannot break or hijack the command.
    try:
        return_code = subprocess.call(
            ["hadoop", "fs", "-get", hdfs_file_url, save_file])
    except OSError as e:
        # Raised when the hadoop executable cannot be started at all.
        logger.error(e)
        return False

    # A failed download is signalled by the exit status, not an exception.
    if return_code != 0:
        logger.error("hadoop fs -get failed with exit status %s" % return_code)
        return False

    if not is_gzip:
        return save_file

    # Strip only the trailing suffix; str.replace would also remove ".gz"
    # occurring elsewhere in the path (for example in a directory name).
    f_name = save_file[:-len(".gz")]
    try:
        # Binary mode plus chunked copy: the decompressed payload is bytes
        # and may be too large to hold in memory at once. The context
        # managers close both handles even when decompression fails.
        with gzip.open(save_file, "rb") as g_file, open(f_name, "wb") as out:
            shutil.copyfileobj(g_file, out)
    except Exception as e:
        # Broad on purpose: callers rely on False for any decompression
        # failure (corrupt archive, truncated stream, I/O error).
        logger.error(e)
        return False
    return f_name
|