# utils.py

import torch
import shutil
import os
# Standard-library imports used by the helpers below; third-party packages
# (numpy, librosa, scikit-learn, requests, tqdm, PyQt, pyheif, Pillow, lxml,
# pygame, Django, FastAPI, ...) are assumed to be available where the
# corresponding snippet is used.
import base64, csv, glob, gzip, io, json, math, pathlib, pickle, random, re, time
import numpy as np
def save_checkpoint(state, is_best, file_path, file_name='checkpoint.pth.tar'):
    """
    Saves the current state of the model and makes a copy of the file
    when the model performs better than previously.
    Parameters:
        state (dict): Includes optimizer and model state dictionaries.
        is_best (bool): True if this is the best-performing model so far.
        file_path (str): Path to save the file.
        file_name (str): File name with extension (default: checkpoint.pth.tar).
    """
    save_path = os.path.join(file_path, file_name)
    torch.save(state, save_path)
    if is_best:
        shutil.copyfile(save_path, os.path.join(file_path, 'model_best.pth.tar'))
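# Example usage (illustrative sketch only; `model`, `optimizer`, `epoch`, `val_acc`
# and `best_acc` are assumed to exist, and './checkpoints' to be a valid directory):
#   state = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
#   save_checkpoint(state, is_best=val_acc > best_acc, file_path='./checkpoints')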
def save_task_checkpoint(file_path, task_num):
    """
    Saves the current state of the model for a given task by copying the existing
    checkpoint created by the save_checkpoint function.
    Parameters:
        file_path (str): Path to save the file.
        task_num (int): Number of the task increment.
    """
    save_path = os.path.join(file_path, 'checkpoint_task_' + str(task_num) + '.pth.tar')
    shutil.copyfile(os.path.join(file_path, 'checkpoint.pth.tar'), save_path)
def pickle_dump(item, out_file):
    with open(out_file, "wb") as opened_file:
        pickle.dump(item, opened_file)
def write_to_clf(clf_data, save_file):
    """
    Save a text-classification dataset to file.
    clf_data: List[List[str]] [[text1, label1], [text2, label2], ...]
    file format: tsv, row: text + tab + label
    """
    with open(save_file, 'w', encoding='utf-8') as f:
        f.writelines("\n".join(["\t".join(str(r) for r in row) for row in clf_data]))
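# Example of the expected clf_data layout (hypothetical rows, for illustration only):
#   clf_data = [["this movie was great", "positive"], ["terrible plot", "negative"]]
#   write_to_clf(clf_data, "train.tsv")   # writes one "text<TAB>label" row per line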
def write_to_seq2seq(seq_data, save_file):
    """
    seq_data: List[List[str]] [[src1, tgt1], [src2, tgt2], ...]
    file format: tsv, row: src + tab + tgt
    """
    with open(save_file, 'w', encoding='utf-8') as f:
        f.writelines("\n".join(["\t".join([str(r) for r in row]) for row in seq_data]))
def write_to_ner(cls, ner_data, save_file):
    """
    Save a NER dataset to file, one tab-separated row per example.
    :param cls: class reference (unused here, kept for the original call signature)
    :param ner_data: List[List[str]] of rows to write
    :param save_file: output path
    :return: None
    """
    with open(save_file, 'w', encoding='utf-8') as f:
        f.writelines("\n".join(["\t".join(str(r) for r in row) for row in ner_data]))
def quick_save(self, model, save_name, optimizer=None):
    save_path = os.path.join(self.save_dir, save_name + '_weights.pth')
    if optimizer:
        opt_weights = optimizer.get_weights()
        np.save(os.path.join(self.save_dir, save_name + '_opt_weights'), opt_weights)
    model.save_weights(save_path, save_format='h5')
def save(self, model, iter_nb, train_metrics_values, test_metrics_values, tasks_weights=[], optimizer=None):
    self.logs_dict['train'][str(iter_nb)] = {}
    self.logs_dict['val'][str(iter_nb)] = {}
    for k in range(len(self.metrics)):
        self.logs_dict['train'][str(iter_nb)][self.metrics[k]] = float(train_metrics_values[k])
        self.logs_dict['val'][str(iter_nb)][self.metrics[k]] = float(test_metrics_values[k])
    if len(tasks_weights) > 0:
        for k in range(len(tasks_weights)):
            self.logs_dict['val'][str(iter_nb)]['weight_' + str(k)] = tasks_weights[k]
    with open(self.logs_file, 'w') as f:
        json.dump(self.logs_dict, f)
    ckpt = {
        'model_state_dict': model.state_dict(),
        'iter_nb': iter_nb,
    }
    if optimizer:
        ckpt['optimizer_state_dict'] = optimizer.state_dict()
    # Saves best miou score if reached
    if 'MEAN_IOU' in self.metrics:
        miou = float(test_metrics_values[self.metrics.index('MEAN_IOU')])
        if miou > self.best_miou and iter_nb > 0:
            print('Best miou. Saving it.')
            torch.save(ckpt, self.best_miou_weights_file)
            self.best_miou = miou
            self.config_dict['best_miou'] = self.best_miou
    # Saves best relative error if reached
    if 'REL_ERR' in self.metrics:
        rel_error = float(test_metrics_values[self.metrics.index('REL_ERR')])
        if rel_error < self.best_rel_error and iter_nb > 0:
            print('Best rel error. Saving it.')
            torch.save(ckpt, self.best_rel_error_weights_file)
            self.best_rel_error = rel_error
            self.config_dict['best_rel_error'] = self.best_rel_error
    # Saves last checkpoint
    torch.save(ckpt, self.last_checkpoint_weights_file)
    self.iter_nb = iter_nb
    self.config_dict['iter'] = self.iter_nb
    with open(self.config_file, 'w') as f:
        json.dump(self.config_dict, f)
def extract_spec(dataset='train'):
    f = open(data_path + dataset + '_list.txt', 'r')
    i = 0
    for file_name in f:
        i = i + 1
        if not (i % 10):
            print(i)
        # load audio file
        file_name = file_name.rstrip('\n')
        file_path = data_path + file_name
        # print(file_path)
        y0, sr = librosa.load(file_path, sr=22050)
        # use only the first quarter of the signal
        half = len(y0) / 4
        y = y0[:round(half)]
        # mfcc
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=MFCC_DIM)
        # delta mfcc and double delta
        delta_mfcc = librosa.feature.delta(mfcc)
        ddelta_mfcc = librosa.feature.delta(mfcc, order=2)
        # STFT
        D = np.abs(librosa.core.stft(y, hop_length=512, n_fft=1024, win_length=1024))
        D_dB = librosa.amplitude_to_db(D, ref=np.max)
        # mel spectrogram
        mel_S = librosa.feature.melspectrogram(S=D, sr=sr, n_mels=128)
        S_dB = librosa.power_to_db(mel_S, ref=np.max)  # log compression
        # spectral centroid
        spec_centroid = librosa.feature.spectral_centroid(S=D)
        # concatenate all features
        features = np.concatenate([mfcc, delta_mfcc, ddelta_mfcc, spec_centroid], axis=0)
        # save the concatenated features as a file
        file_name = file_name.replace('.wav', '.npy')
        save_file = spec_path + file_name
        if not os.path.exists(os.path.dirname(save_file)):
            os.makedirs(os.path.dirname(save_file))
        np.save(save_file, features)
    f.close()
def extract_codebook(dataset='train'):
    f = open(data_path + dataset + '_list.txt', 'r')
    i = 0
    for file_name in f:
        i = i + 1
        if not (i % 10):
            print(i)
        # load audio file
        file_name = file_name.rstrip('\n')
        file_path = data_path + file_name
        # print(file_path)
        y0, sr = librosa.load(file_path, sr=22050)
        # use only the first quarter of the signal
        half = len(y0) / 4
        y = y0[:round(half)]
        # STFT
        S_full, phase = librosa.magphase(librosa.stft(y, n_fft=1024, window='hann', hop_length=256, win_length=1024))
        n = len(y)
        # Check the shape of the matrix: rows must correspond to the example index!
        X = S_full.T
        # codebook by using K-Means clustering
        K = 20
        kmeans = KMeans(n_clusters=K, random_state=0).fit(X)
        features_kmeans = np.zeros(X.shape[0])
        # for each sample, store its cluster label
        codebook = np.zeros(K)
        for sample in range(X.shape[0]):
            features_kmeans[sample] = kmeans.labels_[sample]
        # codebook histogram (pair each unique label with its count so empty clusters stay zero)
        unique, counts = np.unique(features_kmeans, return_counts=True)
        for u, c in zip(unique, counts):
            codebook[int(u)] = c
        # save the codebook histogram as a file
        file_name = file_name.replace('.wav', '.npy')
        save_file = codebook_path + file_name
        if not os.path.exists(os.path.dirname(save_file)):
            os.makedirs(os.path.dirname(save_file))
        np.save(save_file, codebook)
    f.close()
def run(self):
    file = QtCore.QFile(self.filePath)
    if not file.open(QtCore.QIODevice.WriteOnly):
        self.saveFileFinished.emit(SAVE_FILE_ERROR, self.urlStr, self.filePath)
        return
    file.write(self.fileData)
    file.close()
    self.saveFileFinished.emit(0, self.urlStr, self.filePath)
def saveFile(self, fileName, data):
    file = QtCore.QFile(fileName)
    if not file.open(QtCore.QIODevice.WriteOnly):
        return False
    file.write(data.readAll())
    file.close()
    return True
def serialize(self):
    """Callback to serialize the array."""
    string_file = io.BytesIO()
    try:
        numpy.save(string_file, self.array, allow_pickle=False)
        serialized = string_file.getvalue()
    finally:
        string_file.close()
    return serialized
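# Example round-trip (sketch; `obj.array` is assumed to be a NumPy array):
#   payload = obj.serialize()
#   restored = numpy.load(io.BytesIO(payload), allow_pickle=False)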
def train(self, save=False, save_dir=None):
    train_img_list = glob.glob(self.path_train + "/*")
    print(train_img_list)
    train_features = []
    for img_file in train_img_list:
        img = io.imread(img_file)
        img = color.rgb2lab(img)
        img_features = self.extract_texton_feature(img, self.fb, self.nb_features)
        train_features.extend(img_features)
    train_features = np.array(train_features)
    print(train_features.shape)
    kmeans_cluster = MiniBatchKMeans(n_clusters=self.nb_clusters, verbose=1, max_iter=300)
    kmeans_cluster.fit(train_features)
    print(kmeans_cluster.cluster_centers_)
    print(kmeans_cluster.cluster_centers_.shape)
    self.cluster = kmeans_cluster
    # save kmeans result
    if save is True:
        with open(save_dir, 'wb') as f:
            pickle.dump(self.cluster, f)
def save(self, event):
    if not self.filename:
        self.save_as(event)
    else:
        if self.writefile(self.filename):
            self.set_saved(True)
            try:
                self.editwin.store_file_breaks()
            except AttributeError:  # may be a PyShell
                pass
    self.text.focus_set()
    return "break"
def writefile(self, filename):
    self.fixlastline()
    chars = self.encode(self.text.get("1.0", "end-1c"))
    if self.eol_convention != "\n":
        chars = chars.replace("\n", self.eol_convention)
    try:
        f = open(filename, "wb")
        f.write(chars)
        f.flush()
        f.close()
        return True
    except IOError as msg:
        tkMessageBox.showerror("I/O Error", str(msg),
                               master=self.text)
        return False
def save_response_content(response,
                          destination,
                          file_size=None,
                          chunk_size=32768):
    if file_size is not None:
        pbar = tqdm(total=math.ceil(file_size / chunk_size), unit='chunk')
        readable_file_size = sizeof_fmt(file_size)
    else:
        pbar = None
    with open(destination, 'wb') as f:
        downloaded_size = 0
        for chunk in response.iter_content(chunk_size):
            downloaded_size += chunk_size
            if pbar is not None:
                pbar.update(1)
                pbar.set_description(f'Download {sizeof_fmt(downloaded_size)} '
                                     f'/ {readable_file_size}')
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
        if pbar is not None:
            pbar.close()
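# Example usage (sketch; assumes `requests` is imported and the server reports a
# Content-Length header; the URL is a placeholder):
#   response = requests.get("https://example.com/archive.zip", stream=True)
#   file_size = int(response.headers.get("Content-Length", 0)) or None
#   save_response_content(response, "archive.zip", file_size=file_size)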
def generateHuman(cloth_list, person_id, sex):
    haveAcc = 0
    # load accessories
    hair = open('modeleTxt/hair.txt', 'r').readlines()
    shoe = open('modeleTxt/shoe.txt', 'r').readlines()
    pifu = open('modeleTxt/skin.txt', 'r').readlines()
    if not os.path.exists(person_save_Folder):
        os.makedirs(person_save_Folder)
    if sex > 0:
        Gender1 = 1000000
    else:
        Gender1 = 0
    # settings
    Gender = '%.6f' % (Gender1 / 1000000)
    Muscle = '%.6f' % (random.randint(0, 1000000) / 1000000)
    African_1 = random.randint(0, 1000000)
    African = '%.6f' % (African_1 / 1000000)
    Asian_1 = random.randint(0, 1000000 - African_1)
    Asian = '%.6f' % (Asian_1 / 1000000)
    Caucasian = '%.6f' % ((1000000 - Asian_1 - African_1) / 1000000)
    if Gender1 > 1000000 / 2:
        m_height = random.gauss(170, 5.7) / 200
        while m_height > 1:
            m_height = random.gauss(170, 5.7) / 200
        Height = '%.6f' % (m_height)
    else:
        m_height = random.gauss(160, 5.2) / 200
        while m_height > 1:
            m_height = random.gauss(160, 5.2) / 200
        Height = '%.6f' % (m_height)
    BreastSize = '%.6f' % (random.randint(0, 70) / 100)
    Age = '%.6f' % (random.randint(20, 90) / 100)
    BreastFirmness = '%.6f' % (random.randint(30, 100) / 100)
    Weight = '%.6f' % (random.randint(0, 1000000) / 1000000)
    file_name = 'B' + str(person_id)
    # creating the person file
    f = open(person_save_Folder + file_name + ".mhm", 'a')
    f.write('# Written by MakeHuman 1.1.1\n')
    f.write('version v1.1.1\n')
    f.write('tags ' + file_name + '\n')
    f.write('camera 0.0 0.0 0.0 0.0 0.0 1.0\n')
    f.write('modifier macrodetails-universal/Muscle ' + Muscle + '\n')
    f.write('modifier macrodetails/African ' + African + '\n')
    f.write('modifier macrodetails-proportions/BodyProportions 0.500000\n')
    f.write('modifier macrodetails/Gender ' + Gender + '\n')
    f.write('modifier macrodetails-height/Height ' + Height + '\n')
    f.write('modifier breast/BreastSize ' + BreastSize + '\n')
    f.write('modifier macrodetails/Age ' + Age + '\n')
    f.write('modifier breast/BreastFirmness ' + BreastFirmness + '\n')
    f.write('modifier macrodetails/Asian ' + Asian + '\n')
    f.write('modifier macrodetails/Caucasian ' + Caucasian + '\n')
    f.write('modifier macrodetails-universal/Weight ' + Weight + '\n')
    f.write('skeleton cmu_mb.mhskel\n')
    f.write('eyes HighPolyEyes 2c12f43b-1303-432c-b7ce-d78346baf2e6\n')
    # adding clothes
    if Gender1 > 1000000 / 2:
        f.write(hair[random.randint(0, len(hair) - 1)])
    else:
        f.write(hair[random.randint(0, len(hair) - 1)])
    f.write(shoe[random.randint(0, len(shoe) - 1)])
    for i in range(0, len(cloth_list)):
        f.write(cloth_list[i] + '\n')
    f.write('clothesHideFaces True\n')
    f.write(pifu[random.randint(0, len(pifu) - 1)])
    f.write('material Braid01 eead6f99-d6c6-4f6b-b6c2-210459d7a62e braid01.mhmat\n')
    f.write('material HighPolyEyes 2c12f43b-1303-432c-b7ce-d78346baf2e6 eyes/materials/brown.mhmat\n')
    f.write('subdivide False\n')
    f.close()
def notice_write(request):
    if request.method == 'POST':
        form = ContentForm(request.POST)
        form_file = FileForm(request.POST, request.FILES)
        if form.is_valid():
            question = form.save(commit=False)
            question.author = request.user
            question.create_date = timezone.now()
            question.boardname_id = 7
            question.save()
            if form_file.is_valid():
                form_file = FileForm(request.POST, request.FILES)
                file_save = form_file.save(commit=False)
                file_save.author = request.user
                file_save.postcontent = question
                file_save.boardname_id = 7
                file_save.file = request.FILES.get("file")
                file_save.save()
            return redirect('notice_view')
    return render(request, 'notice_write.html')
def test_write(request):
    if request.method == 'POST':
        form = ContentForm(request.POST)
        form_file = FileForm(request.POST, request.FILES)
        if form.is_valid():
            question = form.save(commit=False)
            question.author = request.user
            question.create_date = timezone.now()
            question.boardname_id = 14
            question.save()
            if form_file.is_valid():
                form_file = FileForm(request.POST, request.FILES)
                file_save = form_file.save(commit=False)
                file_save.author = request.user
                file_save.postcontent = question
                file_save.boardname_id = 14
                file_save.file = request.FILES.get("file")
                file_save.save()
            return redirect('test_list')
    return render(request, 'test_write.html')
def down_file(url, name, path):
    if os.path.exists(path):
        return
    print("Start downloading: " + name + ".mp3")
    headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
               "Accept-Encoding": "gzip, deflate, br",
               "Accept-Language": "zh-CN,zh;q=0.9",
               "Upgrade-Insecure-Requests": "1",
               'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    count = 0
    while count < 3:
        try:
            r = requests.get(url, headers=headers, stream=True, timeout=60)
            # print(r.status_code)
            if r.status_code == 200:
                with open(path, "wb+") as f:
                    for chunk in r.iter_content(1024):
                        f.write(chunk)
                print("Finished downloading: " + name + ".mp3")
                break
        except Exception as e:
            print(e)
            print("Download error: " + name + ".mp3, retrying in 3 seconds")
            if os.path.exists(path):
                os.remove(path)
            time.sleep(3)
        count += 1
def save_as():
    global file_name
    content = content_text.get(1.0, 'end')
    with open(file_name, 'w') as save:
        save.write(content)
def export_save(data_player, data_kick, guild_id, save_name=""):
    if save_name:
        save_name = "_" + save_name
    print(" - Game saved -")
    with open(f"saves/save{save_name}.json", "w") as file:
        file.write(json.dumps(
            {
                "players": [data_player[player_id].export() for player_id in data_player],
                "kicks": data_kick,
                "guild_id": guild_id
            }, indent=4))
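# Example usage (sketch; assumes each value in `data_player` exposes an .export()
# method returning a JSON-serializable dict and that a "saves/" directory exists;
# the argument values are placeholders):
#   export_save(data_player, data_kick=["user#1234"], guild_id=123456789, save_name="round2")
#   # -> writes saves/save_round2.json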
def conv(heic_path, save_dir, filetype, quality):
    # destination directory and file name
    extension = "." + filetype
    save_path = save_dir / filetype / pathlib.Path(*heic_path.parts[1:]).with_suffix(extension)
    # create the destination folder
    save_path.parent.mkdir(parents=True, exist_ok=True)
    # read the HEIC file with pyheif
    heif_file = pyheif.read(heic_path)
    # load the decoded contents into a PIL image
    data = Image.frombytes(
        heif_file.mode,
        heif_file.size,
        heif_file.data,
        "raw",
        heif_file.mode,
        heif_file.stride,
    )
    # save as JPEG (or the requested filetype)
    data.save(save_path, quality=quality)
    print("Saved:", save_path)
def parsing_sravni_ru(soup):
    names = soup.find_all('span', class_='_106rrj0')  # scraping names
    # scraping children's ages
    age_divs = soup.find_all('div', {'style': 'grid-area:firstCell-1', 'class': '_pjql8'})
    ages = []
    for i in age_divs:
        age_span = i.find('span')
        ages.append(age_span)
    # scraping course duration
    duration_divs = soup.find_all('div', {'style': 'grid-area:secondCell-1', 'class': '_pjql8'})
    durations = []
    for i in duration_divs:
        duration_span = i.find('span')
        durations.append(duration_span)
    # scraping price
    prices = soup.find_all('span', class_='_e9qrci _k8dl2y')
    items = []
    for (n, l, i, p) in zip(names, ages, durations, prices):
        name = n.text.strip()
        age = l.text.strip()
        duration = i.text.strip()
        price = p.text.strip().replace('\xa0', '')
        items.append(
            {
                'name': name,
                'age': age,
                'duration': duration,
                'price': price,
            }
        )
    # save json file
    with open("./data/items.json", "w", encoding="utf-8") as f:
        json.dump(items, f, indent=4, ensure_ascii=False)
    with open("./data/items.csv", 'a', encoding="utf-8") as file:
        writer = csv.writer(file)
        for i in items:
            writer.writerow(
                (
                    i['name'],
                    i['age'],
                    i['duration'],
                    i['price']
                )
            )
def save_to_file(self, path):
    with open(path, "w") as f:
        f.write(self.cert_pem())
        f.write(self.key_pem())
def save_cert_to_file(self, path):
    with open(path, "w") as f:
        f.write(self.cert_pem())
def _save_large_file(self, os_path, content, format):
    """Save content of a generic file."""
    if format not in {'text', 'base64'}:
        raise web.HTTPError(
            400,
            "Must specify format of file contents as 'text' or 'base64'",
        )
    try:
        if format == 'text':
            bcontent = content.encode('utf8')
        else:
            b64_bytes = content.encode('ascii')
            bcontent = base64.b64decode(b64_bytes)
    except Exception as e:
        raise web.HTTPError(
            400, u'Encoding error saving %s: %s' % (os_path, e)
        )
    with self.perm_to_403(os_path):
        if os.path.islink(os_path):
            os_path = os.path.join(os.path.dirname(os_path), os.readlink(os_path))
        with io.open(os_path, 'ab') as f:
            f.write(bcontent)
def get_unzip_hdfs_file(hdfs_file_url, save_dir):
    # create the save directory if it does not exist
    if os.path.isdir(save_dir):
        pass
    else:
        os.mkdir(save_dir)
    # HDFS file name
    filename = hdfs_file_url.split("/").pop()
    # name of the file saved locally
    save_filename = ""
    # check whether it is a compressed file
    if filename.endswith(".gz"):
        save_filename = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time())) + ".gz"
    else:
        save_filename = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    # check whether the save path ends with /
    if save_dir.endswith("/"):
        save_file = save_dir + save_filename
    else:
        save_file = save_dir + "/" + save_filename
    # build the command that downloads the hdfs file
    hadoop_get = 'hadoop fs -get %s %s' % (hdfs_file_url, save_file)
    logger.info("download hdfs file command: " + hadoop_get)
    # run the generated hdfs command in a shell
    try:
        os.system(hadoop_get)
    except Exception as e:
        logger.error(e)
        return False
    # check whether the downloaded hdfs file is compressed
    if save_file.endswith(".gz"):
        # decompress the file
        try:
            # name of the decompressed file
            f_name = save_file.replace(".gz", "")
            # decompress
            g_file = gzip.GzipFile(save_file)
            # write the decompressed contents (binary mode, since GzipFile.read() returns bytes)
            open(f_name, "wb+").write(g_file.read())
            # close the file stream
            g_file.close()
            return f_name
        except Exception as e:
            logger.error(e)
            return False
    else:
        return save_file
  558. """
  559. 根据HDFS文件目录下载此目录下所有的文件
  560. 参数说明:
  561. hdfs_dir:HDFS文件目录
  562. save_dir:要保存的目录
  563. 返回结果说明:执行成功返回True,执行失败返回False
  564. """
  565. def get_unzip_hdfs_file_from_dir(hdfs_dir, save_dir):
  566. # 命令:获取hdfs目录下的文件
  567. hadoop_ls = "hadoop fs -ls %s | grep -i '^-'" % hdfs_dir
  568. # 解压后的文件列表
  569. save_file_list = []
  570. # 执行shell命令
  571. hdfs_result = exec_sh(hadoop_ls, None)
  572. # 获取命令执行输出
  573. hdfs_stdout = hdfs_result["stdout"]
  574. # print("hdfs_stdout = " + hdfs_stdout)
  575. # 要下载的HDFS文件列表
  576. hdfs_list = []
  577. # 判断是否有输出
  578. if hdfs_stdout:
  579. # 以行分割, 一行是一个文件的信息
  580. hdfs_lines = hdfs_stdout.split("\n")
  581. # 对每一行进行处理
  582. for line in hdfs_lines:
  583. # 以空白字符为分割符获取hdfs文件名
  584. line_list = re.split("\s+", line)
  585. # -rw-r--r-- 2 caoweidong supergroup 42815 2017-01-23 14:20 /user/000000_0.gz
  586. if line_list.__len__() == 8:
  587. # print("line_list[7] = " + line_list[7])
  588. # HDFS文件加入下载列表
  589. hdfs_list.append(line_list[7])
  590. else:
  591. pass
  592. # 下载文件
  593. for file in hdfs_list:
  594. save_filename = get_unzip_hdfs_file(file, save_dir)
  595. save_file_list.append(save_filename)
  596. return save_file_list
  597. else:
  598. return False
def save_game(self):
    save_file = open("saves/main_save.xml", "w+")
    level = self.save_level()
    self.tree.append(level)
    team = self.save_team()
    self.tree.append(team)
    # Store XML tree in file
    save_file.write(etree.tostring(self.tree, pretty_print=True, encoding="unicode"))
    save_file.close()
def save_upload_file(
    self,
    file: UploadFile,
    save_dir_path: pathlib.Path,
    job_id: str,
    dt_string: str,
) -> pathlib.Path:
    """Save `file` under `save_dir_path`.
    Args:
        file (UploadFile): The file to save.
        save_dir_path (pathlib.Path): Path to the directory where the file will be saved.
        job_id (str): A job id, used as part of the filename.
        dt_string (str): A datetime string, used as part of the filename.
    Returns:
        pathlib.Path: The path where the file is saved.
    """
    if not save_dir_path.exists():
        save_dir_path.mkdir(parents=True, exist_ok=True)
    save_path: Final = save_dir_path / f"{dt_string}_{job_id}_{file.filename}"
    try:
        with save_path.open("wb") as f:
            shutil.copyfileobj(file.file, f)
    finally:
        file.file.close()
    return save_path
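# Example usage from a FastAPI route (sketch; `self` stands in for whatever service
# object owns this method, `upload` is the UploadFile received by the route, and the
# directory/job-id values are placeholders):
#   save_path = self.save_upload_file(
#       file=upload,
#       save_dir_path=pathlib.Path("uploads"),
#       job_id="job42",
#       dt_string=time.strftime("%Y%m%d%H%M%S"),
#   )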
def save_output(output, list_to_save):
    if output:
        with open(output, "w") as f:
            for item in list_to_save:
                f.write("%s\n" % item)
        print(f"Output file: {output}")
def _saveTestWavFile(self, filename, wav_data):
    with open(filename, "wb") as f:
        f.write(wav_data)
def _post_save_script(model, os_path, contents_manager, **kwargs):
    """Convert notebooks to Python script after save with nbconvert.
    Replaces `jupyter notebook --script`.
    """
    from nbconvert.exporters.script import ScriptExporter
    warnings.warn("`_post_save_script` is deprecated and will be removed in Notebook 5.0", DeprecationWarning)
    if model['type'] != 'notebook':
        return
    global _script_exporter
    if _script_exporter is None:
        _script_exporter = ScriptExporter(parent=contents_manager)
    log = contents_manager.log
    base, ext = os.path.splitext(os_path)
    script, resources = _script_exporter.from_filename(os_path)
    script_fname = base + resources.get('output_extension', '.txt')
    log.info("Saving script /%s", to_api_path(script_fname, contents_manager.root_dir))
    with io.open(script_fname, 'w', encoding='utf-8') as f:
        f.write(script)
def _save_data(filename, data):
    """
    Save formatted skeleton data to a pickle file.
    """
    if not filename.endswith(".p"):
        filename = filename + ".p"
    with open(filename, 'wb') as fp:
        pickle.dump(data, fp, protocol=pickle.HIGHEST_PROTOCOL)
    print("Saved data to file: " + filename)
def download_unknowns(url: str) -> None:
    """."""
    page_content: bytes = get_none_soup(url)
    page_string: bytes = page_content[0:100]
    """parse section of page bytes and use as name. If unknown encoding
    convert to number string (exclude first few bytes that state filetype) """
    try:
        page_unicode = page_string.decode("ISO-8859-1").replace(R'%', '_')
        page_parsed = [char for char in page_unicode if char.isalnum() or char == '_']
        unknown_file_name = "".join(page_parsed)[10:30]
    except UnicodeDecodeError:
        try:
            page_unicode = page_string.decode('utf-8').replace(R'%', '_')
            page_parsed = [char for char in page_unicode if char.isalnum() or char == '_']
            unknown_file_name = "".join(page_parsed)[10:30]
        except UnicodeDecodeError:
            unknown_file_name = "unk_"
            for char in page_content[10:30]:
                if char != ord('\\'):  # iterating over bytes yields ints
                    unknown_file_name += str(char)
            print(unknown_file_name)
    """check beginning of page bytes for a filetype"""
    if b'%PDF' in page_string:
        extension = '.pdf'
    else:
        extension = '.unk.txt'
    with open(save_file_dir + '/' + unknown_file_name + extension, 'wb') as file:
        file.write(page_content)  # print(save_file_dir)
def download_images(start_url: str, filetypes: List[str]) -> None:
    """.."""
    base_url = get_base_url(start_url)
    # print(start_url)
    soup = get_soup(start_url)  # print(soup)
    if soup is not None:
        for index, image in enumerate(soup.select('img')):  # print(image)
            # image_raw = str(image)
            src_raw = str(image.get('src'))  # print(image.attrs['src'])
            if src_raw.startswith('http'):
                image_url = src_raw
            elif src_raw.startswith('/'):
                image_url = base_url + src_raw
            else:
                image_url = src_raw
            # print(image_url)
            for image_type in filter(lambda x: x in src_raw, filetypes):  # print(image)
                image_response = requests.get(image_url, stream=True)
                if image_response.status_code == 200:
                    image_name = re.sub(r'.*/', '', src_raw).replace(R'.', '_')
                    # print(image_name, index)
                    fp: BinaryIO = open(save_image_dir + '/' + image_name + str(index) + image_type, 'wb')
                    fp.write(image_response.content)
                    fp.close()
                    # i = Image.open(BytesIO(image_response.content))
                    # i.save(image_name)
def _unicode_save(self, temp_file):
    im = pygame.Surface((10, 10), 0, 32)
    try:
        with open(temp_file, "w") as f:
            pass
        os.remove(temp_file)
    except IOError:
        raise unittest.SkipTest("the path cannot be opened")
    self.assertFalse(os.path.exists(temp_file))
    try:
        imageext.save_extended(im, temp_file)
        self.assertGreater(os.path.getsize(temp_file), 10)
    finally:
        try:
            os.remove(temp_file)
        except EnvironmentError:
            pass