# utils.py
  1. from tqdm import tqdm
  2. import os
  3. import numpy as np
  4. import torch
  5. #########################################
  6. ############# NLP Utils ###############
  7. #########################################
  8. def is_whitespace(c, use_space=True):
  9. if (c == " " and use_space) or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
  10. return True
  11. return False
  12. def normalize(word, ignore_spaces=True):
  13. chars = []
  14. for c in word.lower():
  15. if c == ' ' and ignore_spaces:
  16. pass
  17. else:
  18. chars.append(c)
  19. return ''.join(chars)
  20. def clean_str(str_in):
  21. str_out = ''
  22. for c in str_in:
  23. if not is_whitespace(c, use_space=False):
  24. str_out += c
  25. return str_out
  26. def compact_text(paragraph_text):
  27. doc_tokens = []
  28. prev_is_whitespace = True
  29. for c in paragraph_text:
  30. if is_whitespace(c):
  31. prev_is_whitespace = True
  32. else:
  33. if prev_is_whitespace:
  34. doc_tokens.append(' ')
  35. doc_tokens.append(c)
  36. prev_is_whitespace = False
  37. else:
  38. doc_tokens.append(c)
  39. prev_is_whitespace = False
  40. return ''.join(doc_tokens)
  41. #########################################
  42. ########### File I/O Utils ############
  43. #########################################
  44. def read_file(txtfile, join=False):
  45. with open(txtfile, 'r') as f:
  46. content = f.readlines()
  47. content = [x.strip() for x in content]
  48. return (' '.join(content) if join else content)
  49. def save2file(filename, listofstrings):
  50. # to read : self.word_vocab = f.read().split("\n")
  51. fp = open(filename, "w")
  52. for i in tqdm(range(len(listofstrings))):
  53. str_state = listofstrings[i] + '\n'
  54. fp.write(str_state)
  55. fp.close()
  56. write_files = save2file
  57. def print_bars(s, num=20):
  58. print("##" * int(num))
  59. print(s)
  60. print("##" * int(num))
  61. #########################################
  62. ########### DataStructure Utils ########
  63. #########################################
  64. class Queue:
  65. def __init__(self):
  66. self.items = []
  67. def isEmpty(self):
  68. return self.items == []
  69. def push(self, item):
  70. self.items.append(item)
  71. def pop(self):
  72. item = self.items[0]
  73. del(self.items[0])
  74. return item
  75. def peek(self):
  76. return self.items[0]
  77. def size(self):
  78. return len(self.items)
  79. #########################################
  80. ########### PyTorch Utils ############
  81. #########################################
  82. def to_tensor(x, cuda=True):
  83. t_x = torch.tensor(x)
  84. if cuda:
  85. t_x = t_x.cuda()
  86. return t_x
  87. def to_numpy(x, cuda=True, var=False):
  88. if var:
  89. x = x.detach()
  90. if cuda:
  91. x = x.cpu()
  92. return x.numpy()
  93. def make_one_hot(n, idx, make_torch=False):
  94. a = np.zeros((n,), dtype=np.float32)
  95. a[idx] = 1.
  96. return torch.tensor(a) if make_torch else a