123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- from tqdm import tqdm
- import os
- import numpy as np
- import torch
- #########################################
- ############# NLP Utils ###############
- #########################################
def is_whitespace(c, use_space=True):
    """Tell whether the single character ``c`` counts as whitespace.

    Tab, carriage return, newline and U+202F (narrow no-break space) always
    count. A plain space counts only when ``use_space`` is truthy.

    Args:
        c: The character to classify.
        use_space: Whether ``" "`` is treated as whitespace.

    Returns:
        True if ``c`` is one of the recognised whitespace characters,
        otherwise False.
    """
    if c == " ":
        # A plain space is the only character whose verdict depends on the flag.
        return bool(use_space)
    return c in ("\t", "\r", "\n") or ord(c) == 0x202F
def normalize(word, ignore_spaces=True):
    """Lower-case ``word`` and optionally drop its space characters.

    Args:
        word: The string to normalise.
        ignore_spaces: When truthy, every ``' '`` character is removed from
            the lower-cased text. Other whitespace (tabs, newlines) is kept.

    Returns:
        The normalised string.
    """
    lowered = word.lower()
    if ignore_spaces:
        return lowered.replace(' ', '')
    return lowered
def clean_str(str_in):
    """Return ``str_in`` with tabs, CR, LF and U+202F characters removed.

    Plain spaces are kept: the whitespace test is made with
    ``use_space=False``.

    Args:
        str_in: The text to clean (iterated character by character).

    Returns:
        A new string containing the characters that were kept, in order.
    """
    # Join once instead of growing a string with ``+=`` inside the loop,
    # which may copy the accumulated text on every iteration.
    return ''.join(
        c for c in str_in if not is_whitespace(c, use_space=False)
    )
def compact_text(paragraph_text):
    """Collapse every run of whitespace in ``paragraph_text`` to one space.

    Each run of non-whitespace characters is emitted preceded by exactly one
    space. The result therefore starts with a space whenever the input has
    any non-whitespace character, and trailing whitespace is dropped.

    Args:
        paragraph_text: The text to compact (iterated character by character).

    Returns:
        The compacted string; empty if the input has no non-whitespace
        characters.
    """
    pieces = []
    # Starts True so the very first token also gets a leading space.
    gap_pending = True
    for ch in paragraph_text:
        if is_whitespace(ch):
            gap_pending = True
            continue
        if gap_pending:
            pieces.append(' ')
            gap_pending = False
        pieces.append(ch)
    return ''.join(pieces)
- #########################################
- ########### File I/O Utils ############
- #########################################
def read_file(txtfile, join=False, encoding=None):
    """Read a text file and return its lines with surrounding whitespace stripped.

    Args:
        txtfile: Path of the file to read.
        join: When truthy, return the stripped lines joined with single
            spaces as one string instead of a list.
        encoding: Text encoding handed to ``open``. The default ``None``
            leaves the choice to ``open`` itself (the platform default), which
            is what this function always did; pass e.g. ``"utf-8"`` to read
            the file the same way on every machine.

    Returns:
        A list with one stripped string per line, or a single string when
        ``join`` is truthy.
    """
    with open(txtfile, 'r', encoding=encoding) as f:
        # Iterate the file directly; no need to materialise the raw lines first.
        content = [line.strip() for line in f]
    return ' '.join(content) if join else content
def save2file(filename, listofstrings):
    """Write each string to ``filename`` on its own line, with a tqdm progress bar.

    The file is opened in write mode, so existing content is replaced. Every
    entry is followed by ``'\\n'``, which means the file ends with a newline.

    Args:
        filename: Path of the file to write.
        listofstrings: The strings to write, one per line.
    """
    # To read the file back: f.read().split("\n")
    # (the trailing newline yields one final empty string).
    # ``with`` closes the file even if a write raises part-way through.
    with open(filename, "w") as fp:
        for line in tqdm(listofstrings):
            fp.write(line + '\n')


# Second name bound to the same function.
write_files = save2file
def print_bars(s, num=20):
    """Print ``s`` framed by a line of ``#`` characters above and below.

    Args:
        s: The value to print between the two bars.
        num: Bar length in units of ``"##"``; converted with ``int``, so the
            bar is ``2 * int(num)`` characters wide.
    """
    bar = "##" * int(num)
    for line in (bar, s, bar):
        print(line)
- #########################################
- ########### DataStructure Utils ########
- #########################################
class Queue:
    """First-in, first-out queue stored in a plain list.

    New items go to the end of ``items``; the front of the queue is
    ``items[0]``.
    """

    def __init__(self):
        # Backing list; index 0 is the next item to leave the queue.
        self.items = []

    def isEmpty(self):
        """Return True when the queue holds no items."""
        return not self.items

    def push(self, item):
        """Add ``item`` at the back of the queue."""
        self.items.append(item)

    def pop(self):
        """Remove and return the front item.

        Raises:
            IndexError: If the queue is empty.
        """
        front = self.items[0]
        del self.items[0]
        return front

    def peek(self):
        """Return the front item without removing it.

        Raises:
            IndexError: If the queue is empty.
        """
        return self.items[0]

    def size(self):
        """Return the number of items currently queued."""
        return len(self.items)
- #########################################
- ########### PyTorch Utils ############
- #########################################
def to_tensor(x, cuda=True):
    """Build a ``torch.tensor`` from ``x`` and optionally move it to CUDA.

    Args:
        x: Data accepted by ``torch.tensor``.
        cuda: When truthy (the default), ``.cuda()`` is called on the new
            tensor before it is returned.

    Returns:
        The resulting tensor.
    """
    tensor = torch.tensor(x)
    return tensor.cuda() if cuda else tensor
def to_numpy(x, cuda=True, var=False):
    """Convert ``x`` to a NumPy array via ``x.numpy()``.

    Args:
        x: Object providing ``detach``/``cpu``/``numpy`` as needed
            (presumably a ``torch.Tensor`` - confirm against callers).
        cuda: When truthy (the default), ``x.cpu()`` is applied before the
            conversion.
        var: When truthy, ``x.detach()`` is applied first.

    Returns:
        Whatever ``.numpy()`` returns for the prepared object.
    """
    detached = x.detach() if var else x
    on_host = detached.cpu() if cuda else detached
    return on_host.numpy()
def make_one_hot(n, idx, make_torch=False):
    """Create a length-``n`` float32 vector of zeros with ``idx`` set to 1.

    Args:
        n: Length of the vector.
        idx: Index (anything accepted by NumPy indexing) to set to 1.
        make_torch: When truthy, return ``torch.tensor`` of the vector
            instead of the NumPy array.

    Returns:
        A one-dimensional float32 NumPy array, or a torch tensor built from
        it when ``make_torch`` is truthy.
    """
    one_hot = np.zeros((n,), dtype=np.float32)
    one_hot[idx] = 1.
    if make_torch:
        return torch.tensor(one_hot)
    return one_hot
|