加载词向量
作者:互联网
import numpy as np
from numpy import dtype, fromstring, float32 as REAL
# Example input file: fname = "glove.refine.txt"
class WordLoader(object):
    """Load pre-trained word vectors stored in the word2vec text or binary layout.

    Both layouts start with a header line ``"<vocab_size> <vec_size>"``.
    """

    def load_word_vector(self, fname, binary=None, encoding='utf-8'):
        """Read word vectors from *fname* into a dictionary.

        Args:
            fname: Path of the vector file.
            binary: ``True`` to parse the binary layout, ``False`` for the
                text layout.  When ``None`` the layout is inferred from the
                file extension (``.txt`` -> text, ``.bin`` -> binary).
            encoding: Text encoding used for the words (and for the whole
                file in the text layout).

        Returns:
            dict mapping each word (``str``) to a 1-D ``float32`` array.

        Raises:
            NotImplementedError: *binary* is ``None`` and the extension is
                neither ``.txt`` nor ``.bin``.
            ValueError: the header line is not two integers.
        """
        if binary is None:
            if fname.endswith('.txt'):
                binary = False
            elif fname.endswith('.bin'):
                binary = True
            else:
                raise NotImplementedError('Cannot infer binary from %s' % (fname))

        if binary:
            # Raw float bytes must be read in binary mode; text mode would
            # try to decode them and corrupt (or reject) the payload.
            with open(fname, 'rb') as fin:
                vocab_size, vec_size = map(int, fin.readline().split())
                return self._read_binary(fin, vocab_size, vec_size, encoding)

        with open(fname, encoding=encoding) as fin:
            # The header is validated but its counts are not enforced for
            # the text layout; every remaining line is parsed.
            vocab_size, vec_size = map(int, fin.readline().split())
            return self._read_text(fin)

    @staticmethod
    def _read_binary(fin, vocab_size, vec_size, encoding):
        """Parse up to *vocab_size* ``word<space><raw floats>`` records from *fin*."""
        vocab = {}
        binary_len = dtype(REAL).itemsize * vec_size  # bytes per vector
        for _ in range(vocab_size):
            chars = []
            while True:
                ch = fin.read(1)
                if not ch:
                    # EOF before the announced number of entries: return
                    # what was read instead of looping forever.
                    return vocab
                if ch == b' ':
                    break
                if ch != b'\n':  # skip the newline that ends the previous record
                    chars.append(ch)
            raw = fin.read(binary_len)
            if len(raw) < binary_len:
                # Truncated vector at end of file; nothing more to read.
                return vocab
            try:
                word = b''.join(chars).decode(encoding)
            except UnicodeDecodeError:
                # Best effort: drop entries whose word cannot be decoded.
                continue
            # frombuffer yields a read-only view of the bytes; copy so the
            # caller gets an independent, writable array.
            vocab[word] = np.frombuffer(raw, dtype=REAL).copy()
        return vocab

    @staticmethod
    def _read_text(fin):
        """Parse ``word v1 v2 ...`` lines from *fin*, skipping malformed ones."""
        vocab = {}
        for line in fin:
            parts = line.strip().split(' ')
            word = parts[0]
            if not word:
                continue  # blank line
            try:
                weights = np.array([REAL(p) for p in parts[1:]], dtype=REAL)
            except ValueError:
                # Best effort: ignore lines containing non-numeric weights.
                continue
            vocab[word] = weights
        return vocab
标签:vocab,binary,word,REAL,fname,fin,向量,加载 来源: https://blog.51cto.com/u_14540820/2759518