from gensim.models import FastText
from sklearn.decomposition import PCA
from matplotlib import pyplot
# Example sentences: a toy corpus where each sentence is a list of tokens.
sentences = [['I', 'love', 'nlp'],
             ['I', 'will', 'learn', 'nlp', 'in', '2', 'months'],
             ['nlp', 'is', 'future'],
             ['nlp', 'saves', 'time', 'and', 'solves',
              'lot', 'of', 'industry', 'problems'],
             ['nlp', 'uses', 'machine', 'learning']]

# Train a FastText model on the toy corpus.
# min_count=1 keeps every token (corpus is tiny); min_n/max_n bound the
# character n-gram lengths used for subword embeddings.
# Bug fix: the original used curly quotes (‘ ’) around every string,
# which is a SyntaxError in Python — replaced with straight quotes.
# NOTE(review): `size` is the pre-4.0 gensim keyword; gensim 4+ renamed it
# to `vector_size` — confirm the installed gensim version.
fast = FastText(sentences, size=20, window=1, min_count=1,
                workers=5, min_n=1, max_n=2)

# Vector for the word 'nlp'.
# Bug fix: the original line was bare prose (SyntaxError) and the lookup
# used curly quotes — both repaired.
# NOTE(review): `fast['nlp']` is the pre-4.0 gensim access style;
# gensim 4+ requires `fast.wv['nlp']` — confirm the installed version.
print(fast['nlp'])

# Persist the trained model to disk so it can be reloaded later.
fast.save('fast.bin')

# Load the model back from disk.
# Bug fix: the original called Word2Vec.load, but Word2Vec was never
# imported (only FastText is) — and a model saved via FastText.save must
# be loaded with FastText.load to restore the subword n-gram data.
fast = FastText.load('fast.bin')

# Visualize the embeddings: project every word vector down to 2-D with
# PCA and scatter-plot the points, labelling each with its word.
# NOTE(review): `fast[fast.wv.vocab]` and `fast.wv.vocab` are pre-4.0
# gensim API; gensim 4+ uses `fast.wv[fast.wv.index_to_key]` — confirm.
X = fast[fast.wv.vocab]
pca = PCA(n_components=2)
result = pca.fit_transform(X)

pyplot.scatter(result[:, 0], result[:, 1])
words = list(fast.wv.vocab)
for i, word in enumerate(words):
    # Bug fix: this loop body was unindented in the original
    # (IndentationError); annotate each point with its word.
    pyplot.annotate(word, xy=(result[i, 0], result[i, 1]))
pyplot.show()

# fasttext vector for the word 'nlp'

# Related articles: