示例代码如下
# -*- coding: utf-8 -*-
"""Train a word2vec model on a text corpus file and save it to disk."""
import os
import sys

from gensim.models import word2vec

if sys.version_info[0] == 2:
    # Python 2 only: the original script forces the interpreter-wide default
    # string encoding to UTF-8.  `reload` is not a builtin on Python 3 and
    # `sys.setdefaultencoding` does not exist there, so the hack is skipped.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Path to the original Sogou corpus.
CORPUS_PATH = u"/users1/ymli/wlj/dataset/corpus/sogou_seg_all_ban.txt"
# Where the trained model is written.
MODEL_PATH = "./sogou_phrase/sogou_phrase.model"


def main(input_file=CORPUS_PATH, model_path=MODEL_PATH):
    """Train a word2vec model on ``input_file`` and save it to ``model_path``.

    Args:
        input_file: Path of the corpus file handed to
            ``word2vec.Text8Corpus``.  Defaults to the Sogou corpus path.
        model_path: Destination file for ``model.save``.  Its parent
            directory is created when it does not exist yet.
    """
    # Create the output directory *before* training: otherwise a missing
    # directory is only discovered by `model.save` after the whole training
    # run has finished, and the trained model is lost.
    out_dir = os.path.dirname(model_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    sentences = word2vec.Text8Corpus(input_file)

    # NOTE(review): gensim 4.x renamed `size` to `vector_size`; the keyword is
    # kept as written for the gensim release this script targets -- confirm
    # against the installed version before upgrading.
    model = word2vec.Word2Vec(
        sentences,
        sg=1,
        size=100,
        window=5,
        min_count=5,
        negative=3,
        sample=0.001,
        hs=1,
        workers=4,
    )
    model.save(model_path)


if __name__ == "__main__":
    main()

 

相关文章:

  • 2021-08-06
  • 2021-05-02
  • 2021-05-03
  • 2021-06-15
猜你喜欢
  • 2022-12-23
  • 2021-04-18
  • 2021-10-05
  • 2022-12-23
  • 2022-12-23
  • 2021-07-16
  • 2021-10-05
相关资源
相似解决方案