# 支持向量机 (Support Vector Machine)



















import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import *
# Load the CNEWS train / test / validation splits.
# Each file is parsed as tab-separated text with two columns; the first column
# (named 'title' here) is used below as the class label and the second
# ('content') as the document text.
#
# The paths are raw string literals: in a normal literal, sequences such as
# '\P', '\C' and '\c' are invalid escapes that newer Python versions warn
# about. The resulting path strings are unchanged.
train_data = pd.read_csv(
    r'F:\PY-Learning\CNEWS\cnews\cnews.train.txt',
    names=['title', 'content'],
    sep='\t',
    engine='python',
    encoding='UTF-8',
)
test_data = pd.read_csv(
    r'F:\PY-Learning\CNEWS\cnews\cnews.test.txt',
    names=['title', 'content'],
    sep='\t',
    engine='python',
    encoding='UTF-8',
)
val_data = pd.read_csv(
    r'F:\PY-Learning\CNEWS\cnews\cnews.val.txt',
    names=['title', 'content'],
    sep='\t',
    engine='python',
    encoding='UTF-8',
)

# Document text (model input) for each split.
x_train = train_data['content']
x_test = test_data['content']
x_val = val_data['content']

# Class labels (model target) for each split.
y_train = train_data['title']
y_test = test_data['title']
y_val = val_data['title']
# --- Bag-of-words features + SVC --------------------------------------------
# Fit the vocabulary on the training text only, then reuse it for the test
# text so both matrices share the same columns.
# NOTE(review): CountVectorizer's default tokenizer is used as-is; if the
# content is unsegmented Chinese text, word segmentation beforehand may be
# needed for meaningful tokens - confirm against the data.
count_vec = CountVectorizer()
x_count_train = count_vec.fit_transform(x_train)
x_count_test = count_vec.transform(x_test)

# Second bag-of-words variant that drops scikit-learn's built-in English stop
# words.
# NOTE(review): these matrices are not consumed by the code visible here; they
# are kept because later code may rely on them.
count_stop_vec = CountVectorizer(analyzer='word', stop_words='english')
x_count_stop_train = count_stop_vec.fit_transform(x_train)
x_count_stop_test = count_stop_vec.transform(x_test)

# Train an SVC with default hyperparameters on the raw term counts.
# (The "mnb_" prefix is historical; the estimator is an SVC.)
mnb_count = SVC()
mnb_count.fit(x_count_train, y_train)
mnb_count_y_predict = mnb_count.predict(x_count_test)

# Accuracy is the fraction of test documents whose predicted label matches the
# true label. It is derived from the predictions already computed above rather
# than calling .score(), which would run the whole prediction pass again, and
# it is printed so the result is not silently discarded.
mnb_count_accuracy = (y_test == mnb_count_y_predict).mean()
print('SVC + CountVectorizer test accuracy:', mnb_count_accuracy)
# --- TF-IDF features + SVC --------------------------------------------------
# Fit the TF-IDF vocabulary and weights on the training text only, then apply
# the same transformation to the test text.
tfid_vec = TfidfVectorizer()
x_tfid_train = tfid_vec.fit_transform(x_train)
x_tfid_test = tfid_vec.transform(x_test)

# Train an SVC with default hyperparameters on the TF-IDF features.
# (The "mnb_" prefix is historical; the estimator is an SVC.)
mnb_tfid = SVC()
mnb_tfid.fit(x_tfid_train, y_train)
mnb_tfid_y_predict = mnb_tfid.predict(x_tfid_test)

# Accuracy is the fraction of test documents whose predicted label matches the
# true label. It is derived from the predictions already computed above rather
# than calling .score(), which would run the whole prediction pass again, and
# it is printed so the result is not silently discarded.
mnb_tfid_accuracy = (y_test == mnb_tfid_y_predict).mean()
print('SVC + TfidfVectorizer test accuracy:', mnb_tfid_accuracy)