【发布时间】:2016-06-24 08:08:18
【问题描述】:
几天前我已经开始在 weka 上工作了。 (即我也是 weka 和 java 的新手)
我正在使用 weka 为推文寻找评估措施。运行程序时出现以下错误:
weka.classifiers.bayes.NaiveBayesMultinomialUpdateable:无法处理字符串属性! weka.core.UnsupportedAttributeTypeException:weka.classifiers.bayes.NaiveBayesMultinomialUpdateable:无法处理字符串属性! 在 weka.core.Capabilities.test(未知来源) 在 weka.core.Capabilities.test(未知来源) 在 weka.core.Capabilities.test(未知来源) 在 weka.core.Capabilities.test(未知来源) 在 weka.core.Capabilities.testWithFail(未知来源) 在 weka.classifiers.bayes.NaiveBayesMultinomialUpdateable.buildClassifier(未知来源) 在 com.ConnectGlobe.TextDirectoryToArff.main(TextDirectoryToArff.java:83)
计划:
package com.ConnectGlobe;
import java.io.*;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayesMultinomialUpdateable;
import weka.core.*;
/**
*
* @author sv
*/
public class TextDirectoryToArff {
public Instances createDataset(String directoryPath) throws Exception {
FastVector atts = new FastVector(2);
atts.addElement(new Attribute("filename", (FastVector) null));
atts.addElement(new Attribute("contents", (FastVector) null));
Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);
File dir = new File(directoryPath);
String[] files = dir.list();
for (int i = 0; i < files.length; i++) {
if (files[i].endsWith(".txt")) {
try {
double[] newInst = new double[2];
newInst[0] = (double)data.attribute(0).addStringValue(files[i]);
File txt = new File(directoryPath + File.separator + files[i]);
InputStreamReader is;
is = new InputStreamReader(new FileInputStream(txt));
StringBuffer txtStr = new StringBuffer();
int c;
while ((c = is.read()) != -1) {
txtStr.append((char)c);
}
newInst[1] = (double)data.attribute(1).addStringValue(txtStr.toString());
data.add(new Instance(1.0, newInst));
} catch (Exception e) {
//System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
}
}
}
return data;
}
public static void main(String[] args) {
TextDirectoryToArff tdta1 = new TextDirectoryToArff();
TextDirectoryToArff tdta2 = new TextDirectoryToArff();
try {
Instances dataset1 = tdta1.createDataset("C:\\1"); // .txt file will be loaded
dataset1.setClassIndex(dataset1.numAttributes() - 1 );
Instances dataset2 = tdta2.createDataset("C:\\2");
dataset2.setClassIndex(dataset1.numAttributes() - 1);
System.out.println(dataset1);
System.out.println(dataset2);
double precision = 0, recall=0,fmeasure=0,error=0;
int size1 = dataset1.numInstances() / 10;
int size2 = dataset2.numInstances() / 10;
int begin = 0;
int end = size1 - 1 ;
for (int i=1 ; i<=10;i++)
{
System.out.println("iteration :" + 1);
Instances training = new Instances(dataset1);
Instances testing = new Instances(dataset1, begin , (end - begin));
for (int j=0;j < (end - begin); j++)
training.delete(begin);
Classifier tree = (Classifier)new NaiveBayesMultinomialUpdateable();
Instances filteredInstaces = training;
StringToNominal nominal ;
for(int a=0;a<training.numAttributes()-1;a++)
{
if(training.attribute(a).isString())
{
nominal = new StringToNominal();
nominal.setInputFormat(filteredInstaces);
training = Filter.useFilter(training, nominal);
}
}
tree.buildClassifier(filteredInstaces);
Evaluation eval = new Evaluation(testing);
eval.evaluateModel(tree, testing);
System.out.println("Precision:" + eval.precision(1));
System.out.println("Recall:" + eval.recall(1));
System.out.println("Fmeasure:" + eval.fMeasure(1));
System.out.println("Error:" + eval.errorRate());
precision += eval.precision(1);
recall += eval.recall(1);
fmeasure += eval.fMeasure(1);
error += eval.errorRate();
//update
begin = end + 1;
end+= size1;
if(i==(9))
{
end = dataset1.numInstances();
}
System.out.println("Precision:" + precision/10.0);
System.out.println("Recall:" + recall/10.0);
System.out.println("Fmeasure:" + fmeasure/10.0);
System.out.println("Error:" + error/0.0);
}
// Classifier cls = new NaiveBayesMultinomialUpdateable();
// cls.buildClassifier(dataset1);
// evaluate classifier and print some statistics
// Evaluation eval = new Evaluation(dataset1);
//eval.evaluateModel(cls, dataset2);
//eval.crossValidateModel(cls,dataset1,10, dataset2.getRandomNumberGenerator(1));
//System.out.println(eval.toSummaryString("\nResults\n======\n", false));
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
}
}
}
【问题讨论】:
-
我在 C:\\1\data.txt 中提供了一些推文。一些推文如下:wake nap negtokentaking。午睡奖励。银行卡尺钱差报警晨度!一个早上。发动机深夜清晨。糟糕 2am negtoken 迟到了吗?
标签: twitter weka naivebayes