java-无法在artff文件weka中使用字符串属性并构建分类器
作者:互联网
嗨,我正在使用Weka进行机器学习,我的artff文件格式如下
`@relation datastest
@attribute fwoh {what, when, where, how, who, why}
@attribute parameter {color, performance}
@attribute object { power, cost}
@attribute model {x,y,z}
@attribute question String`
我尝试使用J48,PART,DecisionTable,ZeroR和SMO,在构建分类器时,所有分类器都将我置于异常之下.
weka.core.UnsupportedAttributeTypeException: weka.classifiers.rules.ZeroR: Cannot handle string class!
at weka.core.Capabilities.test(Capabilities.java:1164)
at weka.core.Capabilities.test(Capabilities.java:1303)
at weka.core.Capabilities.test(Capabilities.java:1208)
at weka.core.Capabilities.testWithFail(Capabilities.java:1506)
at weka.classifiers.rules.ZeroR.buildClassifier(ZeroR.java:122)
at wekaproject.TextCategorizationTest.main(TextCategorizationTest.java:66)
我建立分类器如下
final Instances data = new Instances(readDataFile("questions.txt"));
final Classifier classifier = new SMO();
classifier.buildClassifier(data );
谁能告诉我应该使用什么分类器.而且我应该使用StringToWordVector.我尝试使用StringToVector,但没有帮助我.谁能告诉我如何使用StringToVector,如果需要的话.
更新:
这是输入的arff文件
@relation 'text_files_in_C:\\Desktop\\test'
@attribute id {a,b,c}
@attribute ids {g,h,i}
@attribute idss {k,l,m}
@attribute contents string
@data
a,g,k,'x'
b,h,l'y'
c,i,m,'z'
这是过滤后的输出arff文件
@relation 'text_files_in_C:\\Desktop\\test-weka.filters.unsupervised.attribute.StringToWordVector-D.,:\\\'\\\"()?!-R4-W1000000-C-T-N1-L-stemmerweka.core.stemmers.NullStemmer-M1'
@attribute id {a,b,c}
@attribute ids {g,h,i}
@attribute idss {k,l,m}
@attribute x numeric
@attribute y numeric
@attribute z numeric
@data
{3 0.693147}
{0 b,1 h,2 l,4 0.693147}
{0 c,1 i,2 m,5 0.693147}
我正在尝试测试的实例
@relation 'text_files_in_C:\\Desktop\\test'
@attribute id {a,b,c}
@attribute ids {g,h,i}
@attribute idss {k,l,m}
@attribute contents string
@data
b,h,l,'x'
c,i,m,'y'
这是我的Java代码
package wekaproject;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.rules.DecisionTable;
import weka.classifiers.rules.PART;
import weka.classifiers.rules.ZeroR;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.SerializationHelper;
import weka.core.converters.ArffSaver;
import weka.core.stemmers.LovinsStemmer;
import weka.core.stemmers.Stemmer;
import weka.core.stopwords.WordsFromFile;
import weka.core.tokenizers.NGramTokenizer;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToNominal;
import weka.filters.unsupervised.attribute.StringToWordVector;
public class TestWeka {
public static BufferedReader readDataFile(String filename) {
BufferedReader inputReader = null;
try {
inputReader = new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException ex) {
System.err.println("File not found: " + filename);
}
return inputReader;
}
public static void main(final String [] args) throws Exception {
System.out.println("Running");
final StringToWordVector filter = new StringToWordVector();
final ZeroR classifier = new ZeroR();
final Instances data = new Instances(readDataFile("test.arff"));
data.setClassIndex(data.numAttributes() - 1);
// Use filter.
String[] options = new String[2];
options[0] = "-R"; // "range"
options[1] = "4";
filter.setOptions(options);
filter.setInputFormat(data);
Instances filteredData = Filter.useFilter(data, filter);
filteredData.setClassIndex(0);
// Rebuild classifier.
classifier.buildClassifier(filteredData);
ArffSaver saver = new ArffSaver();
saver.setInstances(data);
saver.setFile(new File("input_test_filtered.arff"));
saver.writeBatch();
Instances testInstances=new Instances(readDataFile("test2.arff"));
testInstances.setClassIndex(testInstances.numAttributes()-1);
Instances filteredTestData=Filter.useFilter(testInstances, filter);
filteredTestData.setClassIndex(data.numAttributes()-1);
saver = new ArffSaver();
saver.setInstances(testInstances);
saver.setFile(new File("output_test_filtered.arff"));
saver.writeBatch();
for (int j = 0; j < filteredTestData.numInstances(); j++) {
double value = classifier.classifyInstance(filteredTestData.instance(j));
System.out.println("value::" + value);
// get the prediction percentage or distribution
double[] percentage = classifier.distributionForInstance(filteredTestData.instance(j));
String prediction = data.classAttribute().value((int) value);
for (int i = 0; i < percentage.length; i = i + 1) {
System.out.println("Probability of class " + data.classAttribute().value(i)
+ " : " + Double.toString(percentage[i]));
}
System.out.println("The predicted value of instance " + Integer.toString(j) + ": " + prediction);
}
}
} // End of the class //
当我对实例进行分类时,我总是得到X作为结果.任何帮助深表感谢!!!
更新的代码
package wekaproject;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import weka.classifiers.functions.LibLINEAR;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToNominal;
public class demo1 {
public demo1() throws Exception {
// TODO Auto-generated constructor stub
BufferedReader breader = null;
breader = new BufferedReader(new FileReader(
"test.arff"));
Instances Train = new Instances(breader);
//Train.setClassIndex(Train.numAttributes() - 1); // comment out this line
LibLINEAR kMeans = new LibLINEAR();
StringToNominal filter=new StringToNominal();
String options[]=new String[2];
options[0]="-R";
options[1]="4";
filter.setInputFormat(Train);
Instances traineData=Filter.useFilter(Train, filter);
traineData.setClassIndex(4);
kMeans.buildClassifier(traineData);
breader.close();
ArffSaver saver = new ArffSaver();
saver.setInstances(traineData);
saver.setFile(new File("output_test_filtered2.arff"));
saver.writeBatch();
Instance instance = new DenseInstance(4);
instance.setDataset(traineData);
instance.setValue(0, "what");
instance.setValue(1, "car");
instance.setValue(2, "green");
instance.setValue(3, "y");
double value = kMeans.classifyInstance(instance);
System.out.println("value::" + value);
double[] percentage = kMeans.distributionForInstance(instance);
String prediction = traineData.classAttribute().value((int) value);
for (int i = 0; i < percentage.length; i = i + 1) {
System.out.println("Probability of class " + traineData.classAttribute().value(i)
+ " : " + Double.toString(percentage[i]));
}
System.out.println("The predicted value of instance " + Integer.toString(0) + ": " + prediction);
}
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
new demo1();
}
}
解决方法:
尝试使用StringToNominal过滤器转换“类属性”.
命令行调用是
weka.filters.unsupervised.attribute.StringToNominal -R最后
目前,我还不知道如何从Java代码内部调用它.
标签:weka,java 来源: https://codeday.me/bug/20191112/2024007.html