【发布时间】:2019-03-31 16:34:26
【问题描述】:
ML.Net 预测分数始终返回 NaN(空)。
我的想法是训练一个回归算法,让它学习我家的日常作息规律。我尝试了多个版本的 ML.Net NuGet 包以及多种代码示例的变体,但结果都一样:Score == NaN。下面是部分代码和一个数据集,该数据集是由我的家庭自动化系统记录的。
这是来自 MSDN 的电影推荐回归示例的变体:
/// <summary>
/// One row of the home-automation log, loaded from the CSV file via
/// <c>LoadFromTextFile&lt;AutomationData&gt;</c>. Each field is bound to a
/// zero-based CSV column index through <c>LoadColumn</c>.
/// </summary>
public class AutomationData
{
// Column 0: day of the week, encoded as 0 - 6.
[LoadColumn(0)]
public int Day;
// Column 1: time of day written as HHmm, e.g. 0947 == 9:47am.
[LoadColumn(1)]
public int TimeOfDay;
// Column 2: device id.
[LoadColumn(2)]
public int Device;
// Column 3: state of the device (0 = OFF, 1 = ON), used as the training label.
// Declared as float; the original author notes the trainer seems to require
// a float (R4) label - TODO confirm against the trainer's documentation.
[LoadColumn(3)]
public float Label;
}
/// <summary>
/// Output type of the prediction engine created with
/// <c>CreatePredictionEngine&lt;AutomationData, AutomationPrediction&gt;</c>.
/// </summary>
public class AutomationPrediction
{
// Label column carried through the pipeline; not read by the visible code.
public float Label;
// Score produced by the trained model; this is the value reported as NaN.
public float Score;
}
/// <summary>
/// Runs the complete workflow: loads the data, trains the model, evaluates it
/// on the test view and finally performs one sample prediction.
/// </summary>
public static void Regression()
{
    MLContext mlContext = new MLContext();

    // Call LoadData once and deconstruct the tuple, so the loaders are not
    // set up twice only to throw away half of each result.
    (IDataView trainingDataView, IDataView testDataView) = LoadData(mlContext);

    ITransformer model = BuildAndTrainModel(mlContext, trainingDataView);
    EvaluateModel(mlContext, testDataView, model);
    UseModelForSinglePrediction(mlContext, model);
}
/// <summary>
/// Creates the training and test data views from the CSV file located under
/// <c>MachineLearning/Data</c> in the current directory.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the text loaders.</param>
/// <returns>A tuple holding the training view and the test view.</returns>
public static (IDataView training, IDataView test) LoadData(MLContext mlContext)
{
    // Comma-separated file with a header row, mapped onto AutomationData.
    IDataView LoadCsv(string fileName)
    {
        string fullPath = Path.Combine(Environment.CurrentDirectory, "MachineLearning/Data", fileName);
        return mlContext.Data.LoadFromTextFile<AutomationData>(fullPath, hasHeader: true, separatorChar: ',');
    }

    // NOTE(review): training and test both read the same "data.csv", so the
    // evaluation is performed on the data the model was trained on.
    return (training: LoadCsv("data.csv"), test: LoadCsv("data.csv"));
}
/// <summary>
/// Builds the pipeline (key-encoding of Device, TimeOfDay and Day followed by
/// a matrix-factorization trainer) and fits it on the training data.
/// </summary>
/// <param name="mlContext">ML.NET context providing the transform and trainer catalogs.</param>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <returns>The fitted transformer chain.</returns>
public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView trainingDataView)
{
    // Trainer configuration: devices are the matrix columns, times of day the rows.
    var factorizationOptions = new MatrixFactorizationTrainer.Options
    {
        MatrixColumnIndexColumnName = "deviceEncoded",
        MatrixRowIndexColumnName = "timeOfDayEncoded",
        LabelColumnName = "Label",
        NumberOfIterations = 20,
        ApproximationRank = 100
    };

    // Map the raw integer columns to key types.
    // NOTE(review): "dayEncoded" is produced here but is not referenced by the
    // trainer options above.
    var conversions = mlContext.Transforms.Conversion;
    IEstimator<ITransformer> keyEncoders = conversions
        .MapValueToKey(outputColumnName: "deviceEncoded", inputColumnName: "Device")
        .Append(conversions.MapValueToKey(outputColumnName: "timeOfDayEncoded", inputColumnName: "TimeOfDay"))
        .Append(conversions.MapValueToKey(outputColumnName: "dayEncoded", inputColumnName: "Day"));

    var pipeline = keyEncoders.Append(mlContext.Recommendation().Trainers.MatrixFactorization(factorizationOptions));
    return pipeline.Fit(trainingDataView);
}
/// <summary>
/// Scores the test data with the trained model and prints the regression
/// metrics (Rms and RSquared) to the console.
/// </summary>
/// <param name="mlContext">ML.NET context providing the regression evaluator.</param>
/// <param name="testDataView">Data to score and evaluate.</param>
/// <param name="model">Trained model used to score the test data.</param>
public static void EvaluateModel(MLContext mlContext, IDataView testDataView, ITransformer model)
{
    IDataView scoredTestData = model.Transform(testDataView);

    var regressionMetrics = mlContext.Regression.Evaluate(
        scoredTestData,
        label: DefaultColumnNames.Label,
        score: DefaultColumnNames.Score);

    Console.WriteLine($"Rms: {regressionMetrics.Rms}");
    Console.WriteLine($"RSquared: {regressionMetrics.RSquared}");
}
/// <summary>
/// Predicts the state of one sample (device 117 at 09:45) and prints the
/// score together with the state derived from it.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the prediction engine.</param>
/// <param name="model">Trained model to query.</param>
public static void UseModelForSinglePrediction(MLContext mlContext, ITransformer model)
{
    // Labels are 0 (OFF) / 1 (ON), so the midpoint separates the two states.
    const float onThreshold = 0.5f;

    var predictionEngine = model.CreatePredictionEngine<AutomationData, AutomationPrediction>(mlContext);

    // 0945 is the decimal literal 945 (C# has no octal literals).
    var testInput = new AutomationData { Device = 117, TimeOfDay = 0945 };
    var automationPrediction = predictionEngine.Predict(testInput);
    float score = automationPrediction.Score;

    Console.WriteLine("Prediction Score: " + Math.Round(score, 1));

    // NaN compares false against any threshold, so it must be handled
    // explicitly instead of silently falling into the "OFF" branch.
    // NOTE(review): a NaN score presumably means this device/time pair maps to
    // a key the model never saw during training - confirm against the
    // matrix-factorization documentation.
    if (float.IsNaN(score))
    {
        Console.WriteLine("State: unknown (score is NaN; the device/time combination may not be present in the training data)");
        return;
    }

    // Report the state derived from the score; the input's own Label is never
    // set for a prediction request and therefore always reads 0.
    int predictedState = score >= onThreshold ? 1 : 0;
    Console.WriteLine("State: " + predictedState);
}
}
这是回归算法尝试使用的 data.csv 片段。
Day,TimeOfDay,Device,State
6,0827,999,1
6,0827,117,1
6,0827,117,0
6,0838,18,1
6,0838,79,1
6,0838,6,1
6,0901,117,1
6,0908,999,0
6,0910,73,0
6,0913,72,1
6,0914,72,0
6,0915,79,0
6,0915,6,0
6,0915,5,0
6,0915,4,0
6,0915,18,0
6,1015,18,1
6,1015,79,1
6,1015,6,1
6,1015,5,1
6,1015,4,1
6,1726,18,1
6,1726,79,1
6,1726,51,0
6,1726,128,0
6,1726,69,0
我希望预测结果返回设备状态 0 或 1(关或开),同时返回一个分数(浮点数),用来表示回归模型对该预测的把握程度。
【问题讨论】: