一、概述

本篇我们首先通过回归算法实现一个葡萄酒品质预测的程序,然后通过AutoML的方法再重新实现,通过对比两种实现方式来学习AutoML的应用。

本文使用的数据集是竞赛网站kaggle.com上的UCI Wine Quality Dataset,访问地址:https://www.kaggle.com/c/uci-wine-quality-dataset/data

该数据集的输入为葡萄酒的一些理化检测数据(比如酒精度等),输出为品酒师的打分,具体字段描述如下:

Data fields
Input variables (based on physicochemical tests): 
1 - fixed acidity 
2 - volatile acidity 
3 - citric acid 
4 - residual sugar 
5 - chlorides 
6 - free sulfur dioxide 
7 - total sulfur dioxide 
8 - density 
9 - pH 
10 - sulphates 
11 - alcohol

Output variable (based on sensory data): 
12 - quality (score between 0 and 10)

Other:
13 - id (unique ID for each sample, needed for submission)

   

二、代码

namespace Regression_WineQuality
{
    /// <summary>
    /// One row of the wine-quality CSV file. Each field is bound to a zero-based
    /// column index through <c>LoadColumn</c>; the field order follows the
    /// dataset's documented column order (11 physicochemical inputs, the quality
    /// score, then the sample id).
    /// </summary>
    public class WineData
    {
        // ---- Input variables (physicochemical tests) ----

        /// <summary>Fixed acidity (column 0).</summary>
        [LoadColumn(0)] public float FixedAcidity;

        /// <summary>Volatile acidity (column 1).</summary>
        [LoadColumn(1)] public float VolatileAcidity;

        /// <summary>Citric acid (column 2).</summary>
        [LoadColumn(2)] public float CitricACID;

        /// <summary>Residual sugar (column 3).</summary>
        [LoadColumn(3)] public float ResidualSugar;

        /// <summary>Chlorides (column 4).</summary>
        [LoadColumn(4)] public float Chlorides;

        /// <summary>Free sulfur dioxide (column 5).</summary>
        [LoadColumn(5)] public float FreeSulfurDioxide;

        /// <summary>Total sulfur dioxide (column 6).</summary>
        [LoadColumn(6)] public float TotalSulfurDioxide;

        /// <summary>Density (column 7).</summary>
        [LoadColumn(7)] public float Density;

        /// <summary>pH (column 8).</summary>
        [LoadColumn(8)] public float PH;

        /// <summary>Sulphates (column 9).</summary>
        [LoadColumn(9)] public float Sulphates;

        /// <summary>Alcohol (column 10).</summary>
        [LoadColumn(10)] public float Alcohol;

        // ---- Output variable (sensory data) ----

        /// <summary>
        /// Quality score (column 11). Exposed to the pipeline under the column
        /// name "Label", i.e. this is the value the regression is trained on.
        /// </summary>
        [LoadColumn(11), ColumnName("Label")] public float Quality;

        // ---- Other ----

        /// <summary>Sample id (column 12).</summary>
        [LoadColumn(12)] public float Id;
    }

    /// <summary>
    /// Output schema of the regression model: receives the value of the
    /// pipeline's "Score" column.
    /// </summary>
    public class WinePrediction
    {
        /// <summary>Predicted quality, bound to the "Score" column.</summary>
        [ColumnName("Score")] public float PredictionQuality;
    }

    /// <summary>
    /// Console application that trains a wine-quality regression model, saves it
    /// to disk, then reloads it and scores a single hand-written sample.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Location of the serialized model: <c>MLModel/model.zip</c> under the
        /// current working directory.
        /// </summary>
        static readonly string ModelFilePath = Path.Combine(Environment.CurrentDirectory, "MLModel", "model.zip");

        /// <summary>
        /// Entry point: trains and saves the model, runs one prediction, then
        /// waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Train();
            Prediction();

            Console.WriteLine("Hit any key to finish the app");
            Console.ReadKey();
        }

        /// <summary>
        /// Loads the CSV data set, holds out 20% of it for testing, fits a
        /// Poisson regression on the remainder, prints the evaluation metrics
        /// and writes the trained model to <see cref="ModelFilePath"/>.
        /// </summary>
        public static void Train()
        {
            MLContext mlContext = new MLContext(seed: 1);

            // Prepare the data.
            string TrainDataPath = Path.Combine(Environment.CurrentDirectory, "Data", "winequality-data-full.csv");
            var fulldata = mlContext.Data.LoadFromTextFile<WineData>(path: TrainDataPath, separatorChar: ',', hasHeader: true);

            var trainTestData = mlContext.Data.TrainTestSplit(fulldata, testFraction: 0.2);
            var trainData = trainTestData.TrainSet;
            var testData = trainTestData.TestSet;

            // Build the learning pipeline: drop the sample id, normalize the two
            // sulfur-dioxide columns, then pack the 11 inputs into "Features".
            var dataProcessPipeline = mlContext.Transforms.DropColumns("Id")
                .Append(mlContext.Transforms.NormalizeMeanVariance(nameof(WineData.FreeSulfurDioxide)))
                .Append(mlContext.Transforms.NormalizeMeanVariance(nameof(WineData.TotalSulfurDioxide)))
                .Append(mlContext.Transforms.Concatenate("Features", new string[] { nameof(WineData.FixedAcidity),
                                                                                    nameof(WineData.VolatileAcidity),
                                                                                    nameof(WineData.CitricACID),
                                                                                    nameof(WineData.ResidualSugar),
                                                                                    nameof(WineData.Chlorides),
                                                                                    nameof(WineData.FreeSulfurDioxide),
                                                                                    nameof(WineData.TotalSulfurDioxide),
                                                                                    nameof(WineData.Density),
                                                                                    nameof(WineData.PH),
                                                                                    nameof(WineData.Sulphates),
                                                                                    nameof(WineData.Alcohol)}));

            var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(labelColumnName: "Label", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);
            var trainedModel = trainingPipeline.Fit(trainData);

            // Evaluate on the held-out test set.
            var predictions = trainedModel.Transform(testData);
            var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
            // NOTE(review): PrintRegressionMetrics is not defined in this listing;
            // presumably it comes from a shared console helper - confirm it is in scope.
            PrintRegressionMetrics(trainer.ToString(), metrics);

            // Save the model. The target directory is created first because
            // saving to a file path fails when the parent directory is missing.
            Console.WriteLine("====== Save model to local file =========");
            var modelDirectory = Path.GetDirectoryName(ModelFilePath);
            if (!string.IsNullOrEmpty(modelDirectory))
            {
                Directory.CreateDirectory(modelDirectory);
            }
            mlContext.Model.Save(trainedModel, trainData.Schema, ModelFilePath);
        }

        /// <summary>
        /// Loads the model saved by <see cref="Train"/> and prints the predicted
        /// quality for one hard-coded sample.
        /// </summary>
        static void Prediction()
        {
            MLContext mlContext = new MLContext(seed: 1);

            // The input schema returned by Load is not needed here, so it is discarded.
            ITransformer loadedModel = mlContext.Model.Load(ModelFilePath, out _);

            WineData wineData = new WineData
            {
                FixedAcidity = 7.6f,
                VolatileAcidity = 0.33f,
                CitricACID = 0.36f,
                ResidualSugar = 2.1f,
                Chlorides = 0.034f,
                FreeSulfurDioxide = 26f,
                TotalSulfurDioxide = 172f,
                Density = 0.9944f,
                PH = 3.42f,
                Sulphates = 0.48f,
                Alcohol = 10.5f
            };

            // The prediction engine is disposable; release it once the sample is scored.
            using (var predictor = mlContext.Model.CreatePredictionEngine<WineData, WinePrediction>(loadedModel))
            {
                var wineQuality = predictor.Predict(wineData);
                Console.WriteLine($"Wine Data  Quality is:{wineQuality.PredictionQuality} ");
            }
        }
    }
}
View Code

相关文章: