【发布时间】:2017-11-12 20:33:11
【问题描述】:
我对以下代码有疑问。该代码可以正常工作,但是在使用并行 for 循环与常规 for 循环时,我收到了不同的输出值。我需要让并行 for 循环正常工作,因为我运行此代码数千次。有谁知道为什么我的并行 for 循环返回不同的输出?
private object _lock = new object();
public double CalculatePredictedRSquared()
{
double press = 0, tss = 0, press2 = 0, press1 = 0;
Vector<double> output = CreateVector.Dense(Enumerable.Range(0, 400).Select(i => Convert.ToDouble(i)).ToArray());
List<double> input1 = new List<double>(Enumerable.Range(0, 400).Select(i => Convert.ToDouble(i)));
List<double> input2 = new List<double>(Enumerable.Range(200, 400).Select(i => Convert.ToDouble(i)));
Parallel.For(0, output.Count, i =>
{
ConcurrentBag<MultipleRegressionInfo> listMRInfoBag = new ConcurrentBag<MultipleRegressionInfo>(listMRInfo);
ConcurrentBag<double> vectorArrayBag = new ConcurrentBag<double>(output);
ConcurrentBag<double[]> matrixList = new ConcurrentBag<double[]>();
lock (_lock)
{
matrixList.Add(input1.Where((v, k) => k != i).ToArray());
matrixList.Add(input2.Where((v, k) => k != i).ToArray());
}
var matrixArray2 = CreateMatrix.DenseOfColumnArrays(matrixList);
var actualResult = vectorArrayBag.ElementAt(i);
var newVectorArray = CreateVector.Dense(vectorArrayBag.Where((v, j) => j != i).ToArray());
var items = FindBestMRSolution(matrixArray2, newVectorArray);
double estimate1 = 0;
if (items != null)
{
lock (_lock)
{
var y = 0d;
var independentCount = matrixArray2.RowCount;
var dependentCount = newVectorArray.Count;
if (independentCount == dependentCount)
{
var populationCount = independentCount;
y = newVectorArray.Average();
for (int l = 0; l < matrixArray2.ColumnCount; l++)
{
var avg = matrixArray2.Column(l).Average();
y -= avg * items[l];
}
}
for (int m = 0; m < 2; m++)
{
var coefficient = items[m];
if (m == 0)
{
estimate1 += input1.ElementAt(i) * coefficient;
}
else
{
estimate1 += input2.ElementAt(i) * coefficient;
}
}
estimate1 += y;
}
}
else
{
lock (_lock)
{
estimate1 = 0;
}
}
lock (_lock)
{
press1 += Math.Pow(actualResult - estimate1, 2);
}
});
for (int i = 0; i < output.Count; i++)
{
List<double[]> matrixList = new List<double[]>();
matrixList.Add(input1.Where((v, k) => k != i).ToArray());
matrixList.Add(input2.Where((v, k) => k != i).ToArray());
var matrixArray = CreateMatrix.DenseOfColumnArrays(matrixList);
var actualResult = output.ElementAt(i);
var newVectorArray = CreateVector.Dense(output.Where((v, j) => j != i).ToArray());
var items = FindBestMRSolution(matrixArray, newVectorArray);
double estimate = 0;
if (items != null)
{
var y = CalculateYIntercept(matrixArray, newVectorArray, items);
for (int m = 0; m < 2; m++)
{
var coefficient = items[m];
if (m == 0)
{
estimate += input1.ElementAt(i) * coefficient;
}
else
{
estimate += input2.ElementAt(i) * coefficient;
}
}
}
else
{
estimate = 0;
}
press2 += Math.Pow(actualResult - estimate, 2);
}
tss = CalculateTotalSumOfSquares(vectorArray.ToList());
var test1 = 1 - (press1 / tss);
var test2 = 1 - (press2 / tss);
}
public Vector<double> CalculateWithQR(Matrix<double> x, Vector<double> y)
{
Vector<double> result = null;
result = MultipleRegression.QR(x, y);
for (int i = 0; i < result.Count; i++)
{
var value = result.ElementAt(i);
if (Double.IsNaN(value) || Double.IsInfinity(value))
{
return null;
}
}
return result;
}
public Vector<double> CalculateWithNormal(Matrix<double> x, Vector<double> y)
{
Vector<double> result = null;
result = MultipleRegression.NormalEquations(x, y);
for (int i = 0; i < result.Count; i++)
{
var value = result.ElementAt(i);
if (Double.IsNaN(value) || Double.IsInfinity(value))
{
return null;
}
}
return result;
}
public Vector<double> CalculateWithSVD(Matrix<double> x, Vector<double> y)
{
Vector<double> result = null;
result = MultipleRegression.Svd(x, y);
for (int i = 0; i < result.Count; i++)
{
var value = result.ElementAt(i);
if (Double.IsNaN(value) || Double.IsInfinity(value))
{
return null;
}
}
return result;
}
public Vector<double> FindBestMRSolution(Matrix<double> x, Vector<double> y)
{
Vector<double> result = null;
result = CalculateWithNormal(x, y);
if (result != null)
{
return result;
}
else
{
result = CalculateWithSVD(x, y);
if (result != null)
{
return result;
}
else
{
result = CalculateWithQR(x, y);
if (result != null)
{
return result;
}
}
}
return result;
}
public double CalculateTotalSumOfSquares(List<double> dependentVariables)
{
double tts = 0;
for (int i = 0; i < dependentVariables.Count; i++)
{
tts += Math.Pow(dependentVariables.ElementAt(i) - dependentVariables.Average(), 2);
}
return tts;
}
实际输出(更新结果):
test1 = 137431.12889999992 (parallel for loop)
test2 = 7.3770258447689254E- (regular for loop)
结语:如何设置符合 MCVE 的测试
这可能是准备一个真正完全可重现设置的 MCVE 代码 + A/B/C/... DataSET-s ,放入一个可立即运行的 [IDE 和测试沙箱, 超链接此处][1],以便社区成员可以单击重新运行按钮并专注于根本原因分析,而不是解码和重新设计大量不完整的 SLOC。
如果这适用于 O/P,它将适用于 O/P 要求答案或帮助的其他社区成员。
我的新版代码:
public double CalculatePredictedRSquared()
{
Vector<double> output = CreateVector.Dense(Enumerable.Range(0, 400).Select(i => Convert.ToDouble(i)).ToArray());
List<double> input1 = new List<double>(Enumerable.Range(0, 400).Select(i => Convert.ToDouble(i)));
List<double> input2 = new List<double>(Enumerable.Range(200, 400).Select(i => Convert.ToDouble(i)));
double tss = CalculateTotalSumOfSquares(output.ToList());
IEnumerable<int> range = Enumerable.Range(0, output.Count);
var query = range.Select(i => DoIt(i, output, input1, input2));
var result = 1 - (query.Sum() / tss);
return result;
}
public double DoIt(int i, Vector<double> output, List<double> input1, List<double> input2)
{
List<double[]> matrixList = new List<double[]>
{
input1.Where((v, k) => k != i).ToArray(),
input2.Where((v, k) => k != i).ToArray()
};
var matrixArray = CreateMatrix.DenseOfColumnArrays(matrixList);
var actualResult = output.ElementAt(i);
var newVectorArray = CreateVector.Dense(output.Where((v, j) => j != i).ToArray());
var items = FindBestMRSolution(matrixArray, newVectorArray);
double estimate = 0;
if (items != null)
{
var y = CalculateYIntercept(matrixArray, newVectorArray, items);
for (int m = 0; m < 2; m++)
{
var coefficient = items[m];
if (m == 0)
{
estimate += input1.ElementAt(i) * coefficient;
}
else
{
estimate += input2.ElementAt(i) * coefficient;
}
}
}
else
{
estimate = 0;
}
return Math.Pow(actualResult - estimate, 2);
}
【问题讨论】:
-
我在并行代码中看到了外部调用,比如
FindBestMRSolution,你确定这些东西是线程安全的,因为它们可以以这种方式/上下文安全地使用吗? -
无论如何,请发minimal reproducible example。
-
“发布完整示例的唯一方法是......” - 不,学习将问题减少到不到 30 行。以
File|New Project开头。 -
如果你不能缩小范围,这里没有人可以帮助你。并行代码产生与顺序版本不同的结果的典型原因是您弄乱了共享状态,因此多个并行线程会更改并使用相同的值。另外,您是否验证了这两个值中的哪一个是正确的?
-
说真的:从一个更简单的问题开始。您试图通过随机使东西线程安全并随机引入锁来解决问题;如果您继续这样做,您将失败。编写正确的多线程程序需要详细了解内存和控制流在 C# 中的工作原理。
标签: c# multithreading parallel-processing