我正在寻找一种从给定数据集生成直方图的方法。我已经了解过构建直方图的几种不同方案,其中最感兴趣的是基于以下论文的方法:
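Shimazaki, H.; Shinomoto, S. (2007). "A method for selecting the bin size of a time histogram"(一种用于选择时间直方图 bin 大小的方法). Neural Computation 19(6): 1503–1527.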
上述方法通过估计来确定最优的 bin 宽度和分布,这正是我的场景所需要的,因为样本数据的分布会发生变化,难以预先确定 bin 的数量和宽度。
有人能推荐一个好的资料或起点,用于在 C# 中编写这样的函数吗?或者有没有足够接近的 C# 直方图代码可供参考?
非常感谢.
以下是我根据该算法的 Python 版本移植而来的 C# 实现。我知道这个 API 还有改进的余地,但它应该足以让你入门。对于相同的输入数据,此代码的结果与 Python 代码生成的结果相同。
/// <summary>
/// Histogram helpers built around the Shimazaki-Shinomoto bin-width selection rule:
/// for each candidate number of bins the cost (2 * mean - variance) / width^2 of the
/// per-bin counts is evaluated, and the width with the lowest cost is chosen.
/// </summary>
public class HistSample
{
    /// <summary>
    /// Searches the candidate bin counts and returns the bin width with the lowest cost.
    /// </summary>
    /// <param name="x">Sample values to be histogrammed.</param>
    /// <param name="minBins">Smallest number of bins to try (inclusive). Defaults to 4.</param>
    /// <param name="maxBins">Upper bound on the number of bins to try (exclusive). Defaults to 50.</param>
    /// <returns>
    /// The width of the lowest-cost candidate; ties go to the smallest bin count.
    /// Returns 0 when every sample has the same value, because no positive width exists.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
    /// <exception cref="InvalidOperationException"><paramref name="x"/> is empty.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="minBins"/> is below 1, or <paramref name="maxBins"/> is not greater than it.
    /// </exception>
    public static double CalculateOptimalBinWidth(double[] x, int minBins = 4, int maxBins = 50)
    {
        if (x == null)
        {
            throw new ArgumentNullException("x");
        }
        if (x.Length == 0)
        {
            // Same exception type Enumerable.Max/Min raise for an empty sequence.
            throw new InvalidOperationException("Sequence contains no elements");
        }
        if (minBins < 1)
        {
            throw new ArgumentOutOfRangeException("minBins", "At least one bin is required.");
        }
        if (maxBins <= minBins)
        {
            throw new ArgumentOutOfRangeException("maxBins", "maxBins must be greater than minBins.");
        }

        double xMin = x.Min(), xMax = x.Max();
        double range = xMax - xMin;
        if (range == 0)
        {
            // Constant data: every candidate width would be 0 and the cost would divide by zero.
            return 0.0;
        }

        double bestCost = double.PositiveInfinity;
        double bestWidth = range / minBins;
        for (int binCount = minBins; binCount < maxBins; binCount++)
        {
            double width = range / binCount;
            double[] counts = Histogram(x, LinearSpace(xMin, xMax, binCount + 1));

            // Mean and (biased) variance are taken over ALL bins, including the outermost ones.
            double mean = counts.Average();
            double variance = counts.Sum(k => (k - mean) * (k - mean)) / binCount;
            double cost = (2 * mean - variance) / (width * width);

            // Strict comparison keeps the first minimum and never throws on NaN costs.
            if (cost < bestCost)
            {
                bestCost = cost;
                bestWidth = width;
            }
        }

        return bestWidth;
    }

    /// <summary>
    /// Counts how many values fall into each bin described by consecutive edges.
    /// </summary>
    /// <param name="data">Values to count.</param>
    /// <param name="binEdges">Bin boundaries in ascending order; bin i spans binEdges[i] to binEdges[i + 1].</param>
    /// <returns>
    /// One count per bin (binEdges.Length - 1 entries, or an empty array when fewer than two
    /// edges are given). Values outside the outermost edges are ignored.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="data"/> or <paramref name="binEdges"/> is null.
    /// </exception>
    public static double[] Histogram(double[] data, double[] binEdges)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }
        if (binEdges == null)
        {
            throw new ArgumentNullException("binEdges");
        }
        if (binEdges.Length < 2)
        {
            return new double[0];
        }

        int binCount = binEdges.Length - 1;
        double[] counts = new double[binCount];
        for (int i = 0; i < binCount; i++)
        {
            double lower = binEdges[i], upper = binEdges[i + 1];
            bool isLastBin = i == binCount - 1;
            for (int j = 0; j < data.Length; j++)
            {
                double value = data[j];

                // Bins include their lower edge and exclude their upper edge, so a value sitting
                // on an interior edge is counted once. Only the last bin also includes its upper
                // edge; the exact comparison is intentional so the maximum value is not lost.
                if (value >= lower && (value < upper || (isLastBin && value == upper)))
                {
                    counts[i]++;
                }
            }
        }
        return counts;
    }

    /// <summary>
    /// Produces <paramref name="count"/> evenly spaced values from <paramref name="a"/> to
    /// <paramref name="b"/>, both end points included.
    /// </summary>
    /// <param name="a">First value.</param>
    /// <param name="b">Last value.</param>
    /// <param name="count">Number of values to generate.</param>
    /// <returns>
    /// The generated values; an empty array for a count of 0 and just <paramref name="a"/>
    /// for a count of 1.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    public static double[] LinearSpace(double a, double b, int count)
    {
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException("count", "count must not be negative.");
        }

        double[] output = new double[count];
        if (count == 0)
        {
            return output;
        }

        output[0] = a;
        if (count == 1)
        {
            // A single point has no step; the general formula would compute 0 / 0.
            return output;
        }

        for (int i = 1; i < count - 1; i++)
        {
            output[i] = a + ((i * (b - a)) / (count - 1));
        }

        // Assign the end point directly so rounding cannot push the last edge off b.
        output[count - 1] = b;
        return output;
    }
}
Run Code Online (Sandbox Code Playgroud)
像这样运行:
// Sample observations handed to the bin-width search.
double[] samples = new double[]
{
    4.37, 3.87, 4.00, 4.03, 3.50, 4.08, 2.25, 4.70, 1.73,
    4.93, 1.73, 4.62, 3.43, 4.25, 1.68, 3.92, 3.68, 3.10,
    4.03, 1.77, 4.08, 1.75, 3.20, 1.85, 4.62, 1.97, 4.50,
    3.92, 4.35, 2.33, 3.83, 1.88, 4.60, 1.80, 4.73, 1.77,
    4.57, 1.85, 3.52, 4.00, 3.70, 3.72, 4.25, 3.58, 3.80,
    3.77, 3.75, 2.50, 4.50, 4.10, 3.70, 3.80, 3.43, 4.00,
    2.27, 4.40, 4.05, 4.25, 3.33, 2.00, 4.33, 2.93, 4.58,
    1.90, 3.58, 3.73, 3.73, 1.82, 4.63, 3.50, 4.00, 3.67,
    1.67, 4.60, 1.67, 4.00, 1.80, 4.42, 1.90, 4.63, 2.93,
    3.50, 1.97, 4.28, 1.83, 4.13, 1.83, 4.65, 4.20, 3.93,
    4.33, 1.83, 4.53, 2.03, 4.18, 4.43, 4.07, 4.13, 3.95,
    4.10, 2.27, 4.58, 1.90, 4.50, 1.95, 4.83, 4.12
};

// Run the optimal bin-width calculation over the samples.
HistSample.CalculateOptimalBinWidth(samples);
Run Code Online (Sandbox Code Playgroud)