source: trunk/eraser/Eraser.Util/Sampler.cs @ 2515

Revision 2515, 3.8 KB checked in by lowjoel, 3 years ago (diff)

Set svn:keywords and svn:eol-style on all the source files.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Rev URL
Line 
1/*
2 * $Id$
3 * Copyright 2008-2010 The Eraser Project
4 * Original Author: Joel Low <lowjoel@users.sourceforge.net>
5 * Modified By:
6 *
7 * This file is part of Eraser.
8 *
9 * Eraser is free software: you can redistribute it and/or modify it under the
10 * terms of the GNU General Public License as published by the Free Software
11 * Foundation, either version 3 of the License, or (at your option) any later
12 * version.
13 *
14 * Eraser is distributed in the hope that it will be useful, but WITHOUT ANY
15 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 *
18 * A copy of the GNU General Public License can be found at
19 * <http://www.gnu.org/licenses/>.
20 */
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26
27namespace Eraser.Util
28{
29    /// <summary>
30    /// Provides functions to sample data.
31    /// </summary>
32    /// <typeparam name="T">The type of data to sample.</typeparam>
33    public class Sampler
34    {
35        public void Add(double sample)
36        {
37            Samples.Add(new KeyValuePair<DateTime, double>(DateTime.Now, sample));
38        }
39
40        /// <summary>
41        /// Resets the sampler. This is useful when the existing data is found to be biased.
42        /// </summary>
43        public void Reset()
44        {
45            Samples.Clear();
46        }
47
48        /// <summary>
49        /// Gets the prediction interval for this data set.
50        /// </summary>
51        /// <param name="significanceLevel">The level of significance of the prediction.</param>
52        /// <returns>Null if insufficient data to make a prediction is available.</returns>
53        public Interval Predict(double significanceLevel)
54        {
55            if (Samples.Count < 2)
56                return null;
57
58            double mean = Samples.Average(sample => sample.Value);
59            double variance = Math.Sqrt(Samples.Sum(
60                sample => Math.Pow(sample.Value - mean, 2.0)) / (double)(Samples.Count - 1));
61            double tPercentile = alglib.studenttdistr.invstudenttdistribution(
62                Samples.Count - 1, (significanceLevel + 1) / 2);
63
64            double interval = tPercentile * variance * Math.Sqrt(1 + (1.0 / Samples.Count));
65            return new Interval(mean - interval, mean + interval);
66        }
67
68        /// <summary>
69        /// Gets the outliers in the sample.
70        /// </summary>
71        /// <param name="significanceLevel">The level of significance for the prediction.</param>
72        /// <returns>The list of samples which are outliers, or null if insufficient data is
73        /// available to determine which samples are outliers.</returns>
74        public IList<KeyValuePair<DateTime, double>> GetOutliers(double significanceLevel)
75        {
76            Interval interval = Predict(significanceLevel);
77            if (interval == null)
78                return null;
79
80            return Samples.Where(sample => !interval.Within(sample.Value)).ToList();
81        }
82
83        /// <summary>
84        /// The samples comprising this data set.
85        /// </summary>
86        private List<KeyValuePair<DateTime, double>> Samples =
87            new List<KeyValuePair<DateTime, double>>();
88    }
89
90    /// <summary>
91    /// Represents an interval.
92    /// </summary>
93    public class Interval
94    {
95        /// <summary>
96        /// Constructor.
97        /// </summary>
98        /// <param name="minimum">The lower bound of the interval.</param>
99        /// <param name="maximum">The upper bound of the interval.</param>
100        public Interval(double minimum, double maximum)
101        {
102            Minimum = minimum;
103            Maximum = maximum;
104        }
105
106        /// <summary>
107        /// Checks whether the given value is within this interval.
108        /// </summary>
109        /// <param name="value">The value to check.</param>
110        /// <returns>True if the value is within this interval.</returns>
111        public bool Within(double value)
112        {
113            return value >= Minimum && value <= Maximum;
114        }
115
116        /// <summary>
117        /// The lower bound of the interval.
118        /// </summary>
119        public double Minimum { get; private set; }
120
121        /// <summary>
122        /// The upper bound of the interval.
123        /// </summary>
124        public double Maximum { get; private set; }
125    }
126}
Note: See TracBrowser for help on using the repository browser.