package peakdetect

import (
	"errors"
	"fmt"
	"math"
)

const (
	// SignalNegative indicates that a particular value is a negative peak.
	SignalNegative Signal = -1

	// SignalNeutral indicates that a particular value is not a peak.
	SignalNeutral Signal = 0

	// SignalPositive indicates that a particular value is a positive peak.
	SignalPositive Signal = 1
)

// Signal is a set of enums that indicates what type of peak, if any, a particular value is.
type Signal int8

// ErrInvalidInitialValues indicates that the initial values provided are not valid to initialize a PeakDetector.
var ErrInvalidInitialValues = errors.New("the initial values provided are invalid")

type peakDetector struct {
	index            uint
	influence        float64
	lag              uint
	movingMeanStdDev *movingMeanStdDev
	prevMean         float64
	prevStdDev       float64
	prevValue        float64
	threshold        float64
}

// PeakDetector detects peaks in realtime timeseries data using z-scores.
//
// This is a Golang interface for the algorithm described by this StackOverflow answer:
// https://stackoverflow.com/a/22640362/14797322
//
// Brakel, J.P.G. van (2014). "Robust peak detection algorithm using z-scores". Stack Overflow. Available
// at: https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362
// (version: 2020-11-08).
type PeakDetector interface {
	// Initialize initializes the PeakDetector with its configuration and initialValues. The initialValues are the
	// first values to be processed by the PeakDetector. The length of these values is used to configure the
	// PeakDetector's lag (see description below). The PeakDetector will never return any signals for the
	// initialValues.
	//
	// influence determines the influence of signals on the algorithm's detection threshold. If put at 0, signals
	// have no influence on the threshold, such that future signals are detected based on a threshold that is
	// calculated with a mean and standard deviation that is not influenced by past signals. If put at 0.5, signals
	// have half the influence of normal data points. Another way to think about this is that if you put the
	// influence at 0, you implicitly assume stationarity (i.e. no matter how many signals there are, you always
	// expect the time series to return to the same average over the long term). If this is not the case, you should
	// put the influence parameter somewhere between 0 and 1, depending on the extent to which signals can
	// systematically influence the time-varying trend of the data. E.g., if signals lead to a structural break of
	// the long-term average of the time series, the influence parameter should be put high (close to 1) so the
	// threshold can react to structural breaks quickly.
	//
	// threshold is the number of standard deviations from the moving mean above which the algorithm will classify a
	// new datapoint as being a signal. For example, if a new datapoint is 4.0 standard deviations above the moving
	// mean and the threshold parameter is set to 3.5, the algorithm will identify the datapoint as a signal. This
	// parameter should be set based on how many signals you expect. For example, if your data is normally
	// distributed, a threshold (or: z-score) of 3.5 corresponds to a signaling probability of 0.00047 (from a
	// standard normal table), which implies that you expect a signal once every 2128 datapoints (1/0.00047). The
	// threshold therefore directly influences how sensitive the algorithm is and thereby also determines how often
	// the algorithm signals. Examine your own data and choose a sensible threshold that makes the algorithm signal
	// when you want it to (some trial and error may be needed to find a good threshold for your purpose).
	//
	// lag determines how much your data will be smoothed and how adaptive the algorithm is to changes in the
	// long-term average of the data. The more stationary your data is, the more lags you should include (this
	// should improve the robustness of the algorithm). If your data contains time-varying trends, you should
	// consider how quickly you want the algorithm to adapt to these trends. I.e., if you put lag at 10, it takes 10
	// 'periods' before the algorithm's threshold is adjusted to any systematic changes in the long-term average. So
	// choose the lag parameter based on the trending behavior of your data and how adaptive you want the algorithm
	// to be.
	//
	// A minimal usage sketch follows this interface definition.
	Initialize(influence, threshold float64, initialValues []float64) error

	// Next processes the next value and determines its signal.
	Next(value float64) Signal

	// NextBatch processes the next values and determines their signals. The signals are returned in a slice whose
	// length equals that of the input.
	NextBatch(values []float64) []Signal
}
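
// exampleUsage is a minimal usage sketch, not part of the original file. The
// parameter values (influence 0.5, threshold 3.5, lag 32) and the input series
// are illustrative assumptions, not recommendations; series is assumed to hold
// more than 32 values.
func exampleUsage(series []float64) {
	detector := NewPeakDetector()

	// The first 32 values seed the lag window; no signals are emitted for them.
	if err := detector.Initialize(0.5, 3.5, series[:32]); err != nil {
		fmt.Println("initialize failed:", err)
		return
	}

	// Process the remaining values one at a time, reporting positive peaks.
	for i, v := range series[32:] {
		if detector.Next(v) == SignalPositive {
			fmt.Printf("positive peak at index %d (value %f)\n", i+32, v)
		}
	}
}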

// NewPeakDetector creates a new PeakDetector. It must be initialized before use.
func NewPeakDetector() PeakDetector {
	return &peakDetector{
		movingMeanStdDev: &movingMeanStdDev{},
	}
}

func (p *peakDetector) Initialize(influence, threshold float64, initialValues []float64) error {
	p.lag = uint(len(initialValues))
	if p.lag == 0 {
		return fmt.Errorf("the length of the initial values is zero, the length is used as the lag for the algorithm: %w", ErrInvalidInitialValues)
	}
	p.influence = influence
	p.threshold = threshold

	p.prevMean, p.prevStdDev = p.movingMeanStdDev.initialize(initialValues)
	p.prevValue = initialValues[p.lag-1]

	return nil
}

func (p *peakDetector) Next(value float64) (signal Signal) {
	p.index++
	if p.index == p.lag {
		p.index = 0
	}

	// A value is a signal when it deviates from the moving mean by more than
	// threshold standard deviations.
	if math.Abs(value-p.prevMean) > p.threshold*p.prevStdDev {
		if value > p.prevMean {
			signal = SignalPositive
		} else {
			signal = SignalNegative
		}

		// Dampen the signaling value before it enters the moving window, so it
		// shifts the threshold only in proportion to the influence parameter.
		value = p.influence*value + (1-p.influence)*p.prevValue
	} else {
		signal = SignalNeutral
	}

	p.prevMean, p.prevStdDev = p.movingMeanStdDev.next(value)
	p.prevValue = value

	return signal
}
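
// For intuition on the damping above (illustrative numbers, not from the
// original source): with influence = 0.5, a signaling value of 10.0 following
// a previous stored value of 2.0 enters the moving window as
// 0.5*10.0 + 0.5*2.0 = 6.0, so the spike only partially shifts the threshold.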

func (p *peakDetector) NextBatch(values []float64) []Signal {
	signals := make([]Signal, len(values))
	for i, v := range values {
		signals[i] = p.Next(v)
	}
	return signals
}

// movingMeanStdDev computes the moving mean and population standard deviation over a sliding window of values.
type movingMeanStdDev struct {
	cache        []float64
	cacheLen     float64
	cacheLenU    uint
	index        uint
	prevMean     float64
	prevVariance float64
}

// initialize creates the needed assets for the movingMeanStdDev. It also computes the resulting mean and population
// standard deviation using Welford's method.
//
// https://www.johndcook.com/blog/standard_deviation/
func (m *movingMeanStdDev) initialize(initialValues []float64) (mean, stdDev float64) {
	m.cacheLenU = uint(len(initialValues))
	m.cacheLen = float64(m.cacheLenU)
	m.cache = make([]float64, m.cacheLenU)
	copy(m.cache, initialValues)

	// Seed Welford's recurrence with the first value, then fold in the rest.
	mean = initialValues[0]
	prevMean := mean
	var sumOfSquares float64
	for i := uint(2); i <= m.cacheLenU; i++ {
		value := initialValues[i-1]
		mean = prevMean + (value-prevMean)/float64(i)
		sumOfSquares = sumOfSquares + (value-prevMean)*(value-mean)
		prevMean = mean
	}

	m.prevMean = mean
	m.prevVariance = sumOfSquares / m.cacheLen
	return mean, math.Sqrt(m.prevVariance)
}
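
// For reference, the Welford recurrence implemented above is (notation added
// here for illustration, not from the original source):
//
//	mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k
//	S_k    = S_{k-1} + (x_k - mean_{k-1}) * (x_k - mean_k)
//
// where the population variance of n values is S_n / n.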

// next computes the next mean and population standard deviation. It uses a sliding window and is based on Welford's
// method.
//
// https://stackoverflow.com/a/14638138/14797322
func (m *movingMeanStdDev) next(value float64) (mean, stdDev float64) {
	// Replace the oldest cached value with the incoming one, treating the
	// cache as a ring buffer.
	outOfWindow := m.cache[m.index]
	m.cache[m.index] = value
	m.index++
	if m.index == m.cacheLenU {
		m.index = 0
	}

	newMean := m.prevMean + (value-outOfWindow)/m.cacheLen
	m.prevVariance = m.prevVariance + (value-newMean+outOfWindow-m.prevMean)*(value-outOfWindow)/m.cacheLen
	m.prevMean = newMean

	return m.prevMean, math.Sqrt(m.prevVariance)
}
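
// For reference, the sliding-window update implemented above is (notation
// added here for illustration, not from the original source), with window
// length n, incoming value x_in, and outgoing value x_out:
//
//	mean'     = mean + (x_in - x_out) / n
//	variance' = variance + (x_in - mean' + x_out - mean) * (x_in - x_out) / n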