From 005b3ae47b3ffc6ae46a821415f82f6283bb032e Mon Sep 17 00:00:00 2001 From: Louis Dalibard Date: Thu, 16 Jan 2025 22:46:14 +0100 Subject: [PATCH] might be more efficient --- .../github.com/MicahParks/peakdetect/LICENSE | 201 ++++++++++++++++++ .../MicahParks/peakdetect/README.md | 142 +++++++++++++ .../MicahParks/peakdetect/peakdetect.go | 186 ++++++++++++++++ 3 files changed, 529 insertions(+) create mode 100644 vendor/github.com/MicahParks/peakdetect/LICENSE create mode 100644 vendor/github.com/MicahParks/peakdetect/README.md create mode 100644 vendor/github.com/MicahParks/peakdetect/peakdetect.go diff --git a/vendor/github.com/MicahParks/peakdetect/LICENSE b/vendor/github.com/MicahParks/peakdetect/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/vendor/github.com/MicahParks/peakdetect/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/MicahParks/peakdetect/README.md b/vendor/github.com/MicahParks/peakdetect/README.md new file mode 100644 index 0000000..11a225a --- /dev/null +++ b/vendor/github.com/MicahParks/peakdetect/README.md @@ -0,0 +1,142 @@ +[![Go Reference](https://pkg.go.dev/badge/github.com/MicahParks/peakdetect.svg)](https://pkg.go.dev/github.com/MicahParks/peakdetect) [![Go Report Card](https://goreportcard.com/badge/github.com/MicahParks/peakdetect)](https://goreportcard.com/report/github.com/MicahParks/peakdetect) +# peakdetect +Detect peaks in realtime timeseries data using z-scores. This is a Golang implementation for the algorithm described +by [this StackOverflow answer](https://stackoverflow.com/a/22640362/14797322). + +Unlike some implementations, a goal is to minimize the memory footprint and allow for the processing of new data points +without reprocessing old ones. + +```go +import "github.com/MicahParks/peakdetect" +``` + +# Configuration +`Lag` determines how much your data will be smoothed and how adaptive the algorithm is to change in the long-term +average of the data. The more stationary your data is, the more lags you should include (this should improve the +robustness of the algorithm). If your data contains time-varying trends, you should consider how quickly you want the +algorithm to adapt to these trends. I.e., if you put lag at 10, it takes 10 'periods' before the algorithm's threshold +is adjusted to any systematic changes in the long-term average. So choose the lag parameter based on the trending +behavior of your data and how adaptive you want the algorithm to be. + +`Influence` determines the influence of signals on the algorithm's detection threshold. If put at 0, signals have no +influence on the threshold, such that future signals are detected based on a threshold that is calculated with a mean +and standard deviation that is not influenced by past signals. If put at 0.5, signals have half the influence of normal +data points. Another way to think about this is that if you put the influence at 0, you implicitly assume stationary ( +i.e. no matter how many signals there are, you always expect the time series to return to the same average over the long +term). If this is not the case, you should put the influence parameter somewhere between 0 and 1, depending on the +extent to which signals can systematically influence the time-varying trend of the data. E.g., if signals lead to a +structural break of the long-term average of the time series, the influence parameter should be put high (close to 1) so +the threshold can react to structural breaks quickly + +`Threshold` is the number of standard deviations from the moving mean above which the algorithm will classify a new +datapoint as being a signal. For example, if a new datapoint is 4.0 standard deviations above the moving mean and the +threshold parameter is set as 3.5, the algorithm will identify the datapoint as a signal. This parameter should be set +based on how many signals you expect. For example, if your data is normally distributed, a threshold (or: z-score) of +3.5 corresponds to a signaling probability of 0.00047 (from this table), which implies that you expect a signal once +every 2128 datapoints (1/0.00047). The threshold therefore directly influences how sensitive the algorithm is and +thereby also determines how often the algorithm signals. Examine your own data and choose a sensible threshold that +makes the algorithm signal when you want it to (some trial-and-error might be needed here to get to a good threshold for +your purpose) + +# Usage +```go +package main + +import ( + "fmt" + "log" + + "github.com/MicahParks/peakdetect" +) + +// This example is the equivalent of the R example from the algorithm's author. +// https://stackoverflow.com/a/54507329/14797322 +func main() { + data := []float64{1, 1, 1.1, 1, 0.9, 1, 1, 1.1, 1, 0.9, 1, 1.1, 1, 1, 0.9, 1, 1, 1.1, 1, 1, 1, 1, 1.1, 0.9, 1, 1.1, 1, 1, 0.9, 1, 1.1, 1, 1, 1.1, 1, 0.8, 0.9, 1, 1.2, 0.9, 1, 1, 1.1, 1.2, 1, 1.5, 1, 3, 2, 5, 3, 2, 1, 1, 1, 0.9, 1, 1, 3, 2.6, 4, 3, 3.2, 2, 1, 1, 0.8, 4, 4, 2, 2.5, 1, 1, 1} + + // Algorithm configuration from example. + const ( + lag = 30 + threshold = 5 + influence = 0 + ) + + // Create then initialize the peak detector. + detector := peakdetect.NewPeakDetector() + err := detector.Initialize(influence, threshold, data[:lag]) // The length of the initial values is the lag. + if err != nil { + log.Fatalf("Failed to initialize peak detector.\nError: %s", err) + } + + // Start processing new data points and determine what signal, if any they produce. + // + // This method, .Next(), is best for when data are being processed in a stream, but this simply iterates over a + // slice. + nextDataPoints := data[lag:] + for i, newPoint := range nextDataPoints { + signal := detector.Next(newPoint) + var signalType string + switch signal { + case peakdetect.SignalNegative: + signalType = "negative" + case peakdetect.SignalNeutral: + signalType = "neutral" + case peakdetect.SignalPositive: + signalType = "positive" + } + + println(fmt.Sprintf("Data point at index %d has the signal: %s", i+lag, signalType)) + } + + // This method, .NextBatch(), is a helper function for processing many data points at once. It's returned slice + // should produce the same signal outputs as the loop above. + signals := detector.NextBatch(nextDataPoints) + println(fmt.Sprintf("1:1 ratio of batch inputs to signal outputs: %t", len(signals) == len(nextDataPoints))) +} +``` + +# Testing +``` +$ go test -cover -race +PASS +coverage: 100.0% of statements +ok github.com/MicahParks/peakdetect 0.019s +``` + +# Performance +To further improve performance, this algorithm uses Welford's algorithm on initialization +and an adaptation of [this StackOverflow answer](https://stackoverflow.com/a/14638138/14797322) to calculate the mean +and population standard deviation for the lag period (sliding window). This appears to improve performance by more than +a factor of 10! + +`v0.0.4` +``` +goos: linux +goarch: amd64 +pkg: github.com/MicahParks/peakdetect +cpu: AMD Ryzen 9 7950X 16-Core Processor +BenchmarkPeakDetector_NextBatch-32 1000000000 0.0000221 ns/op +PASS +ok github.com/MicahParks/peakdetect 0.003s +``` + +`v0.1.0` +``` +goos: linux +goarch: amd64 +pkg: github.com/MicahParks/peakdetect +cpu: AMD Ryzen 9 7950X 16-Core Processor +BenchmarkPeakDetector_NextBatch-32 1000000000 0.0000011 ns/op +PASS +ok github.com/MicahParks/peakdetect 0.003s +``` + +# References +Brakel, J.P.G. van (2014). "Robust peak detection algorithm using z-scores". Stack Overflow. Available +at: https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362 +(version: 2020-11-08). + +* [StackOverflow: Peak detection in realtime timeseries data](https://stackoverflow.com/a/22640362/14797322). +* [StackOverflow: sliding window for online algorithm to calculate mean and standard devation](https://stackoverflow.com/a/14638138/14797322). +* [Welford's algorithm related blog post](https://www.johndcook.com/blog/standard_deviation/). +* Yeah, I used [Wikipedia](https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance) too. diff --git a/vendor/github.com/MicahParks/peakdetect/peakdetect.go b/vendor/github.com/MicahParks/peakdetect/peakdetect.go new file mode 100644 index 0000000..55c1b7d --- /dev/null +++ b/vendor/github.com/MicahParks/peakdetect/peakdetect.go @@ -0,0 +1,186 @@ +package peakdetect + +import ( + "errors" + "fmt" + "math" +) + +const ( + // SignalNegative indicates that a particular value is a negative peak. + SignalNegative Signal = -1 + // SignalNeutral indicates that a particular value is not a peak. + SignalNeutral Signal = 0 + // SignalPositive indicates that a particular value is a positive peak. + SignalPositive Signal = 1 +) + +// Signal is a set of enums that indicates what type of peak, if any a particular value is. +type Signal int8 + +// ErrInvalidInitialValues indicates that the initial values provided are not valid to initialize a PeakDetector. +var ErrInvalidInitialValues = errors.New("the initial values provided are invalid") + +type peakDetector struct { + index uint + influence float64 + lag uint + movingMeanStdDev *movingMeanStdDev + prevMean float64 + prevStdDev float64 + prevValue float64 + threshold float64 +} + +// PeakDetector detects peaks in realtime timeseries data using z-scores. +// +// This is a Golang interface for the algorithm described by this StackOverflow answer: +// https://stackoverflow.com/a/22640362/14797322 +// +// Brakel, J.P.G. van (2014). "Robust peak detection algorithm using z-scores". Stack Overflow. Available +// at: https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362 +// (version: 2020-11-08). +type PeakDetector interface { + // Initialize initializes the PeakDetector with its configuration and initialValues. The initialValues are the first + // values to be processed by the PeakDetector. The length of these values are used to configure the PeakDetector's + // lag (see description below). The PeakDetector will never return any signals for the initialValues. + // + // influence determines the influence of signals on the algorithm's detection threshold. If put at 0, signals have + // no influence on the threshold, such that future signals are detected based on a threshold that is calculated with + // a mean and standard deviation that is not influenced by past signals. If put at 0.5, signals have half the + // influence of normal data points. Another way to think about this is that if you put the influence at 0, you + // implicitly assume stationary (i.e. no matter how many signals there are, you always expect the time series to + // return to the same average over the long term). If this is not the case, you should put the influence parameter + // somewhere between 0 and 1, depending on the extent to which signals can systematically influence the time-varying + // trend of the data. E.g., if signals lead to a structural break of the long-term average of the time series, the + // influence parameter should be put high (close to 1) so the threshold can react to structural breaks quickly. + // + // threshold is the number of standard deviations from the moving mean above which the algorithm will classify a new + // datapoint as being a signal. For example, if a new datapoint is 4.0 standard deviations above the moving mean and + // the threshold parameter is set as 3.5, the algorithm will identify the datapoint as a signal. This parameter + // should be set based on how many signals you expect. For example, if your data is normally distributed, a + // threshold (or: z-score) of 3.5 corresponds to a signaling probability of 0.00047 (from this table), which implies + // that you expect a signal once every 2128 datapoints (1/0.00047). The threshold therefore directly influences how + // sensitive the algorithm is and thereby also determines how often the algorithm signals. Examine your own data and + // choose a sensible threshold that makes the algorithm signal when you want it to (some trial-and-error might be + // needed here to get to a good threshold for your purpose). + // + // lag determines how much your data will be smoothed and how adaptive the algorithm is to change in the long-term + // average of the data. The more stationary your data is, the more lags you should include (this should improve the + // robustness of the algorithm). If your data contains time-varying trends, you should consider how quickly you want + // the algorithm to adapt to these trends. I.e., if you put lag at 10, it takes 10 'periods' before the algorithm's + // threshold is adjusted to any systematic changes in the long-term average. So choose the lag parameter based on + // the trending behavior of your data and how adaptive you want the algorithm to be. + Initialize(influence, threshold float64, initialValues []float64) error + // Next processes the next value and determines its signal. + Next(value float64) Signal + // NextBatch processes the next values and determines their signals. Their signals will be returned in a slice equal + // to the length of the input. + NextBatch(values []float64) []Signal +} + +// NewPeakDetector creates a new PeakDetector. It must be initialized before use. +func NewPeakDetector() PeakDetector { + return &peakDetector{ + movingMeanStdDev: &movingMeanStdDev{}, + } +} + +func (p *peakDetector) Initialize(influence, threshold float64, initialValues []float64) error { + p.lag = uint(len(initialValues)) + if p.lag == 0 { + return fmt.Errorf("the length of the initial values is zero, the length is used as the lag for the algorithm: %w", ErrInvalidInitialValues) + } + p.influence = influence + p.threshold = threshold + + p.prevMean, p.prevStdDev = p.movingMeanStdDev.initialize(initialValues) + p.prevValue = initialValues[p.lag-1] + + return nil +} + +func (p *peakDetector) Next(value float64) (signal Signal) { + p.index++ + if p.index == p.lag { + p.index = 0 + } + + if math.Abs(value-p.prevMean) > p.threshold*p.prevStdDev { + if value > p.prevMean { + signal = SignalPositive + } else { + signal = SignalNegative + } + value = p.influence*value + (1-p.influence)*p.prevValue + } else { + signal = SignalNeutral + } + + p.prevMean, p.prevStdDev = p.movingMeanStdDev.next(value) + p.prevValue = value + + return signal +} + +func (p *peakDetector) NextBatch(values []float64) []Signal { + signals := make([]Signal, len(values)) + for i, v := range values { + signals[i] = p.Next(v) + } + return signals +} + +// meanStdDev determines the mean and population standard deviation for the given population. +type movingMeanStdDev struct { + cache []float64 + cacheLen float64 + cacheLenU uint + index uint + prevMean float64 + prevVariance float64 +} + +// initialize creates the needed assets for the movingMeanStdDev. It also computes the resulting mean and population +// standard deviation using Welford's method. +// +// https://www.johndcook.com/blog/standard_deviation/ +func (m *movingMeanStdDev) initialize(initialValues []float64) (mean, stdDev float64) { + m.cacheLenU = uint(len(initialValues)) + m.cacheLen = float64(m.cacheLenU) + m.cache = make([]float64, m.cacheLenU) + copy(m.cache, initialValues) + + mean = initialValues[0] + prevMean := mean + var sumOfSquares float64 + for i := uint(2); i <= m.cacheLenU; i++ { + value := initialValues[i-1] + mean = prevMean + (value-prevMean)/float64(i) + sumOfSquares = sumOfSquares + (value-prevMean)*(value-mean) + prevMean = mean + } + + m.prevMean = mean + m.prevVariance = sumOfSquares / m.cacheLen + return mean, math.Sqrt(m.prevVariance) +} + +// Next computes the next mean and population standard deviation. It uses a sliding window and is based on Welford's +// method. +// +// https://stackoverflow.com/a/14638138/14797322 +func (m *movingMeanStdDev) next(value float64) (mean, stdDev float64) { + outOfWindow := m.cache[m.index] + m.cache[m.index] = value + m.index++ + if m.index == m.cacheLenU { + m.index = 0 + } + + newMean := m.prevMean + (value-outOfWindow)/m.cacheLen + m.prevVariance = m.prevVariance + (value-newMean+outOfWindow-m.prevMean)*(value-outOfWindow)/(m.cacheLen) + m.prevMean = newMean + + return m.prevMean, math.Sqrt(m.prevVariance) +}