Version: SMASH-3.1
lowess.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015,2017-2020
3  * SMASH Team
4  *
5  * GNU General Public License (GPLv3 or later)
6  *
7  * This code has been adapted from ROOT's TGraphSmooth.cxx.
8  *
9  * Copyright (c) 2006 Rene Brun and Fons Rademakers.
10  * Copyright (c) 2001 Christian Stratowa
11  * Copyright (c) 1999-2001 Robert Gentleman, Ross Ihaka and the R
12  * Development Core Team
13  */
14 #ifndef SRC_INCLUDE_SMASH_LOWESS_H_
15 #define SRC_INCLUDE_SMASH_LOWESS_H_
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cmath>
20 #include <cstddef>
21 #include <utility>
22 #include <vector>
23 
24 namespace smash {
25 
26 namespace lowess {
32 template <typename T>
33 void lowest(const T *x, const T *y, size_t n, T xs, T &ys, size_t nleft,
34  size_t nright, T *w, bool userw, T *rw, bool &ok) {
35  // indices start at 1
36  x--;
37  y--;
38  w--;
39  rw--;
40 
41  const auto range = x[n] - x[1];
42  const auto h = std::max(xs - x[nleft], x[nright] - xs);
43  const auto h9 = 0.999 * h;
44  const auto h1 = 0.001 * h;
45 
46  // sum of weights
47  T a = 0.;
48  auto j = nleft;
49  while (j <= n) {
50  // compute weights (pick up all ties on right)
51  w[j] = 0.;
52  const auto r = std::abs(x[j] - xs);
53  if (r <= h9) {
54  if (r <= h1) {
55  w[j] = 1.;
56  } else {
57  const auto d = (r / h) * (r / h) * (r / h);
58  w[j] = (1. - d) * (1. - d) * (1. - d);
59  }
60  if (userw)
61  w[j] *= rw[j];
62  a += w[j];
63  } else if (x[j] > xs) {
64  break;
65  }
66  j += 1;
67  }
68 
69  // rightmost pt (may be greater than nright because of ties)
70  const auto nrt = j - 1;
71  if (a <= 0.) {
72  ok = false;
73  } else {
74  ok = true;
75  // weighted least squares: make sum of w[j] == 1
76  for (j = nleft; j <= nrt; j++)
77  w[j] /= a;
78  if (h > 0.) {
79  a = 0.;
80  // use linear fit weighted center of x values
81  for (j = nleft; j <= nrt; j++)
82  a += w[j] * x[j];
83  auto b = xs - a;
84  T c = 0.;
85  for (j = nleft; j <= nrt; j++)
86  c += w[j] * (x[j] - a) * (x[j] - a);
87  if (std::sqrt(c) > 0.001 * range) {
88  b /= c;
89  // points are spread out enough to compute slope
90  for (j = nleft; j <= nrt; j++)
91  w[j] *= (b * (x[j] - a) + 1.);
92  }
93  }
94  ys = 0.;
95  for (j = nleft; j <= nrt; j++)
96  ys += w[j] * y[j];
97  }
98 }
99 
105 template <typename T>
106 void psort(T *x, size_t n, size_t k) {
107  for (size_t pL = 0, pR = n - 1; pL < pR;) {
108  const auto v = x[k];
109  size_t i;
110  size_t j;
111  for (i = pL, j = pR; i <= j;) {
112  while (x[i] < v) {
113  i++;
114  }
115  while (v < x[j]) {
116  j--;
117  }
118  if (i <= j) {
119  const auto w = x[i];
120  x[i++] = x[j];
121  x[j--] = w;
122  }
123  }
124  if (j < k) {
125  pL = i;
126  }
127  if (k < i) {
128  pR = j;
129  }
130  }
131 }
132 
138 template <typename T>
139 void lowess(const T *x, const T *y, size_t n, T *ys, T span, size_t iter,
140  T delta, T *rw, T *res) {
141  if (n < 2) {
142  ys[0] = y[0];
143  return;
144  }
145 
146  // nleft, nright, last, etc. must all be shifted to get rid of these:
147  x--;
148  y--;
149  ys--;
150 
151  // at least two, at most n points
152  constexpr size_t two = 2;
153  const auto ns =
154  std::max(two, std::min(n, static_cast<size_t>(span * n + 1e-7)));
155 
156  // robustness iterations
157  size_t iiter = 1;
158  while (iiter <= iter + 1) {
159  size_t nleft = 1;
160  size_t nright = ns;
161  size_t last = 0; // index of prev estimated point
162  size_t i = 1; // index of current point
163 
164  for (;;) {
165  if (nright < n) {
166  // move nleft, nright to right if radius decreases
167  const auto d1 = x[i] - x[nleft];
168  const auto d2 = x[nright + 1] - x[i];
169 
170  // if d1 <= d2 with x[nright+1] == x[nright], lowest fixes
171  if (d1 > d2) {
172  // radius will not decrease by move right
173  nleft++;
174  nright++;
175  continue;
176  }
177  }
178 
179  // fitted value at x[i]
180  const bool iterg1 = iiter > 1;
181  bool ok;
182  lowest(&x[1], &y[1], n, x[i], ys[i], nleft, nright, res, iterg1, rw, ok);
183  if (!ok)
184  ys[i] = y[i];
185 
186  // all weights zero copy over value (all rw==0)
187  if (last < i - 1) {
188  const auto denom = x[i] - x[last];
189 
190  // skipped points -- interpolate non-zero - proof?
191  for (auto j = last + 1; j < i; j++) {
192  const auto alpha = (x[j] - x[last]) / denom;
193  ys[j] = alpha * ys[i] + (1. - alpha) * ys[last];
194  }
195  }
196 
197  // last point actually estimated
198  last = i;
199 
200  // x coord of close points
201  const auto cut = x[last] + delta;
202  for (i = last + 1; i <= n; i++) {
203  if (x[i] > cut)
204  break;
205  if (x[i] == x[last]) {
206  ys[i] = ys[last];
207  last = i;
208  }
209  }
210  i = std::max(last + 1, i - 1);
211  if (last >= n)
212  break;
213  }
214 
215  // residuals
216  for (i = 0; i < n; i++)
217  res[i] = y[i + 1] - ys[i + 1];
218 
219  // compute robustness weights except last time
220  if (iiter > iter)
221  break;
222  for (i = 0; i < n; i++)
223  rw[i] = std::abs(res[i]);
224 
225  // compute cmad := 6 * median(rw[], n)
226  const auto m1 = n / 2;
227  // partial sort, for m1 & m2
228  // TODO(steinberg): consider replacing psort with std::partial_sort
229  psort(rw, n, m1);
230  T cmad;
231  if (n % 2 == 0) {
232  const auto m2 = n - m1 - 1;
233  psort(rw, n, m2);
234  cmad = 3. * (rw[m1] + rw[m2]);
235  } else { /* n odd */
236  cmad = 6. * rw[m1];
237  }
238 
239  const auto c9 = 0.999 * cmad;
240  const auto c1 = 0.001 * cmad;
241  for (i = 0; i < n; i++) {
242  if (cmad == 0.) {
243  // In this case, `r` cannot really be smaller than `c1` or `c2`, so we
244  // would set `rw[i] = 0` anyway. To avoid divisions by zero, we do this
245  // already here.
246  rw[i] = 0;
247  continue;
248  }
249  const auto r = std::abs(res[i]);
250 
251  if (r <= c1)
252  rw[i] = 1.;
253  else if (r <= c9)
254  rw[i] = (1. - (r / cmad) * (r / cmad)) * (1. - (r / cmad) * (r / cmad));
255  else
256  rw[i] = 0.;
257  }
258  iiter++;
259  }
260 }
261 
262 } // namespace lowess
263 
288 template <typename T>
289 std::vector<T> smooth(const std::vector<T> &x, const std::vector<T> &y,
290  T span = 2. / 3, size_t iter = 3, T delta = 0) {
291  assert(x.size() == y.size());
292  std::vector<T> result;
293  result.resize(x.size());
294  std::vector<T> rw;
295  rw.resize(x.size());
296  std::vector<T> res;
297  res.resize(x.size());
298  lowess::lowess(&x.front(), &y.front(), x.size(), &result.front(), span, iter,
299  delta, &rw.front(), &res.front());
300  return result;
301 }
302 
303 } // namespace smash
304 
305 #endif // SRC_INCLUDE_SMASH_LOWESS_H_
void lowest(const T *x, const T *y, size_t n, T xs, T &ys, size_t nleft, size_t nright, T *w, bool userw, T *rw, bool &ok)
Fit value at x[i] Based on R function lowest: Translated to C++ by C.
Definition: lowess.h:33
void psort(T *x, size_t n, size_t k)
Partial sort.
Definition: lowess.h:106
void lowess(const T *x, const T *y, size_t n, T *ys, T span, size_t iter, T delta, T *rw, T *res)
Lowess regression smoother.
Definition: lowess.h:139
constexpr int h1
h₁(1170).
constexpr int n
Neutron.
Definition: action.h:24
std::vector< T > smooth(const std::vector< T > &x, const std::vector< T > &y, T span=2./3, size_t iter=3, T delta=0)
Apply the LOWESS smoother (see the reference below) to the given data (x, y).
Definition: lowess.h:289