rdp.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. <?php
  2. /* Ramer-Douglas-Peucker Algorithm
  3. *
  4. * input data is a curve or graph, and outputs a similar curve or graph that
  5. * contains fewer data points
  6. *
  7. *
  8. * 1) connect the points at the beginning (a) and end (b) of the curve to
  9. * form a chord (ab)
  10. * 2) determine the point of the curve with the largest distance (d)
  11. * perpendicular to `ab`
  12. * 3) compare this `d` with a threshold (s) value. done when `d` is less
  13. * than `s`, `ab` is an approximation of the curve
  14. * 4) otherwise `d` is greater than `s`, divide `ab` into two segments, (ac)
  15. * and (cb), for each chord follow steps `1,2,3`
  16. * 5) when all chords are approximations of their curves, connect to give
  17. * an approximation of the original curve
  18. */
  19. class RamerDouglasPeucker {
  20. public $save;
  21. public /* array of */ $points;
  22. public $called = 0;
  23. private $lookuptable = array();
  24. private $epsilon;
  25. public function __construct(array /* of points */ $curve) {
  26. // store a copy of the curve
  27. $this->points = $curve;
  28. $firstPoint; $lastPoint;
  29. $firstPoint = array(
  30. $curve[0][0],
  31. $curve[1][0]
  32. );
  33. $lastPoint = array(
  34. $curve[0][count($curve[0]) - 1],
  35. $curve[1][count($curve[0]) - 1]
  36. );
  37. // keep the first and last points (use the X-axis as key)
  38. $this->save = array(
  39. $firstPoint[0] => $firstPoint[1],
  40. $lastPoint[0] => $lastPoint[1]
  41. );
  42. }
  43. // this way is faster, but fails for vertical and horizontal lines
  44. private function perpendicularDistance(array $start, array $end, array $p, array $lut) {
  45. // this is just for profiling, RDP is slower than I thought it would be
  46. $this->called++;
  47. // rename start (x,y) and end (x,y) points
  48. $x0 = $start[0];
  49. $y0 = $start[1];
  50. $x1 = $end[0];
  51. $y1 = $end[1];
  52. /* for either a horizontal, or impossible vertical line, we can determine
  53. * the distance between the point and the line:
  54. *
  55. * slope is zero (horizontal): it *is* reasonable to expect this
  56. * slope is infinite (vertical): should not be possible, our use case
  57. * for radon measurements are done against time domain, meaning we
  58. * should not have two measurements for one sensor occurring at the
  59. * same time
  60. */
  61. if($x0 === $x1) return abs($p[0] - $x0);
  62. if($y0 === $y1) return abs($p[1] - $y0);
  63. /* as long as line is not horizontal or vertical we can save some time:
  64. * using the alternative formula:
  65. * mx + k = (x0 - x) / m + y0
  66. * this method saves about 25% of the time, so it is kinda worth it
  67. */
  68. if(/* save memory */ true) {
  69. $m = ($y1-$y0) / ($x1-$x0);
  70. return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / sqrt(1 + pow($m, 2));
  71. } // else
  72. // use a lookup table for slope calculation and sqrt(1 + pow($m, 2))
  73. if(!isset($this->lookuptable[$lut[0]][$lut[1]])) {
  74. $m = ($y1-$y0) / ($x1-$x0);
  75. $this->lookuptable[$lut[0]][$lut[1]] = array($m, sqrt(1 + pow($m, 2)));
  76. //echo "m: {$m} sqrt: {$this->lookuptable[$lut[0]][$lut[1]][1]}" .PHP_EOL;
  77. }
  78. $m = $this->lookuptable[$lut[0]][$lut[1]][0];
  79. $d = $this->lookuptable[$lut[0]][$lut[1]][1];
  80. return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / $d;
  81. }
  82. /* this rdp function works great but is kinda slow, 100K points takes
  83. * more than a second on common 2022 processor, profiling shows that
  84. * the `sqrt` function consumes some of the processing time, I suspect
  85. * that array_slice is using some resources
  86. */
  87. private function _rdp(array $curve) {
  88. // do no operations if curve is already a line segment
  89. if(3 > count($curve)) return;
  90. // find the point with the largest distance
  91. $maxDistance = 0;
  92. $index = 0;
  93. for($i = count($curve); $i--;) {
  94. if($this->epsilon < ($d = $this->perpendicularDistance(
  95. $curve[0], end($curve), $curve[$i]
  96. ))) {
  97. if($d > $maxDistance) {
  98. $maxDistance = $d;
  99. $index = $i;
  100. }
  101. };
  102. }
  103. // line is approximation of curve if epsilon not exceeded
  104. if(0 === $maxDistance) return;
  105. // otherwise split the curve at the point of greatest distance
  106. $this->save[$curve[$index][0]] = $curve[$index];
  107. $this->rdp(array_slice($curve, 0, $index));
  108. $this->rdp(array_slice($curve, $index));
  109. }
  110. // this is a rewrite for fewer CPU instructions per point
  111. private function rdp(array $curve, $start, $end) {
  112. $count = $end - $start + 1;
  113. // do no operations if curve is already a line segment
  114. if(3 > $count) return;
  115. /* to improve memory usage with PHP 5.3 the curve input parameter
  116. * may have points where xy components are stored separately, as an
  117. * array of x values, and an array of y values
  118. *
  119. * curve = array(array(of x ...), array(of y ...))
  120. */
  121. $firstPoint; $lastPoint;
  122. $firstPoint = array(
  123. $curve[0][$start],
  124. $curve[1][$start]
  125. );
  126. $lastPoint = array(
  127. $curve[0][$end],
  128. $curve[1][$end]
  129. );
  130. // find the point with the largest distance
  131. $maxDistance = 0;
  132. $index = 0;
  133. for($i = $count; $i--;) {
  134. $nthPoint = array(
  135. $curve[0][$start + $i],
  136. $curve[1][$start + $i]
  137. );
  138. if($this->epsilon < ($d = $this->perpendicularDistance(
  139. $firstPoint, $lastPoint, $nthPoint,
  140. /* lut */ array($start, $end)
  141. ))) {
  142. //var_dump($curve[$start],$curve[$end]);
  143. if($d > $maxDistance) {
  144. $maxDistance = $d;
  145. $index = $start + $i;
  146. }
  147. };
  148. }
  149. // line is approximation of curve if epsilon not exceeded
  150. if(0 === $maxDistance) return;
  151. //echo "maxDistance: {$maxDistance} start: {$start} end: {$index}" . PHP_EOL;
  152. /* to improve memory usage with PHP 5.3 the curve input parameter
  153. * may have points where xy components are stored separately, as an
  154. * array of x values, and an array of y values
  155. *
  156. * curve = array(array(of x ...), array(of y ...))
  157. */
  158. $x = $curve[0][$index];
  159. $y = $curve[1][$index];
  160. // otherwise split the curve at the point of greatest distance
  161. $this->save[$x] = $y;
  162. $this->rdp($curve, $start, $index);
  163. $this->rdp($curve, $index, $end);
  164. }
  165. /* determine if any points are remotely close to being outside the given
  166. * epsilon
  167. */
  168. private function quickScan() {
  169. // pick a y-axis reference point (use average of entire dataset)
  170. $y = array_sum($this->points[1]) / count($this->points[1]);
  171. $maximum = $y + $this->epsilon;
  172. $minimum = $y - $this->epsilon;
  173. for($i = count($this->points[1]); $i--;) {
  174. if($this->points[1][$i] > $maximum) return false;
  175. if($this->points[1][$i] < $minimum) return false;
  176. }
  177. return true;
  178. }
  179. public function getRDP($epsilon) {
  180. $this->epsilon = $epsilon;
  181. //$initial = ini_get("precision");
  182. //ini_set("precision", 3);
  183. /* sometimes I am seeing a run-time of a minute but then the RDP
  184. * algorithm returns two points, usually with very large epsilon
  185. * values - input is around 100K points
  186. */
  187. if($this->quickScan()) {
  188. if(defined("VERBOSE_DB_INSERT_LOGGING"))
  189. var_dump("[?] quickscan optimization triggered");
  190. // truncate input to reduce unnecessary calculations
  191. $endX = array_pop($this->points[0]);
  192. $endY = array_pop($this->points[1]);
  193. $this->points = array(
  194. array($this->points[0][0], $endX),
  195. array($this->points[1][0], $endY)
  196. );
  197. }
  198. $count = count($this->points[0]);
  199. $this->rdp($this->points, 0, $count - 1);
  200. //ini_set("precision", $initial);
  201. /* even though keys are integers PHP will store elements in the order
  202. * they were added to the array, we would still need to sort the array
  203. */
  204. ksort($this->save);
  205. return $this->save;
  206. //return array_values($this->save);
  207. }
  208. }