123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- <?php
- /* Ramer-Douglas-Peucker Algorithm
- *
- * input data is a curve or graph, and outputs a similar curve or graph that
- * contains fewer data points
- *
- *
- * 1) connect the points at the beginning (a) and end (b) of the curve to
- * form a chord (ab)
- * 2) determine the point of the curve with the largest distance (d)
- * perpendicular to `ab`
- * 3) compare this `d` with a threshold (s) value. done when `d` is less
- * than `s`, `ab` is an approximation of the curve
- * 4) otherwise `d` is greater than `s`, divide `ab` into two segments, (ac)
- * and (cb), for each chord follow steps `1,2,3`
- * 5) when all chords are approximations of their curves, connect to give
- * an approximation of the original curve
- */
- class RamerDouglasPeucker {
- public $save;
- public /* array of */ $points;
- public $called = 0;
- private $lookuptable = array();
- private $epsilon;
- public function __construct(array /* of points */ $curve) {
- // store a copy of the curve
- $this->points = $curve;
- $firstPoint; $lastPoint;
- $firstPoint = array(
- $curve[0][0],
- $curve[1][0]
- );
- $lastPoint = array(
- $curve[0][count($curve[0]) - 1],
- $curve[1][count($curve[0]) - 1]
- );
- // keep the first and last points (use the X-axis as key)
- $this->save = array(
- $firstPoint[0] => $firstPoint[1],
- $lastPoint[0] => $lastPoint[1]
- );
- }
- // this way is faster, but fails for vertical and horizontal lines
- private function perpendicularDistance(array $start, array $end, array $p, array $lut) {
- // this is just for profiling, RDP is slower than I thought it would be
- $this->called++;
- // rename start (x,y) and end (x,y) points
- $x0 = $start[0];
- $y0 = $start[1];
- $x1 = $end[0];
- $y1 = $end[1];
- /* for either a horizontal, or impossible vertical line, we can determine
- * the distance between the point and the line:
- *
- * slope is zero (horizontal): it *is* reasonable to expect this
- * slope is infinite (vertical): should not be possible, our use case
- * for radon measurements are done against time domain, meaning we
- * should not have two measurements for one sensor occurring at the
- * same time
- */
- if($x0 === $x1) return abs($p[0] - $x0);
- if($y0 === $y1) return abs($p[1] - $y0);
- /* as long as line is not horizontal or vertical we can save some time:
- * using the alternative formula:
- * mx + k = (x0 - x) / m + y0
- * this method saves about 25% of the time, so it is kinda worth it
- */
- if(/* save memory */ true) {
- $m = ($y1-$y0) / ($x1-$x0);
- return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / sqrt(1 + pow($m, 2));
- } // else
- // use a lookup table for slope calculation and sqrt(1 + pow($m, 2))
- if(!isset($this->lookuptable[$lut[0]][$lut[1]])) {
- $m = ($y1-$y0) / ($x1-$x0);
- $this->lookuptable[$lut[0]][$lut[1]] = array($m, sqrt(1 + pow($m, 2)));
- //echo "m: {$m} sqrt: {$this->lookuptable[$lut[0]][$lut[1]][1]}" .PHP_EOL;
- }
- $m = $this->lookuptable[$lut[0]][$lut[1]][0];
- $d = $this->lookuptable[$lut[0]][$lut[1]][1];
- return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / $d;
- }
- /* this rdp function works great but is kinda slow, 100K points takes
- * more than a second on common 2022 processor, profiling shows that
- * the `sqrt` function consumes some of the processing time, I suspect
- * that array_slice is using some resources
- */
- private function _rdp(array $curve) {
- // do no operations if curve is already a line segment
- if(3 > count($curve)) return;
- // find the point with the largest distance
- $maxDistance = 0;
- $index = 0;
- for($i = count($curve); $i--;) {
- if($this->epsilon < ($d = $this->perpendicularDistance(
- $curve[0], end($curve), $curve[$i]
- ))) {
- if($d > $maxDistance) {
- $maxDistance = $d;
- $index = $i;
- }
- };
- }
- // line is approximation of curve if epsilon not exceeded
- if(0 === $maxDistance) return;
- // otherwise split the curve at the point of greatest distance
- $this->save[$curve[$index][0]] = $curve[$index];
- $this->rdp(array_slice($curve, 0, $index));
- $this->rdp(array_slice($curve, $index));
- }
- // this is a rewrite for fewer CPU instructions per point
- private function rdp(array $curve, $start, $end) {
- $count = $end - $start + 1;
- // do no operations if curve is already a line segment
- if(3 > $count) return;
- /* to improve memory usage with PHP 5.3 the curve input parameter
- * may have points where xy components are stored separately, as an
- * array of x values, and an array of y values
- *
- * curve = array(array(of x ...), array(of y ...))
- */
- $firstPoint; $lastPoint;
- $firstPoint = array(
- $curve[0][$start],
- $curve[1][$start]
- );
- $lastPoint = array(
- $curve[0][$end],
- $curve[1][$end]
- );
- // find the point with the largest distance
- $maxDistance = 0;
- $index = 0;
- for($i = $count; $i--;) {
- $nthPoint = array(
- $curve[0][$start + $i],
- $curve[1][$start + $i]
- );
- if($this->epsilon < ($d = $this->perpendicularDistance(
- $firstPoint, $lastPoint, $nthPoint,
- /* lut */ array($start, $end)
- ))) {
- //var_dump($curve[$start],$curve[$end]);
- if($d > $maxDistance) {
- $maxDistance = $d;
- $index = $start + $i;
- }
- };
- }
- // line is approximation of curve if epsilon not exceeded
- if(0 === $maxDistance) return;
- //echo "maxDistance: {$maxDistance} start: {$start} end: {$index}" . PHP_EOL;
- /* to improve memory usage with PHP 5.3 the curve input parameter
- * may have points where xy components are stored separately, as an
- * array of x values, and an array of y values
- *
- * curve = array(array(of x ...), array(of y ...))
- */
- $x = $curve[0][$index];
- $y = $curve[1][$index];
- // otherwise split the curve at the point of greatest distance
- $this->save[$x] = $y;
- $this->rdp($curve, $start, $index);
- $this->rdp($curve, $index, $end);
- }
- /* determine if any points are remotely close to being outside the given
- * epsilon
- */
- private function quickScan() {
- // pick a y-axis reference point (use average of entire dataset)
- $y = array_sum($this->points[1]) / count($this->points[1]);
- $maximum = $y + $this->epsilon;
- $minimum = $y - $this->epsilon;
- for($i = count($this->points[1]); $i--;) {
- if($this->points[1][$i] > $maximum) return false;
- if($this->points[1][$i] < $minimum) return false;
- }
- return true;
- }
- public function getRDP($epsilon) {
- $this->epsilon = $epsilon;
- //$initial = ini_get("precision");
- //ini_set("precision", 3);
- /* sometimes I am seeing a run-time of a minute but then the RDP
- * algorithm returns two points, usually with very large epsilon
- * values - input is around 100K points
- */
- if($this->quickScan()) {
- if(defined("VERBOSE_DB_INSERT_LOGGING"))
- var_dump("[?] quickscan optimization triggered");
- // truncate input to reduce unnecessary calculations
- $endX = array_pop($this->points[0]);
- $endY = array_pop($this->points[1]);
- $this->points = array(
- array($this->points[0][0], $endX),
- array($this->points[1][0], $endY)
- );
- }
- $count = count($this->points[0]);
- $this->rdp($this->points, 0, $count - 1);
- //ini_set("precision", $initial);
- /* even though keys are integers PHP will store elements in the order
- * they were added to the array, we would still need to sort the array
- */
- ksort($this->save);
- return $this->save;
- //return array_values($this->save);
- }
- }
|