points = $curve; $firstPoint; $lastPoint; $firstPoint = array( $curve[0][0], $curve[1][0] ); $lastPoint = array( $curve[0][count($curve[0]) - 1], $curve[1][count($curve[0]) - 1] ); // keep the first and last points (use the X-axis as key) $this->save = array( $firstPoint[0] => $firstPoint[1], $lastPoint[0] => $lastPoint[1] ); } // this way is faster, but fails for vertical and horizontal lines private function perpendicularDistance(array $start, array $end, array $p, array $lut) { // this is just for profiling, RDP is slower than I thought it would be $this->called++; // rename start (x,y) and end (x,y) points $x0 = $start[0]; $y0 = $start[1]; $x1 = $end[0]; $y1 = $end[1]; /* for either a horizontal, or impossible vertical line, we can determine * the distance between the point and the line: * * slope is zero (horizontal): it *is* reasonable to expect this * slope is infinite (vertical): should not be possible, our use case * for radon measurements are done against time domain, meaning we * should not have two measurements for one sensor occurring at the * same time */ if($x0 === $x1) return abs($p[0] - $x0); if($y0 === $y1) return abs($p[1] - $y0); /* as long as line is not horizontal or vertical we can save some time: * using the alternative formula: * mx + k = (x0 - x) / m + y0 * this method saves about 25% of the time, so it is kinda worth it */ if(/* save memory */ true) { $m = ($y1-$y0) / ($x1-$x0); return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / sqrt(1 + pow($m, 2)); } // else // use a lookup table for slope calculation and sqrt(1 + pow($m, 2)) if(!isset($this->lookuptable[$lut[0]][$lut[1]])) { $m = ($y1-$y0) / ($x1-$x0); $this->lookuptable[$lut[0]][$lut[1]] = array($m, sqrt(1 + pow($m, 2))); //echo "m: {$m} sqrt: {$this->lookuptable[$lut[0]][$lut[1]][1]}" .PHP_EOL; } $m = $this->lookuptable[$lut[0]][$lut[1]][0]; $d = $this->lookuptable[$lut[0]][$lut[1]][1]; return abs($p[1] - ($m * $p[0]) - $y0 + ($m * $x0)) / $d; } /* this rdp function works great but is kinda slow, 100K points takes * more than a second on common 2022 processor, profiling shows that * the `sqrt` function consumes some of the processing time, I suspect * that array_slice is using some resources */ private function _rdp(array $curve) { // do no operations if curve is already a line segment if(3 > count($curve)) return; // find the point with the largest distance $maxDistance = 0; $index = 0; for($i = count($curve); $i--;) { if($this->epsilon < ($d = $this->perpendicularDistance( $curve[0], end($curve), $curve[$i] ))) { if($d > $maxDistance) { $maxDistance = $d; $index = $i; } }; } // line is approximation of curve if epsilon not exceeded if(0 === $maxDistance) return; // otherwise split the curve at the point of greatest distance $this->save[$curve[$index][0]] = $curve[$index]; $this->rdp(array_slice($curve, 0, $index)); $this->rdp(array_slice($curve, $index)); } // this is a rewrite for fewer CPU instructions per point private function rdp(array $curve, $start, $end) { $count = $end - $start + 1; // do no operations if curve is already a line segment if(3 > $count) return; /* to improve memory usage with PHP 5.3 the curve input parameter * may have points where xy components are stored separately, as an * array of x values, and an array of y values * * curve = array(array(of x ...), array(of y ...)) */ $firstPoint; $lastPoint; $firstPoint = array( $curve[0][$start], $curve[1][$start] ); $lastPoint = array( $curve[0][$end], $curve[1][$end] ); // find the point with the largest distance $maxDistance = 0; $index = 0; for($i = $count; $i--;) { $nthPoint = array( $curve[0][$start + $i], $curve[1][$start + $i] ); if($this->epsilon < ($d = $this->perpendicularDistance( $firstPoint, $lastPoint, $nthPoint, /* lut */ array($start, $end) ))) { //var_dump($curve[$start],$curve[$end]); if($d > $maxDistance) { $maxDistance = $d; $index = $start + $i; } }; } // line is approximation of curve if epsilon not exceeded if(0 === $maxDistance) return; //echo "maxDistance: {$maxDistance} start: {$start} end: {$index}" . PHP_EOL; /* to improve memory usage with PHP 5.3 the curve input parameter * may have points where xy components are stored separately, as an * array of x values, and an array of y values * * curve = array(array(of x ...), array(of y ...)) */ $x = $curve[0][$index]; $y = $curve[1][$index]; // otherwise split the curve at the point of greatest distance $this->save[$x] = $y; $this->rdp($curve, $start, $index); $this->rdp($curve, $index, $end); } /* determine if any points are remotely close to being outside the given * epsilon */ private function quickScan() { // pick a y-axis reference point (use average of entire dataset) $y = array_sum($this->points[1]) / count($this->points[1]); $maximum = $y + $this->epsilon; $minimum = $y - $this->epsilon; for($i = count($this->points[1]); $i--;) { if($this->points[1][$i] > $maximum) return false; if($this->points[1][$i] < $minimum) return false; } return true; } public function getRDP($epsilon) { $this->epsilon = $epsilon; //$initial = ini_get("precision"); //ini_set("precision", 3); /* sometimes I am seeing a run-time of a minute but then the RDP * algorithm returns two points, usually with very large epsilon * values - input is around 100K points */ if($this->quickScan()) { if(defined("VERBOSE_DB_INSERT_LOGGING")) var_dump("[?] quickscan optimization triggered"); // truncate input to reduce unnecessary calculations $endX = array_pop($this->points[0]); $endY = array_pop($this->points[1]); $this->points = array( array($this->points[0][0], $endX), array($this->points[1][0], $endY) ); } $count = count($this->points[0]); $this->rdp($this->points, 0, $count - 1); //ini_set("precision", $initial); /* even though keys are integers PHP will store elements in the order * they were added to the array, we would still need to sort the array */ ksort($this->save); return $this->save; //return array_values($this->save); } }