getSensor("R");
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
    /* $sensors is about 100K, we do not need it now that we have the one
     * we want */
    unset($sensors);
}

$data = new DataSqueeze();
$before = memory_get_usage();
var_dump("using {$before} bytes before rows");

$rows = $data->getRows($sensor);
//$curve = $data->getRows($sensor, "radon", );

/* convert sensor readings into $curve -- $rows comes back as two parallel
 * columns: $rows[0] holds the times, $rows[1] holds the readings, and
 * $curve uses the same parallel layout */
$rowCount = count($rows[0]);
for($i = $rowCount; $i--;) {
    $row = NULL;
    if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
        // consume $rows from the tail so its memory is released as we walk it
        $row = array(array_pop($rows[0]), array_pop($rows[1]));
    } else {
        /* [FIX] $rows is a pair of parallel columns, not an array of points;
         * the previous `$row = $rows[$i];` indexed the outer two-element
         * container with a row index and produced garbage for any $i > 1 --
         * build the point from both columns instead, matching the
         * array_pop() branch above */
        $row = array($rows[0][$i], $rows[1][$i]);
    }
    $curve[0][] = $row[0];
    $curve[1][] = $row[1];
}
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
    /* even though it is empty in PHP 5.3 the `rows` variable was eating
     * a MegaByte */
    unset($rows, $data);
}

$after = memory_get_usage();
$diff = $after - $before;
var_dump("using {$after} bytes after [$rowCount] rows diff:({$diff} bytes)");

// rows were reversed by loop
$curve[0] = array_reverse($curve[0]);
$curve[1] = array_reverse($curve[1]);
} else {
    // no live sensor: synthesize 1000 points of a damped cosine for testing
    for($i = 0; $i < 1000; $i++) {
        $x = ($i * .01);
        $var = array($i, exp(-$x) * cos(2 * M_PI * $x));
        $curve[0][] = $var[0];
        $curve[1][] = $var[1];
    }
}

/* defined with NULL on purpose: defined() only checks that the name exists,
 * so this acts as an always-on toggle that can be commented out */
define("TEST_DATA_SQUEEZE_FIR", NULL);
if(defined("TEST_DATA_SQUEEZE_FIR")) {
    // run the data through a simple FIR filter
    $delay = /* number of samples until filter starts */ 10;
    $period = /* time between samples in seconds */ 1;
    /* [!] do not try to use one second for the period on a real sensor
     *
     * this would cause the filter to regenerate readings every second
     * instead of whatever the normal period would be, RadonEye for example
     * should be once every 300 seconds...
     */
    if(defined("USE_SENSOR_READINGS")) {
        $delay = $sensor->options->getSampleDepth();
        $period = $sensor->options->getSamplePeriod();
        var_dump("using delay: {$delay} samples, period: {$period} seconds");
    }

    $filter = new DataSqueeze();

    // process data
    $t0 = microtime(true);
    $fCurve = $filter->simpleFIR($curve, $delay, $period);

    /* [?] the filter returns floating point values, values read from the
     * database are integers, the filtered results should undergo quantization
     * to return them to the more familiar precision that they started from
     *
     * this will lose resolution that we gained from the filter but should be
     * minimal, assuming the values stored in the database are not normalized
     * eg: [-1..0..1] */
    for($i = count($fCurve[0]); $i--;) {
        $fCurve[1][$i] = intval($fCurve[1][$i]);
    }

    $t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
    /* [FIX] report the number of filtered points; count($fCurve) is always 2
     * because $fCurve is just the pair of x/y columns */
    $filteredCount = count($fCurve[0]);
    var_dump("to process the filter[{$filteredCount}]: {$t0}
");
} else {
    $fCurve = $curve;
}

/* [?] smaller epsilon means more points and higher accuracy, but costs more
 * CPU resources and will take longer to transfer the data to the end user
 *
 * Emma points out that the epsilon will need to vary depending on the
 * units for each graph, for example a graph with a range of 0-1 will have
 * an epsilon less than one, whereas a graph with a range of 0-1000 should
 * use a larger epsilon, most likely greater than ten. */
$epsilon = defined("USE_SENSOR_READINGS")
    ? $sensor->options->getSquishFactor()
    : 0.01;

// initiate the object, let it copy the graph
$testing = new RamerDouglasPeucker($fCurve);

// run the algorithm
$t0 = microtime(true);
$rdpCurve = $testing->getRDP($epsilon);

/* test some other libraries:
 *
 * there were two people who ported simplify.js, who claims to have made a
 * fast enough version of polyline simplification algorithm, they improved
 * speed by first reducing the number of points before running RDP using
 * radial coordinates
 *
 * I tested to see if the results were ten times faster but found that the
 * reduced resolution provided by the modified algorithm gave at most a 30%
 * improvement. [?] to test the libraries I needed to replace all source code
 * `'x'` and `'y'` with `0` and `1` to match this code's use of points.
 *
 * The result of this testing and the slowness of RDP in general is leading me
 * to want to cache results.
 *
 * these libraries are basically the same, but I tested both:
 * https://raw.githubusercontent.com/aken/simplify-php/master/simplify.php
 * https://raw.githubusercontent.com/andriichumak/simplify-php/master/Simplify.php
 */
//$testin9 = new Simplify();
//$rdpCurve = $testin9->run($curve, $epsilon);

$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";

// output some stats for developers
$count = count($curve[0]);
$count = array($count, count($rdpCurve));
echo "from {$count[0]} down to {$count[1]} using e = {$epsilon} {$t0}" . PHP_EOL;
echo "
the perpendicular distance routine was called: {$testing->called}" . PHP_EOL;

// rough machine benchmark: time n sqrt() calls to put the RDP cost in context
$t0 = microtime(true);
$last = 0;
for($i = 10000; $i--;) {
    $last = sqrt($i + $last);
}
$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
echo "
res = {$last}... running the sqrt function n times {$t0}" . PHP_EOL;

// convert to chartjs dataset, x values must be quoted
$data = array();
$data2 = array();

/* testing chartjs to see if grabbing the min and max values for the x-axis
 * help with the problem where the values either smash together at x=0
 * when x values are presented as integers, or
 * where when I make them strings, they sometimes do not interlace, for small
 * datasets they interlace, for large datasets they are serial, eg:
 * [0...999...0...999] */
$min = PHP_INT_MAX;
$max = 0;
$ymin = 0; // NOTE(review): initialized but never updated below -- confirm intent
$ymax = 0;
$count = count($curve[0]);
$max = max($max, $curve[0][count($curve[0]) -1]);
$min = min($min, $curve[0][0]);
for($i = $count; $i--;) {
    // $o walks the curve front-to-back while $i counts down
    $o = intval(-1 + $count - $i);
    $data[] = "{x:{$curve[0][$o]}, y:{$curve[1][$o]}}";
    $ymax = max($ymax, $curve[1][$o]);
    if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
        unset($curve[0][$o], $curve[1][$o]);
    }
}
foreach($rdpCurve as $k => $v) {
    $data2[] = "{x:{$k}, y:{$v}}";
    $max = max($max, $k);
    $min = min($min, $k);
    $ymax = max($ymax, $v);
}
//var_dump($data2);

/* testing:
 * for a years worth of data setting increasing the epsilon to an order
 * of magnitude higher gave us a reasonable (from 87886 readings to 1227
 * 1227 for 'R' between `1635368506` through `1664229791`, a sensor that reads
 * once per minute, it had several power failures, explaining the missing
 * readings)
 *
 * The noise in the readings is preserved when using the above settings, if
 * we increase the sample depth of the sensor to flatten the noise out, for a
 * year, two magnitudes more sample depth, and decreasing the default
 * epsilon by half magnitude gave a nice line
 *
 * [?] increasing sample depth also decreases the overall amplitude of the
 * signal but gives a easier to read graph
 *
 * [?] increasing or decreasing the epsilon adds or removes load to the
 * computer, there is a sweet spot that preserves enough detail while reducing
 * the number of data points in the output, the sweet spot depends on sensor,
 * it can be estimated by looking at deviations of the output graphs, we used
 * a tenth of the standard deviation as starting values for each sensor, using
 * a smaller value causes more work on the computer, but a tenth seemed like
 * not too big of a load on CPU while guarantying that one would not notice
 * a difference in the output graph.
 */
if(/* redo with an offset */ true) {
    // offset the charts by the `max` amount so I can compare them
    $data2 = array();
    foreach($rdpCurve as $k => $v) {
        $v += $ymax;
        $data2[] = "{x:{$k}, y:{$v}}";
    }
    $ymax *= 2;
}

/* [!] warning about chartjs
 *
 * After spending about an hour trying to figure out why all points were
 * being rendered on the x-axis at 0 (zero) I found that by changing the
 * value of each point's x from integer to string caused the chart to render
 * correctly. I checked the documentation and found that the data structure
 * for a dataset is documented as supporting integer values:
 * `dataset:[{ data: [{x: 10, y: 20}, {x: 15, y: null}] }]`
 *
 * But testing shows this not to work. I have added quotes around each `x`
 * value, but that just seems wrong.
 * `dataset:[{ data: [{x: '10', y: 20}, {x: '15', y: null}] }]`
 *
 * [[ an update to the above warning, I figured this out a couple weeks later,
 * using `type: 'time'` worked as long as I included a `date` adapter for
 * chartjs, apparently in version 3 they removed time type parsing, but
 * they have a linear type, this worked fine without needing to put the
 * single quotes around `x` values. ]]
 *
 * [?] chartjs performance options
 * normalized: true - informs that data indices are unique and sorted
 * parse: false - data has been prepared in their internal data format
 * min / max scales - calculate the minimum and maximum xy scales
 * minRotation / maxRotation - set to same value (rotation of x axis labels)
 */
echo "
";

/**
 * Dump the standard deviation of each sensor's readings.
 *
 * The deviations give a sane baseline when choosing the
 * Ramer-Douglas-Peucker epsilon for each sensor / reading type.
 */
function getStandardDeviations() {
    /* Users habitually request every reading from all time and then never
     * look at most of it. Serving the query is cheap; shipping the payload
     * is not -- HTTP alone costs at least 10x, and the client screen ends
     * up rendering a fraction of the points anyway, so the effective
     * response speed drops by roughly 4 orders of magnitude. */

    // testing
    $sensors = new TheInternet();
    $sensor = $sensors->getSensor("A"); // NOTE(review): assigned but never read below
    $data = new DataSqueeze();

    // single-reading sensor clusters: one deviation each
    foreach(array('A', 'B', 'C', 'R', 'S') as $v) {
        var_dump("std deviation {$v}", $data->getStdDeviation($sensors->getSensor($v)));
    }

    /* BME680 clusters expose several reading columns; strip the leading
     * "time" and "sensor" column names, then report one deviation per
     * (type, sensor) pair to guide per-type epsilon choices */
    $columns = explode(" ", BME680_COLUMNS);
    array_shift($columns); // remove "time" from types
    array_shift($columns); // remove "sensor" from types
    foreach($columns as $t) {
        foreach(array("H", "I", "J") as $s) {
            var_dump(
                "std deviation {$t}: {$s}",
                $data->getStdDeviation($sensors->getSensor($s), $t)
            );
        }
    }
}

require_once("fantastic.php");

/**
 * End-to-end exercise of the Dataset pipeline on one dataset:
 * raw rows -> simple FIR filter -> Ramer-Douglas-Peucker reduction.
 */
function testDataset() {
    // TheInternet has a list of all the sensor clusters; expand the
    // list into all possible views of the data (Datasets)
    $net = new TheInternet();
    $datasets = Dataset::makeAllDatasetsFromInternet($net);

    // interface to get data from the database
    $squeeze = new DataSqueeze();

    // just use one of the datasets for testing
    $ds = $datasets[0];

    // each dataset knows information about its sensor
    var_dump($ds->getTypeName());

    $cluster = $ds->parentSensorCluster;
    $rows = $squeeze->getRows($cluster, $ds->getTypeName());

    /* for FIR, period is fixed; delay (sample depth) controls how much
     * filtering is performed -- in general (but not always) users benefit
     * from heavier filtering when "zoomed" out, as it shows trends better */
    $depth = $cluster->options->getSampleDepth();
    $interval = $cluster->options->getSamplePeriod();
    $smoothed = $squeeze->simpleFIR($rows, $depth, $interval);

    /* run the filtered line through RDP to cut the point count; heavier
     * filtering tolerates a more aggressive epsilon, i.e. more leeway when
     * deciding whether a sampled/filtered point lies "near" the line */
    $rdp = new RamerDouglasPeucker($smoothed);
    $reduced = $rdp->getRDP(
        $cluster->options->getSquishFactor($ds->getTypeName())
    );

    var_dump($reduced);
    var_dump(count($datasets));
}