getSensor("R");
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
/* sensors is about 100K, we do not need it now that we have the one
* we want
*/
unset($sensors);
}
$data = new DataSqueeze();
$before = memory_get_usage();
var_dump("using {$before} bytes before rows");
$rows = $data->getRows($sensor);
//$curve = $data->getRows($sensor, "radon", );
// convert sensor readings from array("time"=>x, "reading"=>y) rows into the
// parallel $curve[0] (time) and $curve[1] (reading) arrays
$rowCount = count($rows[0]);
for($i = $rowCount; $i--;) {
$row = NULL;
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
$row = array(array_pop($rows[0]), array_pop($rows[1]));
} else {
$row = array($rows[0][$i], $rows[1][$i]);
}
$curve[0][] = $row[0];
$curve[1][] = $row[1];
}
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
/* even though it is empty by now, in PHP 5.3 the `$rows` variable was still
* eating about a megabyte
*/
unset($rows, $data);
}
$after = memory_get_usage();
$diff = $after - $before;
var_dump("using {$after} bytes after [$rowCount] rows diff:({$diff} bytes)");
// rows were reversed by loop
$curve[0] = array_reverse($curve[0]);
$curve[1] = array_reverse($curve[1]);
} else {
for($i = 0; $i < 1000; $i++) {
$x = ($i * .01);
$var = array($i, exp(-$x) * cos(2 * M_PI * $x));
$curve[0][] = $var[0];
$curve[1][] = $var[1];
}
}
define("TEST_DATA_SQUEEZE_FIR", NULL);
if(defined("TEST_DATA_SQUEEZE_FIR")) {
// run the data through a simple FIR filter
$delay = /* number of samples until filter starts */ 10;
$period = /* time between samples in seconds */ 1;
/* [!] do not try to use one second for the period on a real sensor
*
* this would cause the filter to regenerate readings every second instead of
* at the sensor's normal period; a RadonEye, for example, should be once
* every 300 seconds...
*/
if(defined("USE_SENSOR_READINGS")) {
$delay = $sensor->options->getSampleDepth();
$period = $sensor->options->getSamplePeriod();
var_dump("using delay: {$delay} samples, period: {$period} seconds");
}
$filter = new DataSqueeze();
// process data
$t0 = microtime(true);
$fCurve = $filter->simpleFIR($curve, $delay, $period);
/* [?] the filter returns floating point values, while the values read from
* the database are integers; the filtered results should undergo quantization
* to return them to the more familiar precision they started from
*
* this will lose some of the resolution gained from the filter, but the loss
* should be minimal, assuming the values stored in the database are not
* normalized, eg: [-1..0..1]
*/
for($i = count($fCurve[0]); $i--;) { $fCurve[1][$i] = intval($fCurve[1][$i]); }
$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
$filteredCount = count($fCurve[0]);
var_dump("to process the filter[{$filteredCount}]: {$t0}
");
} else {
$fCurve = $curve;
}
/* [?] smaller epsilon means more points and higher accuracy, but costs more
* CPU resources and will take longer to transfer the data to the end user
*
* Emma points out that the epsilon will need to vary depending on the
* units for each graph, for example a graph with a range of 0-1 will have
* an epsilon less than one, whereas a graph with a range of 0-1000 should
* use a larger epsilon, most likely greater than ten.
*/
$epsilon = defined("USE_SENSOR_READINGS")
? $sensor->options->getSquishFactor()
: 0.01;
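/* [?] a rough sketch (my assumption, not part of the original flow): scale
* the default epsilon to the y-range of the curve, as noted above, at about
* one percent of the range; the EXAMPLE_SCALE_EPSILON guard is hypothetical
* and never defined here, so this stays inert unless enabled elsewhere
*/
if(defined("EXAMPLE_SCALE_EPSILON") && !defined("USE_SENSOR_READINGS")) {
$yRange = count($fCurve[1]) ? max($fCurve[1]) - min($fCurve[1]) : 0;
if($yRange > 0) {
// 0-1 range -> epsilon of 0.01, 0-1000 range -> epsilon of 10
$epsilon = $yRange * 0.01;
}
var_dump("scaled epsilon to {$epsilon} for a y-range of {$yRange}");
}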
// initiate the object, let it copy the graph
$testing = new RamerDouglasPeucker($fCurve);
// run the algorithm
$t0 = microtime(true);
$rdpCurve = $testing->getRDP($epsilon);
/* test some other libraries:
*
* two people have ported simplify.js, whose author claims to have made a
* fast enough version of the polyline simplification algorithm; it improves
* speed by first reducing the number of points with a radial-distance pass
* before running RDP
*
* I tested to see if the results were ten times faster but found that the
* reduced resolution provided by the modified algorithm gave at most a 30%
* improvement. [?] to test the libraries I needed to replace every `'x'` and
* `'y'` in their source code with `0` and `1` to match this code's use of
* points.
*
* The results of this testing, and the slowness of RDP in general, are
* leading me to want to cache results.
*
* these libraries are basically the same, but I tested both:
* https://raw.githubusercontent.com/aken/simplify-php/master/simplify.php
* https://raw.githubusercontent.com/andriichumak/simplify-php/master/Simplify.php
*/
//$testin9 = new Simplify();
//$rdpCurve = $testin9->run($curve, $epsilon);
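/* [?] a rough sketch (my assumption): instead of editing the ported
* libraries' source, the curve could be repacked into the array('x'=>, 'y'=>)
* point list that the simplify.js style ports appear to expect; the
* EXAMPLE_SIMPLIFY_POINTS guard is hypothetical and left undefined
*/
if(defined("EXAMPLE_SIMPLIFY_POINTS")) {
$points = array();
for($i = 0, $n = count($curve[0]); $i < $n; $i++) {
$points[] = array('x' => $curve[0][$i], 'y' => $curve[1][$i]);
}
//$rdpCurve = $testin9->run($points, $epsilon);
}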
$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
// output some stats for developers
$count = count($curve[0]);
$count = array($count, count($rdpCurve));
echo "from {$count[0]} down to {$count[1]} using e = {$epsilon} {$t0}" . PHP_EOL;
echo "
the perpendicular distance routine was called: {$testing->called}" . PHP_EOL;
$t0 = microtime(true);
$last = 0;
for($i = 10000; $i--;) { $last = sqrt($i + $last); }
$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
echo "
res = {$last}... running the sqrt function n times {$t0}" . PHP_EOL;
// convert to a chartjs dataset (see the chartjs warning below about quoting x values)
$data = array();
$data2 = array();
/* testing chartjs to see if supplying the min and max values for the x-axis
* helps with the problem where the points either smash together at x=0 when
* the x values are presented as integers, or, when I make them strings,
* sometimes do not interlace: small datasets interlace, large datasets come
* out serial, eg: [0...999...0...999]
*/
$min = PHP_INT_MAX; $max = 0; $ymin = 0; $ymax = 0;
$count = count($curve[0]);
$max = max($max, $curve[0][count($curve[0]) -1]);
$min = min($min, $curve[0][0]);
for($i = $count; $i--;) {
$o = intval(-1 + $count - $i);
$data[] = "{x:{$curve[0][$o]}, y:{$curve[1][$o]}}";
$ymax = max($ymax, $curve[1][$o]);
if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
unset($curve[0][$o], $curve[1][$o]);
}
}
foreach($rdpCurve as $k => $v) {
$data2[] = "{x:{$k}, y:{$v}}";
$max = max($max, $k);
$min = min($min, $k);
$ymax = max($ymax, $v);
}
//var_dump($data2);
/* testing:
* for a year's worth of data, raising the epsilon an order of magnitude gave
* a reasonable reduction (from 87886 readings down to 1227 for 'R' between
* `1635368506` and `1664229791`, a sensor that reads once per minute; it had
* several power failures, which explains the missing readings)
*
* The noise in the readings is preserved when using the above settings; to
* flatten the noise out over a year of data, raising the sensor's sample
* depth by two orders of magnitude and lowering the default epsilon by half
* an order of magnitude gave a nice line
*
* [?] increasing the sample depth also decreases the overall amplitude of the
* signal but gives an easier to read graph
*
* [?] increasing or decreasing the epsilon adds or removes load on the
* computer; there is a sweet spot that preserves enough detail while reducing
* the number of data points in the output. The sweet spot depends on the
* sensor and can be estimated by looking at the deviations of the output
* graphs: we used a tenth of the standard deviation as the starting value for
* each sensor. A smaller value causes more work for the computer, but a tenth
* seemed like not too big a load on the CPU while guaranteeing that one would
* not notice a difference in the output graph.
*/
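/* [?] a rough sketch (my assumption) of the starting-value estimate described
* above: a tenth of the standard deviation of the filtered readings; the
* EXAMPLE_EPSILON_FROM_STDDEV guard is hypothetical and left undefined
*/
if(defined("EXAMPLE_EPSILON_FROM_STDDEV")) {
$n = count($fCurve[1]);
if($n > 1) {
$mean = array_sum($fCurve[1]) / $n;
$variance = 0;
foreach($fCurve[1] as $y) { $variance += ($y - $mean) * ($y - $mean); }
$suggested = sqrt($variance / $n) / 10;
var_dump("suggested starting epsilon (stddev / 10): {$suggested}");
}
}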
if(/* redo with an offset */ true) {
// offset the second curve by $ymax so I can compare the two on one chart
$data2 = array();
foreach($rdpCurve as $k => $v) {
$v += $ymax;
$data2[] = "{x:{$k}, y:{$v}}";
}
$ymax *= 2;
}
/* [!] warning about chartjs
*
* After spending about an hour trying to figure out why all points were
* being rendered on the x-axis at 0 (zero) I found that changing the value
* of each point's x from integer to string caused the chart to render
* correctly. I checked the documentation and found that the data structure
* for a dataset is documented as supporting integer values:
* `dataset:[{ data: [{x: 10, y: 20}, {x: 15, y: null}] }]`
*
* But testing shows this does not work. I have added quotes around each `x`
* value, but that just seems wrong.
* `dataset:[{ data: [{x: '10', y: 20}, {x: '15', y: null}] }]`
*
* [[ an update to the above warning: I figured this out a couple of weeks
* later. Using `type: 'time'` works as long as I include a date adapter for
* chartjs; apparently version 3 removed time type parsing. They also have a
* linear type, which works fine without needing to put single quotes around
* the `x` values. ]]
*
* [?] chartjs performance options
* normalized: true - informs that data indices are unique and sorted
* parse: false - data has been prepared in their internal data format
* min / max scales - calculate the minimum and maximum xy scales
* minRotation / maxRotation - set to same value (rotation of x axis labels)
*
*/
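/* [?] a rough sketch (my assumption) of those performance options expressed
* as a PHP array that could be json_encode()d into the page's chart config;
* chartjs 3 spells the option `parsing`, the scale limits come from the
* $min/$max/$ymin/$ymax computed above, and the 45 degree rotation is only an
* example value; the EXAMPLE_CHARTJS_OPTIONS guard is hypothetical
*/
if(defined("EXAMPLE_CHARTJS_OPTIONS")) {
$chartOptions = array(
"normalized" => true, // data indices are unique and sorted
"parsing" => false, // data is already in {x:..., y:...} form
"scales" => array(
"x" => array("type" => "linear", "min" => $min, "max" => $max,
"ticks" => array("minRotation" => 45, "maxRotation" => 45)),
"y" => array("min" => $ymin, "max" => $ymax),
),
);
echo "chart options: " . json_encode($chartOptions) . PHP_EOL;
}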
echo "