123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 |
- <?php
- //require_once("aken-simplify.php");
- //require_once("andrii-simplify.php");
- require_once("db-frontend.php");
- require_once("rdp.php");
- //getStandardDeviations();
- //testDataset();
- //testCacheDatabase();
/**
 * Manual smoke test for the cache database.
 *
 * Each experiment is guarded by a switch; flip a switch to exercise that
 * code path. With the switches as shipped, the only call made is
 * CACHE_DB::createCacheDb(0, PHP_INT_MAX, true), after which the script
 * terminates.
 */
function testCacheDatabase() {
    $secondsPerDay = 24 * 60 * 60;

    // switches for the individual experiments
    $dumpEverything = false;
    $dumpLastFourDays = false;
    $refreshLastDayOnly = false;

    // getting data from the cache: everything for one sensor ...
    if($dumpEverything) {
        var_dump(CACHE_DB::getCachedData("A", "radon"));
    }

    // ... or only a time range
    if($dumpLastFourDays) {
        var_dump(CACHE_DB::getCachedData(
            "A",
            "radon",
            time() - (4 * $secondsPerDay),
            time()
        ));
    }

    // creating or refreshing the cache based on a time range
    if($refreshLastDayOnly) {
        CACHE_DB::createCacheDb(time() - (1 * $secondsPerDay), time());
    } else {
        CACHE_DB::createCacheDb(0, PHP_INT_MAX, true);
    }

    exit();
}
/* pulling a year's worth of readings gets close to php.ini's max memory
 * of 128MB; with a single sensor reading every five minutes and a year's
 * worth of readings PHP somehow uses more than 75 MegaBytes
 *
 * try to free up memory as soon as we no longer need it
 */
define("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE", NULL);

// the curve RDP is tested against: array(x values, y values)
$curve = array(array(/* x value array */), array(/* y value array */));

/* sensor might get used in another code block, so give it a defined value
 * even when the synthetic curve is used
 */
$sensor = NULL;

/* comment this define out to test against the synthetic demo curve instead
 * of real sensor readings
 */
define("USE_SENSOR_READINGS", NULL);

if(defined("USE_SENSOR_READINGS")) {
    // testing
    $sensors = new TheInternet();
    // real readings replace the demo curve
    $sensor = $sensors->getSensor("R");
    if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
        /* sensors is about 100K, we do not need it now that we have the one
         * we want
         */
        unset($sensors);
    }

    $data = new DataSqueeze();
    $before = memory_get_usage();
    var_dump("using {$before} bytes before rows");
    $rows = $data->getRows($sensor);
    //$curve = $data->getRows($sensor, "radon", );

    /* move the readings into the curve, last reading first; $rows is used
     * as array(x values, y values) throughout this block
     */
    $rowCount = count($rows[0]);
    for($i = $rowCount; $i--;) {
        if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
            // popping shrinks the source while the copy grows
            $curve[0][] = array_pop($rows[0]);
            $curve[1][] = array_pop($rows[1]);
        } else {
            /* index into the two parallel arrays; `$rows[$i]` would treat
             * the result as a list of rows, which it is not
             */
            $curve[0][] = $rows[0][$i];
            $curve[1][] = $rows[1][$i];
        }
    }

    if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
        /* even though it is empty, in PHP 5.3 the `rows` variable was eating
         * a MegaByte
         */
        unset($rows, $data);
    }

    $after = memory_get_usage();
    $diff = $after - $before;
    var_dump("using {$after} bytes after [$rowCount] rows diff:({$diff} bytes)");

    // both branches of the loop filled the curve back to front
    $curve[0] = array_reverse($curve[0]);
    $curve[1] = array_reverse($curve[1]);
} else {
    // synthetic demo curve: a damped cosine sampled at 1000 points
    for($i = 0; $i < 1000; $i++) {
        $x = ($i * .01);
        $curve[0][] = $i;
        $curve[1][] = exp(-$x) * cos(2 * M_PI * $x);
    }
}
/* comment this define out to skip the FIR filter and feed the raw curve to
 * the next stage
 */
define("TEST_DATA_SQUEEZE_FIR", NULL);

if(defined("TEST_DATA_SQUEEZE_FIR")) {
    // run the data through a simple FIR filter
    $delay = /* number of samples until filter starts */ 10;
    $period = /* time between samples in seconds */ 1;
    /* [!] do not try to use one second for the period on a real sensor
     *
     * this would cause the filter to regenerate readings every second
     * instead of whatever the normal period would be, RadonEye for example
     * should be once every 300 seconds...
     */
    if(defined("USE_SENSOR_READINGS")) {
        $delay = $sensor->options->getSampleDepth();
        $period = $sensor->options->getSamplePeriod();
        var_dump("using delay: {$delay} samples, period: {$period} seconds");
    }

    $filter = new DataSqueeze();

    // process data
    $t0 = microtime(true);
    $fCurve = $filter->simpleFIR($curve, $delay, $period);
    /* [?] the filter returns floating point values, values read from the
     * database are integers, the filtered results should undergo quantization
     * to return them to the more familiar precision that they started from
     *
     * this will lose resolution that we gained from the filter but should be
     * minimal, assuming the values stored in the database are not normalized
     * eg: [-1..0..1]
     */
    for($i = count($fCurve[0]); $i--;) {
        $fCurve[1][$i] = intval($fCurve[1][$i]);
    }
    $t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";

    /* the filtered curve is array(x values, y values), so report the number
     * of points rather than the number of axes (which is always two)
     */
    $filteredCount = count($fCurve[0]);
    var_dump("to process the filter[{$filteredCount}]: {$t0}<br>");
} else {
    $fCurve = $curve;
}
/* [?] a smaller epsilon keeps more points and gives higher accuracy, but it
 * costs more CPU resources and takes longer to transfer the data to the end
 * user
 *
 * Emma points out that the epsilon will need to vary depending on the
 * units for each graph: a graph with a range of 0-1 needs an epsilon below
 * one, whereas a graph with a range of 0-1000 should use a larger epsilon,
 * most likely greater than ten.
 */
if(defined("USE_SENSOR_READINGS")) {
    $epsilon = $sensor->options->getSquishFactor();
} else {
    $epsilon = 0.01;
}

// hand the graph to the simplifier (it keeps its own copy)
$testing = new RamerDouglasPeucker($fCurve);

// time only the algorithm itself
$t0 = microtime(true);
$rdpCurve = $testing->getRDP($epsilon);

/* other libraries that were tested:
 *
 * two people ported simplify.js, which claims to be a fast enough version
 * of the polyline simplification algorithm; it gains speed by first
 * reducing the number of points using radial coordinates before running RDP
 *
 * I tested to see if the results were ten times faster but found that the
 * reduced resolution provided by the modified algorithm gave at most a 30%
 * improvement. [?] to test the libraries I needed to replace all source code
 * `'x'` and `'y'` with `0` and `1` to match this code's use of points.
 *
 * The result of this testing, and the slowness of RDP in general, is leading
 * me to want to cache results.
 *
 * these libraries are basically the same, but I tested both:
 * https://raw.githubusercontent.com/aken/simplify-php/master/simplify.php
 * https://raw.githubusercontent.com/andriichumak/simplify-php/master/Simplify.php
 */
//$testin9 = new Simplify();
//$rdpCurve = $testin9->run($curve, $epsilon);

$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";

// some stats for developers: point count before and after simplification
$count = array(count($curve[0]), count($rdpCurve));
echo "from {$count[0]} down to {$count[1]} using e = {$epsilon} {$t0}", PHP_EOL;
echo "<br>the perpendicular distance routine was called: {$testing->called}", PHP_EOL;
/* reference timing: 10000 chained sqrt() calls, each fed with the previous
 * result so the calls cannot be skipped, printed next to the RDP timing
 */
$t0 = microtime(true);
$last = 0;
$i = 10000;
while($i-- > 0) {
    $last = sqrt($i + $last);
}
$t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
echo "<br>res = {$last}... running the sqrt function n times {$t0}", PHP_EOL;
/* convert both curves to chartjs points of the form `{x:..., y:...}` and
 * collect the axis ranges along the way
 */
$data = array();
$data2 = array();

/* testing chartjs to see if grabbing the min and max values for the x-axis
 * helps with the problem where the values either smash together at x=0
 * when x values are presented as integers, or
 * where, when I make them strings, they sometimes do not interlace: for
 * small datasets they interlace, for large datasets they are serial, eg:
 * [0...999...0...999]
 */
$min = PHP_INT_MAX; $max = 0; $ymin = 0; $ymax = 0;

$count = count($curve[0]);
if($count > 0) {
    // assumes the x values ascend, so the two ends give the x range
    $min = min($min, $curve[0][0]);
    $max = max($max, $curve[0][$count - 1]);
}

for($o = 0; $o < $count; $o++) {
    $data[] = "{x:{$curve[0][$o]}, y:{$curve[1][$o]}}";
    // track both ends of the y range so negative readings are not clipped
    $ymin = min($ymin, $curve[1][$o]);
    $ymax = max($ymax, $curve[1][$o]);
    if(defined("CLEAN_UP_MEMORY_AS_FAST_AS_POSSIBLE")) {
        // the point now lives in $data, drop the original
        unset($curve[0][$o], $curve[1][$o]);
    }
}

// the simplified curve maps x => y
foreach($rdpCurve as $k => $v) {
    $data2[] = "{x:{$k}, y:{$v}}";
    $max = max($max, $k);
    $min = min($min, $k);
    $ymin = min($ymin, $v);
    $ymax = max($ymax, $v);
}

/* only possible when neither curve contributed an x value; avoid handing
 * PHP_INT_MAX to the chart as the lower bound
 */
if($min > $max) {
    $min = $max;
}
//var_dump($data2);

/* testing notes:
 *
 * for a year's worth of data, increasing the epsilon by an order of
 * magnitude gave us a reasonable reduction (from 87886 readings down to
 * 1227 for 'R' between `1635368506` through `1664229791`, a sensor that
 * reads once per minute; it had several power failures, explaining the
 * missing readings)
 *
 * The noise in the readings is preserved when using the above settings. If
 * we increase the sample depth of the sensor to flatten the noise out, then
 * for a year, two magnitudes more sample depth and decreasing the default
 * epsilon by half a magnitude gave a nice line
 *
 * [?] increasing sample depth also decreases the overall amplitude of the
 * signal but gives an easier to read graph
 *
 * [?] increasing or decreasing the epsilon adds or removes load on the
 * computer; there is a sweet spot that preserves enough detail while
 * reducing the number of data points in the output. The sweet spot depends
 * on the sensor and can be estimated by looking at deviations of the output
 * graphs: we used a tenth of the standard deviation as the starting value
 * for each sensor. Using a smaller value causes more work on the computer,
 * but a tenth seemed like not too big of a load on the CPU while
 * guaranteeing that one would not notice a difference in the output graph.
 */
if(true /* redo with an offset */) {
    /* rebuild the simplified points shifted up by the current `ymax` so the
     * two charts sit above each other and can be compared
     */
    $data2 = array();
    foreach($rdpCurve as $k => $v) {
        $v = $v + $ymax;
        $data2[] = "{x:{$k}, y:{$v}}";
    }
    // the shifted curve needs twice the vertical room
    $ymax = $ymax * 2;
}
/* [!] warning about chartjs
 *
 * After spending about an hour trying to figure out why all points were
 * being rendered on the x-axis at 0 (zero) I found that changing the
 * value of each point's x from integer to string caused the chart to render
 * correctly. I checked the documentation and found that the data structure
 * for a dataset is documented as supporting integer values:
 *   `dataset:[{ data: [{x: 10, y: 20}, {x: 15, y: null}] }]`
 *
 * But testing shows this not to work. I have added quotes around each `x`
 * value, but that just seems wrong.
 *   `dataset:[{ data: [{x: '10', y: 20}, {x: '15', y: null}] }]`
 *
 * [[ an update to the above warning, I figured this out a couple weeks later:
 *    using `type: 'time'` worked as long as I included a `date` adapter for
 *    chartjs, apparently in version 3 they removed time type parsing, but
 *    they have a linear type, which worked fine without needing to put the
 *    single quotes around `x` values. ]]
 *
 *
 * [?] chartjs performance options
 *   normalized: true - informs that data indices are unique and sorted
 *   parsing: false - data has been prepared in their internal data format
 *   min / max scales - calculate the minimum and maximum xy scales
 *   minRotation / maxRotation - set to same value (rotation of x axis labels)
 *
 * the library is loaded over https so the browser does not block it as
 * mixed content when this page itself is served over https
 */
echo "
<div><canvas id='myChart'></canvas></div>
<script src='https://cdn.jsdelivr.net/npm/chart.js'></script>
<script>
const config = {
  type: 'line',
  data: {
    datasets: [
      {label: 'sinusoid', data: [" . implode(",", $data) . "]},
      {label: 'rdp', data: [" . implode(",", $data2) . "],
        borderColor: 'rgba(255,0,0,0.5)'},
    ]
  },
  options: {
    normalized: true,
    parsing: false,
    animation: false,
    scales:{
      x: {
        type: 'linear',
        ticks: {
          maxRotation: 0,
          minRotation: 0,
        },
        min: {$min},
        max: {$max},
      },
      y: {
        min: {$ymin},
        max: {$ymax},
      }
    }
  }
};
const myChart = new Chart(document.getElementById('myChart'), config);
</script>
";
/**
 * Dumps the standard deviation of the readings of several sensors.
 *
 * The numbers are meant as a baseline for picking a reasonable epsilon for
 * the Ramer-Douglas-Peucker simplification. Output goes through var_dump();
 * nothing is returned.
 */
function getStandardDeviations() {
    /* for whatever reason users always seem to request all data from all time
     * even though they never use it
     *
     * the time to handle the user's request is negligible, but the time for
     * them to download the results is long, both because it is a lot of data
     * and because they are using a horrible transfer mechanism, HTTP
     *
     * HTTP alone slows down the transfer by at least 10x, then they are asking
     * for thousands upon thousands of data-points, but then, once they get all
     * the data, the screen that they use cannot even display all the
     * data-points so they just show a single data-point. together these two
     * work to reduce the response speed of the computer by at least 4
     * magnitudes. That's amazing!
     */

    // testing
    $sensors = new TheInternet();
    $data = new DataSqueeze();

    /* using standard deviation can give us a baseline for what reasonable
     * values we can use for Ramer-Douglas-Peucker epsilon
     */
    foreach(array('A', 'B', 'C', 'R', 'S') as $v) {
        var_dump("std deviation {$v}", $data->getStdDeviation($sensors->getSensor($v)));
    }

    /* get standard deviations for each type of reading for each of the BME
     * sensors
     *
     * use these values to help determine a reasonable epsilon for
     * Ramer-Douglas-Peucker
     */
    // the first two columns, "time" and "sensor", are not reading types
    $types = array_slice(explode(" ", BME680_COLUMNS), 2);
    foreach($types as $t) {
        foreach(array("H", "I", "J") as $s) {
            var_dump("std deviation {$t}: {$s}",
                $data->getStdDeviation($sensors->getSensor($s), $t)
            );
        }
    }
}
- require_once("fantastic.php");
/**
 * Runs the first available dataset through the whole pipeline
 * (rows -> FIR filter -> Ramer-Douglas-Peucker) and dumps the result.
 *
 * Output goes through var_dump(); nothing is returned. When no dataset is
 * available a short message is dumped instead.
 */
function testDataset() {
    // TheInternet has a list of all the sensor clusters
    $internet = new TheInternet();
    // expand the list into all possible views of the data (Datasets)
    $datasets = Dataset::makeAllDatasetsFromInternet($internet);

    // nothing to test against; bail out instead of dereferencing $datasets[0]
    if(count($datasets) === 0) {
        var_dump("no datasets available to test");
        return;
    }

    // we need an interface to get data from the database
    $data = new DataSqueeze();

    // just use one of the datasets for testing
    $testSensor = $datasets[0];
    // each dataset knows information about its sensor
    $typeName = $testSensor->getTypeName();
    $cluster = $testSensor->parentSensorCluster;
    var_dump($typeName);

    $rows = $data->getRows($cluster, $typeName);

    /* for FIR, period is fixed, delay can be modified to change how much
     * filtering is performed by the FIR; in general (but not always) users
     * will probably benefit from more filtering when "zoomed" out to give a
     * better idea of trends
     */
    $delay = $cluster->options->getSampleDepth();
    $period = $cluster->options->getSamplePeriod();
    $filtered = $data->simpleFIR($rows, $delay, $period);

    /* filtered data should be run through the RDP algorithm to reduce
     * the number of points needed to represent a given line; one can be more
     * aggressive when heavy filtering is performed. Aggressive just means
     * a higher degree of leeway is given to the RDP algorithm when considering
     * if a line is considered near a sampled / filtered data point
     */
    $rdp = new RamerDouglasPeucker($filtered);
    $final = $rdp->getRDP($cluster->options->getSquishFactor($typeName));

    var_dump($final);
    var_dump(count($datasets));
}
|