123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329 |
- <?php
- require_once("rdp.php");
- require_once("environmental.php");
- require_once("fantastic.php");
/* We ended up with two tables for storing captured sensor data.
 *
 * Existing users of the `bosch_bme680` table are supposed to provide start
 * and end values for the WHERE clause. Each row has several potentially
 * interesting columns, which makes the table a bit harder to query.
 *
 * The original table, `radonLog`, has the one column that we are most
 * interested in. One thing we should try to improve is that users currently
 * request all possible data since the radon data capture project started.
 */
/* Builds the SQL query that fetches `time` / `reading` pairs for one sensor.
 *
 * A `$type` of "radon" reads from the `radonLog` table; any other `$type` is
 * treated as a column name of the `bosch_bme680` table. The finished query
 * text is exposed through the public `$sql` property.
 */
class SensorFigureOuterThingForGettingSQL {
    // the generated SQL query, always a string
    public $sql;

    /* Build the optional time-interval part of a WHERE clause.
     *
     * Both bounds are forced through intval() so that nothing but an integer
     * can reach the query text. Every emitted condition ends in " AND " so the
     * caller can append one more condition directly after it.
     *
     * $start - lower bound for `time` (inclusive), or false for no bound
     * $end   - upper bound for `time` (inclusive), or false for no bound
     *
     * returns a string, empty when neither bound is given
     */
    private function timeRestraints($start = false, $end = false) {
        $where = "";
        if(false !== $start) {
            $startVal = intval($start);
            $where .= "time >= {$startVal} AND ";
        }
        if(false !== $end) {
            $endVal = intval($end);
            $where .= "time <= {$endVal} AND ";
        }
        return $where;
    }

    /* $sensor - object whose `name` property identifies the sensor
     * $type   - "radon", or a BME680 column name
     * $start  - optional lower time bound, false to omit
     * $end    - optional upper time bound, false to omit
     *
     * When `$type` is not "radon" and is not an allowed BME680 column, the
     * default radon query is kept (an error is logged by the lookup).
     */
    public function __construct($sensor, $type = "radon", $start = false, $end = false) {
        // the default SQL query
        // NOTE(review): `$sensor->name` is interpolated without escaping;
        // confirm that sensor names can never come from user input
        $sql = "
            SELECT time, reading
            FROM radonLog
            WHERE {$this->timeRestraints($start, $end)} id='{$sensor->name}'
            ORDER BY time ASC
        ";
        // generate a modified query when getting data for BME680 sensors
        if("radon" !== $type) {
            $bmeQuery = $this->getQueryForBME680($sensor, $type, $start, $end);
            if(false !== $bmeQuery) $sql = $bmeQuery;
        }
        $this->sql = $sql;
    }

    /* Build the query against the `bosch_bme680` table.
     *
     * returns the SQL string, or false when `$type` is not one of the
     * queryable columns listed in BME680_COLUMNS
     */
    private function getQueryForBME680($sensor, $type, $start = false, $end = false) {
        // create a list of columns that are okay to query; `time` and `sensor`
        // are part of every query already and may not be requested as a reading
        $okay_columns = array_values(
            array_diff(explode(" ", BME680_COLUMNS), array("time", "sensor"))
        );
        // cancel this query if type is invalid; the strict comparison keeps
        // non-string values (0, true, ...) from loosely matching a column name
        if(!in_array($type, $okay_columns, true)) {
            error_log(
                "the requested type, {$type} must be one of: " .
                implode(',', $okay_columns)
            );
            // the constructor only recognises `false` as "no query built"
            return false;
        }
        // the sensor that you want has a different name, I will look it up
        $sensorName = strval(eMa_sensorName($sensor->name));
        // prevent injection in time interval user input
        $and = $this->timeRestraints($start, $end);
        // add optional time interval to query
        $sql = "SELECT time, {$type} AS reading FROM `bosch_bme680` WHERE {$and}";
        // lookup by sensor, make sure query sorts results oldest event to newest
        // NOTE(review): the looked-up name is inserted unquoted; presumably
        // eMa_sensorName() yields a numeric id or a pre-quoted value - confirm
        $sql .= " sensor = {$sensorName} ORDER BY time ASC";
        return $sql;
    }
}
/* Every `$data` input parameter is an array with two elements, keyed `0`
 * and `1`:
 *
 * the element at data[0] is an array representing x values
 * the element at data[1] is an array representing y values
 *
 * Both elements are supposed to be the same length - see older versions around
 * git commit sha: 3dcaa832dd99d0073198a041f7dc1d0c9a6b03a8. Basically,
 * representing x and y in separate arrays in PHP 5.3 used a lot less memory
 * than objects or an array of x,y pairs.
 */
/* Fetches sensor readings from the database and reduces them: a rolling
 * average (simpleFIR), curve simplification (getRDP) and a standard deviation.
 *
 * Readings travel through this class as a two element array: index 0 holds
 * the list of timestamps, index 1 the list of readings.
 */
class DataSqueeze {
    // SQL text of the most recent query issued by getRows()
    public $sql;

    /* simple FIR is a rolling average, no actual convolution
     *
     * [!] for the Air Things sensors that require manual data entry it
     * is unreasonable to have readings at regular time intervals so an FIR
     * filter is not reasonable to apply
     *
     * the other sensors report data using data compression, they only send
     * data when it is different, this means that the filter cannot be applied
     * directly to the data, missing readings need to be generated and fed
     * into the filter to keep the output stable
     *
     * $data   - array(0 => timestamps, 1 => readings)
     * $delay  - number of taps, i.e. how many samples are averaged
     * $period - expected distance between two samples, in timestamp units
     *
     * returns the filtered data in the same two-array layout; `$data` is
     * returned untouched when the delay is below 2, when there is nothing to
     * filter, when the period is not positive, or when the data does not span
     * at least `$delay * $period`
     */
    static public function simpleFIR($data, $delay, $period) {
        // do not run FIR filter unless there is enough delay
        if(2 > $delay) return $data;
        // nothing to filter, also keeps the offset lookups below well defined
        if(empty($data[0]) || !is_array($data[0])) return $data;
        // a period of zero (or less) never advances past the next timestamp,
        // the regeneration loop below would not terminate
        if(0 >= $period) {
            trigger_error(
                "FIR sample period must be greater than zero.",
                E_USER_WARNING
            );
            return $data;
        }
        /* In some cases we will get a request where not enough data exists
         * to fill the delay buffer. Counting the input samples is no use for
         * detecting that, regenerated data can fill out the delay buffer, so
         * check if the last time stamp happens before the minimum amount of
         * time required for this filter
         */
        if(/* last */ end($data[0]) < /* min */ ($data[0][0] + ($delay * $period))) {
            trigger_error(
                "Last time stamp in data occurs before last FIR calculated timestamp.",
                E_USER_WARNING
            );
            // do no filtering at all?
            return $data;
        }
        /* output will have some phase shift, (N - 1) / (2 * Fs)
         * Fs is sample frequency (1 / period), N is taps (delay)
         *
         * since data uses "time" in seconds the shift should be an integer,
         * which will cause the output curves to be noticeably off.
         * regardless, the phase shift would be far greater without this
         * additional step
         *
         * written as (N - 1) * period / 2 so the value never passes through
         * the inexact reciprocal 1 / period before being truncated
         */
        $shift = intval((($delay - 1) * $period) / 2);
        // output is possibly going to be bigger than input
        $buf = array();
        $out = array(array(/* x value array */), array(/* y value array */));
        $p = 0;
        /* [!] confusing code ahead...
         *
         * the use of the `$o` variable to track the offset / indices of the
         * `$data` array is a performance optimization
         *
         * -- keep a separate pointer for the stored data, using `array_shift`
         *    on an array with 100K elements is going to bring pretty big
         *    consequences
         */
        $o = 0;
        $total = count($data[0]);
        $lastReading = 0;
        /* when recording every single value we run out of memory
         *
         * it is sufficient to store the filtered values as the buffer goes
         * into a stable state (until all values in buffer are equal), and
         * then to stop recording until just before the buffer starts to change
         * again. there is some importance to the `just before` as we need to
         * duplicate the value when we stopped recording as well as record the
         * value that signaled us to resume recording
         */
        $stoppedRecording = false;
        $lastStored = NULL;
        $lastTimestamp = NULL;
        // grab initial timestamp, also rewind one period to make loop easier
        $t = $data[/*time*/ 0][0] - $period;
        $warningCounter = 0;
        // re-create time, decompress data, regenerate missing data points
        while($o < $total) {
            // advance time (initial time is t - 1)
            $t += $period;
            // get value of the "next" timestamp
            $n = $data[/*time*/ 0][$o];
            // is the next timestamp less than a period away?
            if($period > ($n - $t)) {
                /* just use the next timestamp, there may be some temporal
                 * shift the further away it is, but it should be minimal
                 *
                 * in most cases it'll be just right, shift will be zero and
                 * we will consume one from the queue
                 */
                $t = $n;
                $lastReading = $data[/*reading*/ 1][$o];
                $o++;
            }
            // store reading into ring buffer
            $buf[$p++ % $delay] = $lastReading;
            // delay is how many samples are buffered before output
            if($p < $delay) continue;
            /* warn users when they start wanting a million things to be considered
             * for inclusion into the filter, this does not mean all will be stored
             * but that they are asking for probably too much
             *
             * a sensor might take a reading every 5 minutes, this works out to
             * 1,048,320 readings every ten years, not sure why someone would
             * ask us to process that much data....
             */
            if(1000000 === $warningCounter++) {
                error_log(implode(" ", array("File:", __FILE__, __METHOD__)) .
                    ": I am not sure you are using this right...");
            }
            // cache the value we should store, in case it takes time to compute
            $whatWeShouldStore = array_sum($buf) / $delay;
            // should we start recording?
            if((true === $stoppedRecording) and ($whatWeShouldStore !== $lastStored)) {
                $stoppedRecording = false;
                // make a new recording at the last timestamp
                $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
                $out[1][] = /*reading*/ $lastStored;
            }
            // we may use the current timestamp in the future
            $lastTimestamp = $t;
            // should we stop recording?
            if($whatWeShouldStore === $lastStored) {
                $stoppedRecording = true;
                continue;
            }
            // store all readings from filter, after delay is surpassed
            $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
            $out[1][] = /*reading*/ $whatWeShouldStore;
            $lastStored = $whatWeShouldStore;
        }
        /* when the filter becomes stable (all values in delay buffer are equal)
         * we end up truncating the data points by accident, to make sure that
         * doesn't happen check if it happened now that we are 'done'
         */
        if($stoppedRecording) {
            $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
            $out[1][] = /*reading*/ $lastStored;
        }
        if(defined("USE_SMALLER_FLOATS")) {
            // scale every filtered reading down by a factor of one hundred
            for($i = count($out[1]); $i--;) $out[1][$i] = $out[1][$i] / 100.0;
        }
        if(defined("USE_INTEGER_MATHS")) {
            /* convert all floating point to integer, keep two digits of
             * base-ten decimal
             */
            for($i = count($out[1]); $i--;) $out[1][$i] = intval($out[1][$i] * 100);
        }
        return $out;
    }

    /* I think this is just an example, it is difficult to use due to it
     * doing too much stuff, for example it runs a filter for us and formats
     * the output data probably for some graphical rendering software
     *
     * the var_dump() calls print the number of data points after each stage
     *
     * returns a list of array("time" => ..., "reading" => ...) entries, an
     * empty array when no rows matched, or false when the rows could not be
     * fetched
     */
    public function getRDP($sensor, $type = "radon", $start = false, $end = false) {
        $rows = $this->getRows($sensor, $type, $start, $end);
        if(false === $rows) return false;
        // getRows() leaves the x / y arrays unset when the query matched nothing
        if(!isset($rows[0], $rows[1])) return array();
        var_dump("rows_initial", count($rows[0]));
        $delay = $sensor->options->getSampleDepth();
        $period = $sensor->options->getSamplePeriod();
        // run data through the simplest of FIR filters, the filter works on
        // the same array(x values, y values) layout that getRows() returns
        $filtered = self::simpleFIR($rows, $delay, $period);
        // convert to array of [x,y], maybe convert floats to integer?
        // NOTE(review): assumes RamerDouglasPeucker takes and returns a list
        // of [x, y] pairs, as the previous code here did - confirm in rdp.php
        $curve = array();
        $points = count($filtered[0]);
        for($i = 0; $i < $points; $i++) {
            $curve[] = array($filtered[0][$i], intval($filtered[1][$i]));
        }
        var_dump("rows_filtered", count($curve));
        $rdp = new RamerDouglasPeucker($curve);
        $epsilon = $sensor->options->getSquishFactor($type);
        // keep epsilon on the same scale that simpleFIR() put the readings on
        if(defined("USE_INTEGER_MATHS")) {
            $epsilon = intval($epsilon * 100);
        }
        if(defined("USE_SMALLER_FLOATS")) {
            $epsilon /= 100.0;
        }
        // provide epsilon for Ramer Douglas Peucker
        $simplified = $rdp->getRDP($epsilon);
        var_dump("rows_rdp", count($simplified));
        // re-key data for end user
        foreach($simplified as $k => $v) {
            $simplified[$k] = array("time" => $v[0], "reading" => $v[1]);
        }
        return $simplified;
    }

    // this is the routine that will be called externally, probably
    // (currently an empty placeholder that returns nothing)
    public function getRadon() { /* this is the original method */ }

    /* Run the query for a sensor and collect the result.
     *
     * getting rows from radon sensors pulls from the `radonLog` table, other
     * types are read from `bosch_bme680`; the query text is kept in `$sql`
     *
     * returns array(0 => timestamps, 1 => readings) with integer values, an
     * empty array when no rows matched, or false when connecting, selecting
     * the database or running the query fails
     */
    public function getRows($sensor, $type = "radon", $start = false, $end = false) {
        $query = new SensorFigureOuterThingForGettingSQL($sensor, $type, $start, $end);
        // store SQL query for future inspection
        $this->sql = $query->sql;
        # connect to mysql database radondb
        // NOTE(review): root credentials are hard-coded here and should move
        // to configuration; the mysql_* extension was removed in PHP 7
        $db = mysql_connect("localhost", "root", "secom8703");
        if(false === $db) return false;
        if(false === mysql_select_db("radondb", $db)) {
            mysql_close($db);
            return false;
        }
        $result = mysql_query($query->sql, $db);
        if(false === $result) {
            // do not leave the connection open on the failure path
            mysql_close($db);
            return false;
        }
        $rows = array();
        while($row = mysql_fetch_array($result, MYSQL_ASSOC)) {
            /* [!] for whatever reason `time` and 'reading' are stored
             * (correctly) as INTEGER but returned as a TEXT by this routine,
             * this is terrible
             *
             * [!!] rows was using more than 50 MegaBytes for a year's worth
             * of readings from a RadonEye that sampled every five minutes
             * at least with PHP 5.3, storing the data as an array of x
             * and an array of y is more efficient than an array of [x,y]
             */
            $rows[/* x values - or time, whatever */ 0][] = intval($row["time"]);
            $rows[/* y values - or reading...     */ 1][] = intval($row["reading"]);
        }
        mysql_free_result($result);
        # mysql needs clean-up
        mysql_close($db);
        return $rows;
    }

    /* Population standard deviation (divides by n) of the readings.
     *
     * returns a float, or false when there are no readings or the rows could
     * not be fetched
     */
    public function getStdDeviation($sensor, $type = "radon", $start = false, $end = false) {
        $rows = $this->getRows($sensor, $type, $start, $end);
        // getRows() yields false on failure and no y array when nothing matched
        if(!is_array($rows) || empty($rows[1])) return false;
        $readings = $rows[1];
        $n = count($readings);
        $mean = array_sum($readings) / $n;
        $carry = 0.0;
        foreach($readings as $v) {
            $d = ((double) $v) - $mean;
            $carry += $d * $d;
        }
        return sqrt($carry / $n);
    }
}
|