= {$startVal} AND ";
        if(false !== $end) $where .= "time <= {$endVal} AND ";
        return $where;
    }

    /* Build the SQL statement that selects (time, reading) rows for a sensor
     * and expose it as `$this->sql`.
     *
     * $sensor  object whose `name` property identifies the sensor
     * $type    "radon" selects from `radonLog`; any other value is treated as
     *          a column of `bosch_bme680` (see getQueryForBME680)
     * $start   optional lower time bound, `false` for none
     * $end     optional upper time bound, `false` for none
     *
     * `$this->sql` is a string on success, or `false` when a non-radon type
     * was rejected; in that case there is no query to run.
     */
    public function __construct($sensor, $type = "radon", $start = false, $end = false) {
        // the default SQL query
        // NOTE(review): `$sensor->name` is interpolated without escaping;
        // confirm it never carries user-controlled text
        $sql = " SELECT time, reading FROM radonLog"
            . " WHERE {$this->timeRestraints($start, $end)} id='{$sensor->name}'"
            . " ORDER BY time ASC ";

        // BME680 readings live in another table; a rejected type yields
        // `false`, which is stored as-is so callers can tell "no query"
        // apart from a real statement
        if("radon" !== $type) {
            $sql = $this->getQueryForBME680($sensor, $type, $start, $end);
        }

        $this->sql = $sql;
    }

    /* Build the query for one column of the `bosch_bme680` table.
     *
     * Returns the SQL string, or `false` when `$type` is not one of the
     * columns listed in BME680_COLUMNS (excluding `time` and `sensor`).
     */
    private function getQueryForBME680($sensor, $type, $start = false, $end = false) {
        // create a list of columns that are okay to query
        $okay_columns = array_diff(
            explode(" ", BME680_COLUMNS),
            array("time", "sensor")
        );

        // cancel this query if type is invalid; the comparison is strict
        // because `$type` ends up inside the SQL text, a loose match would
        // let values such as `true` or `0` through
        if(!in_array($type, $okay_columns, true)) {
            error_log(
                "the requested type, {$type} must be one of: " .
                implode(',', $okay_columns)
            );
            return false;
        }

        // the sensor that you want has a different name, I will look it up
        // NOTE(review): the looked-up name is placed in the query unquoted;
        // presumably eMa_sensorName returns something SQL-safe - confirm
        $sensorName = strval(eMa_sensorName($sensor->name));

        // optional time interval; presumably timeRestraints sanitises the
        // bounds (its body is not fully visible here) - confirm
        $and = $this->timeRestraints($start, $end);

        // add optional time interval to query
        $sql = "SELECT time, {$type} AS reading FROM `bosch_bme680` WHERE {$and}";

        // lookup by sensor, make sure query sorts results oldest event to newest
        $sql .= " sensor = {$sensorName} ORDER BY time ASC";

        return $sql;
    }
}

/* Every `$data` parameter below is an array with exactly two elements,
 * keyed `0` and `1`:
 *
 *   $data[0] is the array of x values (time)
 *   $data[1] is the array of y values (reading)
 *
 * Both arrays are supposed to be the same length - see older versions around
 * git commit sha: 3dcaa832dd99d0073198a041f7dc1d0c9a6b03a8. Representing
 * x and y in separate arrays used a lot less memory in PHP 5.3 than objects
 * or an array of x,y pairs.
 */
class DataSqueeze {
    public $sql;

    /* Simple FIR: a rolling average over the last `$delay` samples, no
     * actual convolution.
     *
     * [!] for the Air Things sensors that require manual data entry it
     *     is unreasonable to have readings at regular time intervals so an
     *     FIR filter is not reasonable to apply
     *
     * The other sensors report data using data compression: they only send
     * data when it is different. The filter therefore cannot be applied
     * directly to the data; missing readings are regenerated every
     * `$period` and fed into the filter to keep the output stable.
     *
     * $data    array(times, readings), see the note above the class
     * $delay   number of taps (samples averaged); below 2 disables filtering
     * $period  spacing between regenerated samples, same unit as the times
     *
     * Returns array(times, readings) of filtered points. `$data` is returned
     * untouched when the filter cannot run: too few taps, no data, a
     * non-positive period, or a time span shorter than `$delay * $period`.
     */
    static public function simpleFIR($data, $delay, $period) {
        // do not run FIR filter unless there is enough delay
        if(2 > $delay) return $data;

        // nothing to filter, also covers a failed fetch handed in as-is
        if(empty($data[0]) || empty($data[1])) return $data;

        // a period of zero (or less) never advances past the first sample,
        // which would make the regeneration loop below spin forever
        if(0 >= $period) {
            trigger_error(
                "FIR sample period must be greater than zero.",
                E_USER_WARNING
            );
            return $data;
        }

        /* In some cases we will get a request where not enough data exists
         * to fill the delay buffer. Counting the input samples is not a
         * useful test because regenerated samples can fill the buffer, so
         * instead check that the last time stamp is at least one full
         * filter length after the first one.
         */
        if(/* last */ end($data[0]) < /* min */ ($data[0][0] + ($delay * $period))) {
            trigger_error(
                "Last time stamp in data occurs before last FIR calculated timestamp.",
                E_USER_WARNING
            );
            // do no filtering at all
            return $data;
        }

        /* output will have some phase shift, (N - 1) / (2 * Fs)
         * Fs is sample frequency (1 / period), N is taps (delay)
         *
         * written as (N - 1) * period / 2 so no reciprocal is involved,
         * the reciprocal form can land a hair below the exact value and
         * lose a whole unit when truncated
         *
         * since time is an integer the shift is truncated to an integer,
         * which can leave the output curve slightly off; the phase shift
         * would be far greater without this step
         */
        $shift = intval((($delay - 1) * $period) / 2);

        // output is possibly going to be bigger than input
        $buf = array();
        $out = array(array(/* x value array */), array(/* y value array */));
        $p = 0;

        /* `$o` tracks the read position in `$data` instead of consuming the
         * input with `array_shift`, which is far too slow on an array with
         * 100K elements
         */
        $o = 0;
        $total = count($data[0]);
        $lastReading = 0;

        /* when recording every single value we run out of memory
         *
         * it is sufficient to store the filtered values until the buffer
         * reaches a stable state (all values in buffer are equal), and
         * then to stop recording until just before the buffer starts to
         * change again. the `just before` matters: the value at which
         * recording stopped is duplicated at the last stable timestamp, and
         * then the value that signaled the change is recorded
         */
        $stoppedRecording = false;
        $lastStored = NULL;
        $lastTimestamp = NULL;

        // grab initial timestamp, also rewind one period to make loop easier
        $t = $data[/*time*/ 0][0] - $period;

        $warningCounter = 0;

        // re-create time, decompress data, regenerate missing data points
        while($o < $total) {
            // advance time (initial time is one period before the first sample)
            $t += $period;

            // get value of the "next" timestamp
            $n = $data[/*time*/ 0][$o];

            // is the next timestamp less than a period away?
            if($period > ($n - $t)) {
                /* just use the next timestamp, there may be some temporal
                 * shift the further away it is, but it should be minimal
                 *
                 * in most cases it'll be just right, shift will be zero and
                 * we will consume one from the queue
                 */
                $t = $n;
                $lastReading = $data[/*reading*/ 1][$o];
                $o++;
            }

            // store reading into ring buffer
            $buf[$p++ % $delay] = $lastReading;

            // delay is how many samples are buffered before output
            if($p < $delay) continue;

            /* warn once when a million samples have gone through the
             * filter; not all of them are stored, but it is probably more
             * than anyone should be asking for
             *
             * a sensor might take a reading every 5 minutes, this works out
             * to 1,048,320 readings every ten years
             */
            if(1000000 === $warningCounter++) {
                error_log(implode(" ", array("File:", __FILE__, __METHOD__))
                    . ": I am not sure you are using this right...");
            }

            // cache the value we should store, in case it takes time to compute
            $whatWeShouldStore = array_sum($buf) / $delay;

            // should we start recording?
            if((true === $stoppedRecording) and ($whatWeShouldStore !== $lastStored)) {
                $stoppedRecording = false;

                // make a new recording at the last timestamp
                $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
                $out[1][] = /*reading*/ $lastStored;
            }

            // we may use the current timestamp in the future
            $lastTimestamp = $t;

            // should we stop recording?
            if($whatWeShouldStore === $lastStored) {
                $stoppedRecording = true;
                continue;
            }

            // store all readings from filter, after delay is surpassed
            $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
            $out[1][] = /*reading*/ $whatWeShouldStore;
            $lastStored = $whatWeShouldStore;
        }

        /* when the filter ends in a stable state the trailing point was
         * skipped by the logic above; add it back so the curve is not
         * truncated
         */
        if($stoppedRecording) {
            $out[0][] = /*time*/ $lastTimestamp /* adjust for phase shift */ - $shift;
            $out[1][] = /*reading*/ $lastStored;
        }

        if(defined("USE_SMALLER_FLOATS")) {
            // scale every reading down by a factor of one hundred
            foreach(array_keys($out[1]) as $i) $out[1][$i] = $out[1][$i] / 100.0;
        }

        if(defined("USE_INTEGER_MATHS")) {
            /* convert all floating point to integer, keeping two base-ten
             * decimal digits by scaling up by one hundred first
             */
            foreach(array_keys($out[1]) as $i) $out[1][$i] = intval($out[1][$i] * 100);
        }

        return $out;
    }

    /* Fetch readings, smooth them with simpleFIR and thin the curve with
     * Ramer-Douglas-Peucker.
     *
     * The original comment calls this "just an example": it does a lot at
     * once (fetch, filter, simplify, re-key for the consumer).
     *
     * Returns an array of array("time" => ..., "reading" => ...); empty
     * when no rows could be fetched.
     */
    public function getRDP($sensor, $type = "radon", $start = false, $end = false) {
        // columnar array(times, readings), or false / array() when nothing
        $rows = $this->getRows($sensor, $type, $start, $end);
        if(empty($rows[0])) return array();

        $delay = $sensor->options->getSampleDepth();
        $period = $sensor->options->getSamplePeriod();

        // the filter works on the columnar layout directly
        $filtered = self::simpleFIR($rows, $delay, $period);

        // RamerDouglasPeucker is handed [x, y] pairs, readings as integers
        // NOTE(review): under USE_SMALLER_FLOATS the readings were divided
        // by 100 before this truncation - confirm that is intended
        $curve = array();
        foreach($filtered[0] as $i => $x) {
            $curve[] = array($x, intval($filtered[1][$i]));
        }

        $rdp = new RamerDouglasPeucker($curve);

        // epsilon follows the same scaling that simpleFIR applied to readings
        $epsilon = $sensor->options->getSquishFactor($type);
        if(defined("USE_INTEGER_MATHS")) {
            $epsilon = intval($epsilon * 100);
        }
        if(defined("USE_SMALLER_FLOATS")) {
            $epsilon /= 100.0;
        }

        // provide epsilon for Ramer Douglas Peucker
        $points = $rdp->getRDP($epsilon);

        // re-key data for end user
        foreach($points as $k => $v) {
            $points[$k] = array("time" => $v[0], "reading" => $v[1]);
        }

        return $points;
    }

    // this is the routine that will be called externally, probably
    public function getRadon() {
        /* this is the original method */
    }

    /* Run the sensor query and return the result in columnar form.
     *
     * Returns array(times, readings) with integer values, `array()` when
     * there are no rows or no query could be built for `$type`, and `false`
     * when the database cannot be reached or the query fails.
     *
     * The SQL that was built is kept in `$this->sql` for later inspection.
     */
    public function getRows($sensor, $type = "radon", $start = false, $end = false) {
        $query = new SensorFigureOuterThingForGettingSQL($sensor, $type, $start, $end);

        // store SQL query for future inspection
        $this->sql = $query->sql;

        // a rejected type leaves no statement to run
        if(!is_string($query->sql)) return array();

        # connect to mysql database radondb
        # NOTE(review): credentials are hard-coded in source; they should
        # come from configuration kept out of version control
        $db = mysql_connect("localhost", "root", "secom8703");
        if(false === $db) return false;

        if(false === mysql_select_db("radondb", $db)) {
            mysql_close($db);
            return false;
        }

        $result = mysql_query($query->sql, $db);
        if(false === $result) {
            // do not leave the connection open on the failure path
            mysql_close($db);
            return false;
        }

        $rows = array();
        while($row = mysql_fetch_array($result, MYSQL_ASSOC)) {
            /* [!] `time` and `reading` are stored as INTEGER but come back
             *     from this API as text, so they are converted here
             *
             * [!!] rows was using more than 50 MegaBytes for a year's worth
             *      of readings from a RadonEye that sampled every five
             *      minutes; at least with PHP 5.3, an array of x and an
             *      array of y is more efficient than an array of [x,y]
             */
            $rows[/* x values - or time, whatever */ 0][] = intval($row["time"]);
            $rows[/* y values - or reading... */ 1][] = intval($row["reading"]);
        }

        mysql_free_result($result);

        # mysql needs clean-up
        mysql_close($db);

        return $rows;
    }

    /* Population standard deviation of the readings for a sensor.
     *
     * Returns a float, or `false` when there are no readings (including
     * when the fetch itself failed).
     */
    public function getStdDeviation($sensor, $type = "radon", $start = false, $end = false) {
        $rows = $this->getRows($sensor, $type, $start, $end);

        // covers false, array() and an empty readings column alike
        if(empty($rows[1])) return false;

        $readings = $rows[1];
        $n = count($readings);
        $mean = array_sum($readings) / $n;

        // sum of squared distances from the mean
        $carry = 0.0;
        foreach($readings as $v) {
            $d = ((double) $v) - $mean;
            $carry += pow($d, 2);
        }

        return sqrt($carry / $n);
    }
}