12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522 |
- <?php
- // stuff relating to HighCharts
/* Completes a HighCharts series option array.
 *
 * Every key of `$opts` is passed through untouched. For each of the series
 * properties listed below, the caller's value is kept when it is set and not
 * NULL, otherwise the default is filled in.
 *
 * Defaults are strings because they are pasted verbatim into generated
 * JavaScript. `dashStyle` (longdash, dot, ...) is not defaulted here.
 *
 * $opts   array of user provided HighCharts series options
 * returns `$opts` plus type, lineWidth, color, yAxis, zIndex, showInLegend
 *         and enableMouseTracking
 */
function hcSeriesOpts($opts) {
    // relevant highchart::series properties and their fallback values
    $defaults = array(
        "type"                => "line",
        "lineWidth"           => "1",
        "color"               => "",      // empty allows HighCharts to pick a default
        "yAxis"               => "1",
        "zIndex"              => "0",
        "showInLegend"        => "true",
        "enableMouseTracking" => "false",
    );
    // start from the caller's options so unrelated keys survive
    $merged = $opts;
    foreach($defaults as $property => $fallback) {
        if(isset($opts[$property])) {
            $merged[$property] = $opts[$property];
        } else {
            $merged[$property] = $fallback;
        }
    }
    return $merged;
}
/* Converts a PHP dataset into one HighCharts series definition.
 *
 * The result is a plain text, JavaScript formatted object literal followed by
 * a comma, i.e. one element of the `series` array of the `options` variable
 * that is passed to `HighCharts.chart('container', options);` in
 * `/var/www/radon/graph.php`.
 *
 * This matches the original `graph.php` code, but the number of lines of code
 * that separate the `id` of each created `HighCharts::Series` is more than ten
 * and sometimes spans abstractions. Normally one would use
 * `HighCharts::Chart::get(id)` to manipulate the Series at runtime.
 *
 * An alternative approach would be to store `HighCharts::Series` objects and
 * manipulate them directly, which might allow for fewer mistakes. To test
 * this set `$HcOptsStruct` to `false` and use the return value from inline
 * JavaScript.
 *
 * $name          series id and display name; quotes, backslashes and line
 *                breaks are escaped so the single-quoted JavaScript literal
 *                stays well formed
 * $dataset       iterable of rows, each with a `time` and a `reading` entry;
 *                `time` gets "000" appended (presumably seconds to
 *                milliseconds, confirm against the data source)
 * $opts          HighCharts series options, completed by hcSeriesOpts()
 * $HcOptsStruct  when exactly `true` the series text is echoed
 * returns        the series text (always, so it can be used inline)
 *
 * Side effect: writes the timing of each phase to the error log.
 */
function makeSeries($name, $dataset, $opts = array(), $HcOptsStruct = true) {
    $tstart = microtime(true);
    // fill in any missing highcharts series options
    $opts = hcSeriesOpts($opts);
    $t1 = microtime(true);
    // convert [[time=>{..}, reading=>{..}], ...] to JavaScript compatible string
    $temp = "";
    foreach($dataset as $row) {
        $temp .= "[{$row['time']}000,".($row['reading'])."],";
    }
    $t2 = microtime(true);
    // a raw quote or backslash in the name would terminate or corrupt the
    // single-quoted JavaScript string below
    $jsName = addcslashes($name, "'\\\n\r");
    // process highcharts series options
    $seriesOpts = implode("\n", array(
        "{type:'{$opts["type"]}',",
        "lineWidth:{$opts["lineWidth"]},",
        "color:'{$opts["color"]}',",
        "id:'{$jsName}',",
        "name:'{$jsName}',",
        "data: [{$temp}],",
        "enableMouseTracking: {$opts["enableMouseTracking"]},",
        "showInLegend: {$opts["showInLegend"]},",
        "yAxis:{$opts["yAxis"]},",
        "zIndex:{$opts["zIndex"]},",
        "},"
    ));
    // print the HighChart compatible series dataset
    if(true === $HcOptsStruct) { echo $seriesOpts; }
    $tend = microtime(true);
    $i1 = round(1000*($t1-$tstart));
    $i2 = round(1000*($t2-$t1));
    $i3 = round(1000*($tend-$t2));
    $tot = round(1000*($tend-$tstart));
    error_log("makeseries - i1:".$i1.", i2:".$i2.", i3:".$i3." (t:".$tot."ms)");
    // allow usage for inline JavaScript
    return $seriesOpts;
}
- require_once("environmental.php");
- require_once("rdp.php");
/* A named cluster of one or more physical sensors and the accessors that
 * fetch its recorded data.
 */
class Sensor {
    /* should be A,B,C,D, ..., Z
     * there is some code that caches readings for the web front-end and uses
     * a SQLite3 database, `radonCache.db`, where a column puts some
     * restrictions on what the sensor name should be. Initially the sensor
     * names were just single characters, but this class allowed more than
     * that. Now, with `radonCache.db`, the name should be convertible into an
     * id with a value of [0..16777215]. Using [A..Z] is fine, it limits us to
     * 26 different sensor clusters, but it should be easy enough to add more
     * with a look-up-table or something
     */
    public $name;
    /* this is a serial number that identifies the device, some devices have
     * an electronically identifiable serial, eg: Bluetooth media access control
     */
    public $serial;
    // bit-field of sensor type masks that are on board this cluster
    public $sensors;
    // normally a SensorOptions instance, see the constructor
    public $options;

    /* $options may be:
     *   - a number: used as the squish factor of a new SensorOptions
     *   - a SensorOptions (or subclass) instance: cloned, so the caller's
     *     object is never shared
     *   - NULL: a default SensorOptions is created
     *   - anything else: stored as given
     */
    public function __construct($name = '', $serial = '', $sensors = 0, $options = NULL) {
        $this->name = $name;
        $this->serial = $serial;
        $this->sensors = $sensors;
        // sensor option, `squishFactor`, is most commonly used option
        if(is_numeric($options)) {
            $this->options = new SensorOptions((float) $options);
        } else if($options instanceof SensorOptions) {
            // instanceof is safe for NULL, scalars and arrays; get_class() is not
            $this->options = clone $options;
        } else if(NULL === $options) {
            $this->options = new SensorOptions();
        } else {
            $this->options = $options;
        }
    }

    /* Returns the radon readings of this sensor.
     *
     * With query options the rows come from the cache database and the most
     * recent radonLog reading is appended; with NULL every radonLog row is
     * returned (or false when the query fails).
     */
    public function getRadon($qo) {
        if(NULL === /* query options */ $qo) {
            return $this->radonLogQuery();
        }
        $rows = CACHE_DB::getCachedData($this->name, "radon",
            $qo->zoom, $qo->fir, $qo->start, $qo->end
        );
        /* add the most recent non-cached radon reading [mySQL]
         *
         * [!] getting this reading is unbelievably slow, retrieving
         *     ten thousand rows from the cache database takes the same
         *     time as retrieving a single row from the mySQL radonLog, but
         *     we pay the price since the cache database is infrequently
         *     updated
         */
        $lastReading = $this->radonLogQuery(/* get last reading */ true);
        if($lastReading) {
            if(defined("ARRAY_OF_TIME_READING_PAIRS")) {
                // rows are [time, reading] pairs
                $rows[] = array_pop($lastReading);
            } else {
                // rows are two parallel columns: [0] times, [1] readings
                $rows[0][] = $lastReading[0]["time"];
                $rows[1][] = $lastReading[0]["reading"];
            }
        }
        return $rows;
    }

    /* Reads this sensor's rows from the MySQL `radonLog` table.
     *
     * $lastReading  when truthy only the newest row is fetched
     * returns       array of associative rows (time, reading), or false when
     *               the connection or the query fails
     */
    private function radonLogQuery($lastReading = false) {
        // connect to mysql database radondb
        // NOTE(review): credentials are hard-coded; move them to configuration
        $db = mysql_connect("localhost", "root", "secom8703");
        if(false === $db) { return false; }
        mysql_select_db("radondb", $db);
        // the name ends up inside a quoted SQL literal, so escape it
        $id = mysql_real_escape_string($this->name, $db);
        $order = $lastReading ? "DESC LIMIT 1" : "ASC";
        $sql = "SELECT time, reading FROM radonLog WHERE id='{$id}' ORDER BY time {$order}";
        $result = mysql_query($sql, $db);
        if(false === $result) {
            // do not leak the connection on failure
            mysql_close($db);
            return false;
        }
        $rows = array();
        while($row = mysql_fetch_array($result, MYSQL_ASSOC)) { $rows[] = $row; }
        mysql_free_result($result);
        // mysql needs clean-up
        mysql_close($db);
        return $rows;
    }

    // thin wrappers around the per-type accessors (not defined in this class)
    public function getHumidity($qo) { return get_BME860_humidityData($this->name, $qo); }
    public function getFahrenheit($qo) { return get_BME860_fahrenheitData($this->name, $qo); }
    public function getIaq($qo) { return get_BME860_iaqData($this->name, $qo); }
    public function getUkrumayl($qo) { return get_BME860_ukrumaylData($this->name, $qo); }
    public function getWesDust($qo) { return get_BME860_wesDust($this->name, $qo); }

    /* requests for the monkey data, that is when a monkey records the temperature
     *
     * these sensors are all manually recorded, I do not see a reason at this
     * point to add query option related features to these datasets
     *  - the data for these sensors are not stored in the cache database
     *  - we are not reliable enough to input data at regular intervals
     *  - because we record when we are reacting to weather, trend lines
     *    will be extraordinarily misleading
     *  - start end ranges are pointless because, again, manual saves mean
     *    an insignificant amount of recordings
     *
     * $qo    accepted for symmetry with the other accessors, not used
     * $kind  "snow", "rain", "temperature" or "humidity"; anything else
     *        yields an empty array
     */
    public function getAuxiliary($qo, $kind) {
        switch($kind) {
            case "snow":        return getSnowData();
            case "rain":        return getRainData();
            case "temperature": return getTemperatureData();
            case "humidity":    return getHumidityData();
            default:            return array();
        }
    }
}
- /* all queries should use query options
- *
- * - what date range are you looking for?
- * I want every single temperature sample from millions of recorded samples
- * starting from the beginning of time so that I can look at the last one
- * to see what the temperature is right now - probably a bad idea?
- * - how zoomed in is the user?
- * Is anyone interested in counting ant antenna viewing a wall sized image
- * of the entire earth? Most users ask for this, but we know that it
- * would be ridiculous to service this request; for viewing small details
- * you also want a proportionally small viewing window
- * - what is the interest of the user?
- * Maybe they want general trend lines
- * Maybe they just need to know the exact temperature an hour ago
- * Maybe they want atmospheric noise / currents for random number generation
- *
- * This is just here to standardize requests, we can add validation, and
- * whatever later
- */
/* Plain value holder that standardizes what a data request asks for:
 * a start/end range, a zoom level and a FIR level.
 */
class QueryOptions {
    public $start;
    public $end;
    public $zoom;
    public $fir;

    // [!] the defaults mean that we do not want to use the cache
    public function __construct($start=false, $end=false, $zoom=NO_USE_RADON_CACHE, $fir=false) {
        $this->start = $start;
        $this->end   = $end;
        $this->zoom  = $zoom;
        $this->fir   = $fir;
    }

    /* Tells whether the cache should be consulted for this request.
     *
     * [?] leaving zoom at NO_USE_RADON_CACHE (false) will cause
     *     `environmental.php` `getColumnHelper` to retrieve data from the
     *     MySQL database instead.
     */
    public function useCache() {
        $cacheDisabled = (NO_USE_RADON_CACHE === $this->zoom);
        return !$cacheDisabled;
    }
}
- /* sensor type masks (_FANTASTIC_SENSOR_TYPE)
- * when adding a sensor type:
- * add a mask here
- * the interface to store and access the data is probably done in
- * `environmental.php`
- * look at the Dataset::typeIdLUT because you'll probably want to add something
- * there for improving performance of looking at samples.
- * SensorOptions::squishFactor should probably be extended to add a default
- * value for how much samples deviate from each other (sample noise).
- */
// one bit per sensor type, so several types can be OR-ed into a cluster
define('RADON',      1 << 0);
define('CELSIUS',    1 << 1);
define('FAHRENHEIT', 1 << 2);
define('UKRUMAYL',   1 << 3);
define('IAQ',        1 << 4);
define('WESDUST',    1 << 5);
define('HUMIDITY',   1 << 6);
- /* see sensor type masks (_FANTASTIC_SENSOR_TYPE)
- * the interface in `db-frontend.php` does not understand sensor type
- * masks and this Class compresses them anyways (Dataset::getBit), so
- * we are just going to map the two things together here.
- *
- * when I was researching I found `environmental.php` that uses these
- * constants but the interface is pretty thin (just a set of identical
- * routines)
- *
- * the key is the bit mask offset, the value is the `radonLog` MYSQL
- * database column header
- *
- * [!] in PHP 5 define only allows scalar (int, float, string, bool, or null)
- * had to make a class just to do the same thing...
- * //define("TYPE_ID_TO_COL_HEADER_LUT", array(
- */
/* Maps a compressed sensor type id (see Dataset::getBit) to the database
 * column header that stores readings of that type.
 */
class TYPE_ID_TO_COL_HEADER {
    /* $type    sensor type id, i.e. Dataset::getBit(<sensor type mask>)
     * returns  the column header, or NULL when the type has no column
     *          (FAHRENHEIT for example, which is derived from celsius)
     */
    static public function LUT($type) {
        $lookUpTable = array(
            Dataset::getBit(RADON) => "radon",
            Dataset::getBit(CELSIUS) => "celsius",
            //Dataset::getBit(FAHRENHEIT) => "does not exist in database, see Celsius",
            Dataset::getBit(UKRUMAYL) => "pascal",
            Dataset::getBit(IAQ) => "indoor_air_quality",
            Dataset::getBit(WESDUST) => "gas_resistance",
            Dataset::getBit(HUMIDITY) => "humidity"
        );
        // an unknown id is answered with NULL instead of an undefined index notice
        return isset($lookUpTable[$type]) ? $lookUpTable[$type] : NULL;
    }
}
- /* [!] not id
- *
- * external users do not use the id, they use masks, id is what is used by
- * Dataset internally and in the cached database
- *
- */
/* Maps a database column header to the sensor type mask used by external
 * callers (a mask, not the compressed type id).
 */
class COL_HEADER_TO_TYPE_MASK {
    /* $col     column header, eg: "radon"
     * returns  the sensor type mask, or NULL when the column is unknown
     */
    static public function LUT($col) {
        $lookUpTable = array(
            "radon" => RADON,
            "celsius" => CELSIUS,
            "pascal" => UKRUMAYL,
            "indoor_air_quality" => IAQ,
            "gas_resistance" => WESDUST,
            "humidity" => HUMIDITY,
        );
        // an unknown column is answered with NULL instead of an undefined index notice
        return isset($lookUpTable[$col]) ? $lookUpTable[$col] : NULL;
    }
}
- // sensor clusters
// a cluster is the bit-wise OR of every sensor type it carries
define('BOSCH_BME680', HUMIDITY | WESDUST | IAQ | UKRUMAYL | CELSIUS);
define('RADON_EYE',    RADON);
define('AIR_THINGS',   RADON);
- // BME680 sensors H, I, J
/* Default per-column squish factors (Ramer-Douglas-Peucker epsilon) for the
 * BOSCH BME680 cluster.
 */
class DEFAULT_BOSCH_BME680 {
    /* 2022 October - these initial values are a good starting place; they
     * were calculated after a year of recording by taking the standard
     * deviation of each column and dividing it by 100
     *
     * while these values probably offer very little in terms of reducing
     * the number of points needed to represent a line, they are going to be
     * close, in a logarithmic sense, to where change occurs more frequently
     *
     * returns  array of column header => squish factor
     */
    static public function SQUISH_FACTOR() {
        // standard deviation per column, before scaling
        $deviation = array(
            "celsius"            => 6.0,
            "humidity"           => 5.9,
            "pascal"             => 745.0,
            "gas_resistance"     => 250000.0,
            "indoor_air_quality" => 63.0,
            "iaq_accuracy"       => 0.92,
        );
        $factors = array();
        foreach($deviation as $column => $sigma) {
            $factors[$column] = $sigma / 100;
        }
        return $factors;
    }
}
/* One cached view of one sensor of a cluster: the combination of a sensor
 * cluster, a single sensor type, a zoom level and a FIR level.
 */
class Dataset {
    // the Sensor (cluster) this dataset belongs to
    public $parentSensorCluster;
    // integer form of the cluster name, see Dataset::toSensorId
    private $sensorId;
    // sensor type mask of the single sensor this dataset represents
    public $mask;
    // compressed sensor type, see Dataset::getBit
    private $typeId;
    // packed identifier, see Dataset::generateDatasetId
    public $datasetId;
    private $zoom;
    private $fir;
    // DatasetStatistics filled in by Dataset::retrieveReducedData
    public $stats;

    public function __construct($sensor) {
        $this->parentSensorCluster = $sensor;
        // make sure name is converted to an integer number if it is a letter
        $this->sensorId = Dataset::toSensorId($sensor->name);
        $this->stats = new DatasetStatistics();
    }

    /* because our parent is a cluster of sensors, we use late binding to
     * split the cluster into individual sensors. This is somewhat
     * obscure so users might accidentally create a Dataset without proper
     * initialization (for each sensor in the sensor mask bit field, each
     * zoom, and each fir, create a Dataset), see:
     * Dataset::makeAllDatasetsFromInternet(...) for an example
     *
     * applies to:
     *    Dataset::getTypeName()
     *    Dataset::getZoom()
     *    Dataset::getFIR()
     */
    private function errNotReady($property) {
        throw new Exception(
            "Dataset {$property} is not known until after " .
            __CLASS__ .
            "->setDatasetId is called."
        );
    }

    // returns sensor id
    public function getInternalSensorId() { return $this->sensorId; }

    // returns database header type as text, (Celsius, radon, etc)
    public function getTypeName() {
        if(!isset($this->typeId)) $this->errNotReady("type");
        return TYPE_ID_TO_COL_HEADER::LUT($this->typeId);
    }

    public function getZoom() {
        if(!isset($this->zoom)) $this->errNotReady("zoom");
        return $this->zoom;
    }

    public function getFIR() {
        if(!isset($this->fir)) $this->errNotReady("fir");
        return $this->fir;
    }

    /* Converts a cluster name into the integer used by radonCache.db.
     *
     * Integers pass through; an alphabetic string becomes the character code
     * of its FIRST character (so "AB" and "A" give the same id); anything
     * else throws.
     */
    static public function toSensorId($name) {
        if(is_int($name)) return $name;
        if(ctype_alpha($name)) return ord($name);
        throw new Exception(
            "radonCache.db wants sensor names to be convertible into integer type"
        );
    }

    /* Binds this dataset to one sensor type, zoom and fir.
     *
     * returns the packed dataset id; throws when a component is out of range
     */
    public function setDatasetId($sensorMask, $zoom, $fir) {
        $this->zoom = $zoom;
        $this->fir = $fir;
        $this->mask = $sensorMask;
        $this->typeId = Dataset::getBit($sensorMask);
        $this->datasetId = Dataset::generateDatasetId(
            $this->sensorId,
            $this->typeId,
            $zoom,
            $fir
        );
        return $this->datasetId;
    }

    /* converts sensor type (masks) to a more compressed format, eg: `HUMIDITY`
     * is 6. Sensor type masks are kinda clunky but make it easy to specify a
     * sensor type as well as a cluster of sensors as a single named thing,
     * like `BOSCH_BME680`. The only problem is that we may want to add more
     * sensor types later and the `sensorLog` in `radonCache.db` has limited
     * space to store unique sensor identifying information.
     *
     * using sensor type masks directly would only allow 8 unique sensor types
     *
     * see: _FANTASTIC_SENSOR_TYPE
     *
     * returns `$o` plus the number of times the mask can be halved before it
     * drops below one, i.e. the index of its highest set bit when `$o` is 0
     */
    static public function getBit($sensorTypeMask, $o = 0) {
        while(++$o) {
            $sensorTypeMask /= 2;
            if(0 === intval($sensorTypeMask)) return $o - 1;
        }
    }

    /* each cluster may have multiple sensors, but they are stored in a
     * bit-field. We need a way to split the bit field into separate sensors
     * without knowing what the sensors are; we do not care if it is a RADON
     * or HUMIDITY sensor, each has its own bit index.
     *
     * returns the list of single-bit masks that are set in the bit-field, in
     * ascending order; empty for zero and for negative values (a negative
     * value never shifts down to zero, so it is rejected up front instead of
     * looping forever)
     */
    static public function getSensorMasks($sensorBitField) {
        $masks = array();
        // sensor type masks start at one; walk the field from the lowest bit
        for($mask = 1; $sensorBitField > 0; $mask <<= 1, $sensorBitField >>= 1) {
            if(1 /* bit-wise AND */ & $sensorBitField) $masks[] = $mask;
        }
        return $masks;
    }

    /* just in case SQLITE3 INTEGERS or whatever language we are using is
     * signed, we can only use 31 of 32 bits, here is the layout:
     *
     * 14 bits = 16,384 sensors            (x 131072, bits 17..30)
     *  7 bits = 128 types of sensors      (x 1024,   bits 10..16)
     *  5 bits = 32 different zooms        (x 32,     bits 5..9)
     *  5 bits = 32 different FIR          (x 1,      bits 0..4)
     *
     * throws when a component does not fit into its bits
     */
    static public function generateDatasetId($sensorName, $sensorType, $zoom, $fir) {
        // restrict variables to prevent overlap in bitfield
        if((0 > $sensorName) || (16383 < $sensorName))
            throw new Exception("sensor name must be less than 16384");
        if((0 > $sensorType) || (127 < $sensorType))
            throw new Exception("sensor type mask must be less than 128");
        if((0 > $zoom) || (31 < $zoom))
            throw new Exception("zoom must be less than 32");
        if((0 > $fir) || (31 < $fir))
            throw new Exception("fir must be less than 32, was [{$fir}]");
        return
            (131072 * $sensorName) |
            (1024   * $sensorType) |
            (32     * $zoom) |
            (1      * $fir);
    }

    /* flatten out all combinations of sensors, zoom, and fir
     * also break individual sensors out of their clusters
     *
     * [?] this is just a helper function to abstract the messy stuff regarding
     *     making all the different datasets for each sensor; it uses
     *     `$internet` (cloneAllSensors(), availableFIR, availableZoom) to
     *     initialize the identifying properties of each dataset
     *
     * returns a flat array of initialized Dataset objects
     */
    static public function makeAllDatasetsFromInternet($internet) {
        $datasets = array();
        foreach($internet->cloneAllSensors() as $cluster) {
            // ignore names that we cannot handle: only integers and single letters
            $nameRule = is_int($cluster->name) ||
                (ctype_alpha($cluster->name) && 1 === strlen($cluster->name));
            if(false === $nameRule) continue;
            /* separate clusters of sensors, a cluster contains a bit-field that
             * marks which sensors are onboard, for example a BOSCH_BME680 has
             * an air quality sensor, a temperature sensor, etc
             */
            foreach(Dataset::getSensorMasks($cluster->sensors) as $sensorMask) {
                // one dataset for each fir ...
                foreach($internet->availableFIR as $fir) {
                    // ... and for each zoom
                    foreach($internet->availableZoom as $zoom) {
                        $dataset = new Dataset($cluster);
                        $dataset->setDatasetId($sensorMask, $zoom, $fir);
                        // store each dataset in a flattened array
                        $datasets[] = $dataset;
                    }
                }
            }
        }
        /* with the messy stuff handled, we can work on all datasets without
         * worrying about the specifics of each one
         */
        return $datasets;
    }

    /* helper routine to get filtered and simplified data for a given dataset
     *
     * $dataset  an initialized Dataset (setDatasetId must have been called)
     * $start    optional range start handed to DataSqueeze::getRows
     * $end      optional range end handed to DataSqueeze::getRows
     * $cache    CachedDatasetData passed by reference; created and filled
     *           here when NULL or without rows, reused otherwise
     * returns   the result of RamerDouglasPeucker::getRDP, or an empty array
     *           when filtering left no rows
     *
     * Side effect: updates `$dataset->stats`.
     */
    static public function retrieveReducedData($dataset, $start = false, $end = false, &$cache = NULL) {
        $options = $dataset->parentSensorCluster->options;
        // get all input data (or partial if a start and end range are given)
        if((NULL === $cache) or (NULL === $cache->rows)) {
            // we need an interface to get data from the database
            $data = new DataSqueeze();
            $cache = new CachedDatasetData($data->getRows(
                $dataset->parentSensorCluster,
                $dataset->getTypeName(),
                $start,
                $end
            ));
            $cache->sql = $data->sql;
        } // otherwise use cached rows
        if(defined("PRINT_DATA_SOURCE_SQL")) var_dump("sql: {$cache->sql}");
        // run the FIR filter on our data, try to use cached work
        if(NULL === $cache->filtered) {
            // default FIR delay, scaled by this dataset's multiplier
            $delay = $options->getSampleDepth();
            $delay *= FIR_DELAY_MULTIPLIER::LUT($dataset->fir);
            // get the sample period for this sensor (to regenerate data)
            $period = $options->getSamplePeriod();
            $cache->filtered = DataSqueeze::simpleFIR($cache->rows, $delay, $period);
            /* this is not really needed and probably will not trigger under
             * normal use; if it does, it might be helpful to a debugger
             */
            if(0 === count($cache->filtered[0])) {
                var_dump("[!] there were {$cache->rowCount} but no rows after filtering");
                // dump every row that was saved for debugging
                if(is_array($cache->someRows) && isset($cache->someRows[0])) {
                    foreach($cache->someRows[0] as $i => $time) {
                        echo(
                            "time {$time} " .
                            "reading {$cache->someRows[1][$i]}" . PHP_EOL
                        );
                    }
                }
                var_dump("[!] there is a problem, probably simple FIR to blame");
                var_dump("delay was [{$delay}] period was [{$period}]");
                return array();
            }
        }
        // initialize algorithm to reduce number of points needed to represent dataset
        $rdp = new RamerDouglasPeucker($cache->filtered);
        // get predefined epsilon for this type of sensor
        $epsilon = $options->getSquishFactor($dataset->getTypeName());
        if(defined("USE_SMALLER_FLOATS")) {
            $epsilon /= 100.0;
        }
        if(defined("USE_INTEGER_MATHS")) {
            $epsilon = intval($epsilon * 100);
        }
        // recompute epsilon baseline using FIR to bias adjustment
        $epsilon *= FIR_DELAY_MULTIPLIER::SQUISH_FACTOR_ADJUSTMENT($dataset->fir);
        $res = $rdp->getRDP($epsilon);
        // update statistics
        $dataset->stats->epsilon = $epsilon;
        $dataset->stats->rowCount = $cache->rowCount;
        $dataset->stats->filteredCount = count($cache->filtered[0]);
        $dataset->stats->rdpCount = count($res);
        $dataset->stats->rdpPerpendicular = $rdp->called;
        return $res;
    }
}
- /* minimal structure that can be saved between calls to Dataset::retrieveReducedData
- * in order to save CPU time
- */
/* Minimal structure that can be kept between calls to
 * Dataset::retrieveReducedData in order to save CPU time.
 *
 * Rows are expected as two parallel columns: `$rows[0]` and `$rows[1]`
 * (indexed the same way; retrieveReducedData prints them as time and
 * reading).
 */
class CachedDatasetData {
    // FIR filtered rows, NULL until they have been computed
    public $filtered = NULL;
    // unfiltered input rows, NULL when nothing has been loaded
    public $rows = NULL;
    // the SQL that produced the rows (assigned by the caller)
    public $sql;
    // since rows are cleared, this must be saved so it can later be reported on
    public $rowCount;
    // some of the rows are saved, to help facilitate debugging
    public $someRows;

    public function __construct($rows = NULL) {
        if(NULL === $rows) { return; }
        $this->someRows = array(array(), array());
        // copy up to the first 10 rows; stops early when there are fewer
        for($i = 0; $i < 10 && isset($rows[0][$i]); $i++) {
            $this->someRows[0][$i] = $rows[0][$i];
            $this->someRows[1][$i] = $rows[1][$i];
        }
        $this->rows = $rows;
        $this->rowCount = isset($rows[0]) ? count($rows[0]) : 0;
    }
}
/* Counters that Dataset::retrieveReducedData records for one dataset.
 * All values are NULL until that routine has run.
 */
class DatasetStatistics {
    // number of input rows reported by the cache
    public $rowCount;

    // number of rows left after the FIR filter
    public $filteredCount;

    // number of points returned by the Ramer-Douglas-Peucker reduction
    public $rdpCount;

    // epsilon that was handed to the reduction
    public $epsilon;

    // value of the reducer's `called` property after the reduction
    public $rdpPerpendicular;
}
- /* sensor options
- *
- * we started running into issues with the sensors collecting too much data,
- * or at least too much for our hardware's bandwidth
- *
- * each sensor needs to be looked at more carefully, and the data that it
- * produces needs to be interpreted by a computer before a person looks at it
- *
- * to keep the sensors together in the same place (this document), the list
- * of sensors needs to accommodate additional properties being added to sensors
- */
/* Per-cluster processing options: the simplification epsilon(s), the FIR
 * sample depth and the sample period.
 */
class SensorOptions {
    /* the epsilon that will be used for this sensor when data is processed
     * by the Ramer-Douglas-Peucker algorithm
     * each sensor will have its own epsilon depending on its minimum,
     * maximum, and overall signal noise
     *
     * stored as an array of sensor type name => epsilon
     */
    private $squishFactor;
    /* read the documentation for your sensor carefully, or maybe even contact
     * the manufacturer of the sensor to find out how many samples need to be
     * collected before one can get the advertised precision
     */
    private $sampleDepth;
    /* used to regenerate or decompress data stored on disk, the sample period
     * is the time in between samples, this is generally set by the individual
     * sensors. A scheme was created to save disk space by only saving samples
     * when a value changes from the previous value. This value can be
     * determined by finding the smallest period between any two samples, but
     * explicitly setting it saves us the work.
     */
    private $samplePeriod;

    /* $squishFactor  NULL   - use the pre-recorded BME680 defaults
     *                array  - sensor type name => epsilon, used as given
     *                other  - epsilon of a radon only sensor
     * $sampleDepth   number of samples, stored as an integer
     * $samplePeriod  seconds between samples, stored as an integer
     */
    public function __construct($squishFactor = NULL, $sampleDepth = 0, $samplePeriod = 1) {
        /* squish factors are derived from the standard deviation of a sensor
         * after running for several months, scaled down to give a "wide
         * berth": we keep more detail than estimated necessary when drawing
         * simplified lines
         *
         * they are used to help draw simplified lines or graphs of sensor
         * data. the value represents the distance a simplified line can be
         * from a reading before it needs to be redrawn.
         *
         * each sensor captures readings using different units, for example
         * the radon sensor might capture picocuries per liter and store
         * values as an integer resulting in values covering two or three
         * magnitudes, where an air quality sensor might capture parts per
         * million with values spanning more than six magnitudes.
         *
         * for complex sensors like the BME680 where each sensor is made of
         * multiple sensors, pass an array with one squish factor per
         * individual type of sensor; for the radon only sensors pass the
         * squish factor as a (double) or (float), we will take care of the
         * rest
         *
         * by default just use the pre-recorded epsilon values for all
         * BME680 sensors
         */
        if(NULL === $squishFactor) {
            $this->squishFactor = DEFAULT_BOSCH_BME680::SQUISH_FACTOR();
        } else if(is_array($squishFactor)) {
            // already keyed per sensor type, do not bury it under "radon"
            $this->squishFactor = $squishFactor;
        } else {
            $this->squishFactor = array("radon" => $squishFactor);
        }
        $this->sampleDepth = intval($sampleDepth);
        $this->samplePeriod = intval($samplePeriod /* seconds */);
    }

    public function setSamplePeriod($period = 1) {
        $this->samplePeriod = intval($period /* in seconds */);
    }

    // gets the number of seconds between samples
    public function getSamplePeriod() {
        return $this->samplePeriod;
    }

    public function setSampleDepth($samples = 0) {
        $this->sampleDepth = intval($samples);
    }

    // gets the amount of delay that should be used for a simple FIR filter
    public function getSampleDepth() {
        return $this->sampleDepth;
    }

    /* gets appropriate epsilon for sensor reading Ramer-Douglas-Peucker
     * simplification; falls back to 0.001 when no epsilon is known for `$type`
     *
     * radon type sensors each have their own epsilon, they are placed in
     * areas where radon fluctuates at different rates, using the same
     * epsilon might lead to missing small changes or getting too many
     * data-points
     */
    public function getSquishFactor($type = "radon") {
        if(isset($this->squishFactor[$type])) {
            return $this->squishFactor[$type];
        }
        return 0.001;
    }
}
- /* zooms are cached, these names identify three views of the same data
- * that represent different data-sets that appear similar to each other
- */
define('ZOOM_96_HOURS', 1);
define('ZOOM_MONTH',    2);
define('ZOOM_YEAR',     3);
define('ZOOM_DEFAULT',  ZOOM_96_HOURS);

/* [!] not a valid "zoom", but the default query option; it disables the use
 *     of the cache and all features that come with it
 */
define('NO_USE_RADON_CACHE', false);

/* finite impulse responses are cached, these names identify views of the same
 * data that represent data-sets that are distinct from each other but share
 * similar trends
 */
define('FIR_OFF',      0);
define('FIR_1_HOUR',   1);
define('FIR_96_HOURS', 2);
define('FIR_MONTH',    3);
define('FIR_YEAR',     4);
define('FIR_DEFAULT',  FIR_1_HOUR);
- /* used to increase the delay by some multiple in order to increase visibility
- * of trend lines in noisy signals
- */
/* Look-up tables keyed by FIR level. Both tables answer an unknown level
 * with the entry stored under FIR_DEFAULT.
 */
class FIR_DELAY_MULTIPLIER {
    // shared look-up: the entry for `$fir` when present, else the default entry
    static private function pick($table, $fir) {
        if(isset($table[$fir])) {
            return $table[$fir];
        }
        return $table[FIR_DEFAULT];
    }

    /* multiplier applied to a sensor's sample depth to get the FIR delay
     *
     * these are unit-less, but we assume the other operand is an hour; the
     * trailing comments give the length of each window in hours
     */
    static public function LUT($fir) {
        $multipliers = array(
            FIR_OFF      => 0,    // 0
            FIR_1_HOUR   => 1,    // 1
            FIR_96_HOURS => 8,    // 96
            FIR_MONTH    => 62,   // 744
            FIR_YEAR     => 730,  // 8760
            // see above: copy the FIR_1_HOUR value
            FIR_DEFAULT  => 1
        );
        return self::pick($multipliers, $fir);
    }

    /* factor applied to the squish factor (epsilon) for a FIR level
     *
     * [!] squish factor is only used for RDP, all values in cache are
     *     subject to the RDP algorithm, we do not have enough space
     *     to store all decompress corrected readings from sensors
     */
    static public function SQUISH_FACTOR_ADJUSTMENT($fir) {
        $adjustments = array(
            FIR_1_HOUR   => 1.0,
            FIR_96_HOURS => 10.0,
            FIR_MONTH    => 100.0,
            FIR_YEAR     => 1000.0,
            FIR_OFF      => 1.0,
            FIR_DEFAULT  => 1.0
        );
        return self::pick($adjustments, $fir);
    }
}
- /* necessary data caching
- *
- * After testing the filter and RDP algorithm I found that the RadonEye and other
- * devices that save data every five minutes takes too much CPU time. The original
- * method for displaying data to the end user had some deficiencies, for example
- * the data was saved only when a change was discovered, the previous output
- * would extrapolate a line between the two saved values, but as all sampled data
- * should have only displayed the captured data points. To incorporate extrapolated
- * lines we need to consider all points sampled between changes, then decide a
- * strategy for extrapolating a line. For sensor 'R', 95 thousand samples it
- * takes about 7 seconds to regenerate / decompress missing samples, and run all
- * samples through RDP, where nearly 2 million square root calculations are
- * performed. Using more RAM does speed up the process but with the limitation
- * of 128MB under PHP we'd need around 3 orders of magnitude more RAM. (128
- * Gigabytes would be reasonable)
- *
- * Even with the increased RAM we'd only improve speed by ten, definitely less
- * than a hundred giving us unacceptable delay. For example ten years down the
- * road means 70 seconds, divide by less than 1 hundred is going to be around
- * a second for a single sensor.
- *
- * So we need to cache.
- *
- * What should be cached:
- * zoom: 3 days, month, year
- * levels of FIR: same as above, but additionally default
- *
- * zoom has to do with how many data points are provided to the end user, effects
- * should not be noticeable to an end user, but data is heavily modified, this
- * is a form of down-sampling
- *
- * FIR levels have to do with overall trends, effects are noticeable to an end
- * user, this is a form of averaging in the time domain
- *
- * there will be zoom * levels different data-sets per sensor, so [12] total
- * per sensor, but, those 12 sets will still be smaller than the original data,
- * I hope.
- *
- * sensorLog table in SQLite3 cached database:
- * id, time, sensor, dataset, value
- * dataset is a multiple of 128 and the zoom, then sum with the level of FIR
- * sensor is a multiple of 128 and the name, then sum with the sensor type
- *
- * sensorLog combines both the radonLog and the boche_bme680 tables, data
- * is interpolated and may change from minute to minute. Because of the
- * interpolation timestamps for a given event may not match exactly with the
- * original data, values for a given event may not match exactly values in the
- * original data, the goal of the downsampling and trend averaging techniques
- * is to represent the signal in the ways that is most useful to an end user
- * down sampling has to be cached at multiple sample depths to prevent a user
- * from noticing the "missing" data, trends have to be cached at different FIR
- * levels to provide different views of the same data to a user so that they
- * can see the signal through the noise
- *
- * create an ephemeral database that has less data, this way the end user can
- * continue to request `all` of the possible data
- */
class CACHE_DB {
    // SQLite3 file that holds the cache, resolved against the working directory
    const DB_FILE = "radonCache.db";
    const DATASET_INDEX =
        "CREATE INDEX IF NOT EXISTS datasetIndex ON sensorLog(dataset);";
    const TIME_INDEX =
        "CREATE INDEX IF NOT EXISTS timeIndex ON sensorLog(time);";

    /* builds an SQL statement that works on one dataset of sensorLog inside
     * a time window, the statement always carries the named parameters
     * :dataset, :start and :end
     *
     * $query is the leading clause ("SELECT ...", "DELETE", ...), when it is
     *        NULL a time ordered "SELECT time, reading" is generated
     * $order is appended after the WHERE clause
     *
     * when querying, always use `time` and `dataset` column to make use of index
     */
    static private function QUERY($query = NULL, $order = "") {
        // setup a default query
        if(NULL === $query) {
            $query = "SELECT time, reading";
            $order = "ORDER BY time ASC";
        }
        return "
            {$query}
            FROM sensorLog
            WHERE
                dataset = :dataset
                AND time >= :start
                AND time <= :end
            {$order};
        ";
    }

    /* returns the CREATE TABLE statement for sensorLog, when $disk is true a
     * UNIQUE(time, dataset) constraint is added to the table
     */
    static private function CREATE($disk = false) {
        return "
            CREATE TABLE IF NOT EXISTS sensorLog(
                time INTEGER NOT NULL,
                dataset INTEGER NOT NULL,
                reading FLOAT" . ($disk ? ", \nUNIQUE(time, dataset)" : "") ."
            );
        ";
    }

    /* reads one cached dataset from disk
     *
     * cluster is what we call the sensor name, naming scheme has been: 'A', 'B',
     * 'C', etc. This rule has been solidified as the cached database, `radonCache.db`
     * uses a scheme to store all sensor readings in a single table with a single
     * column identifying both which sensor and from what cluster a reading came
     * from.
     *
     * a cluster is a sensor that is actually multiple sensors, a single device
     * where the sensors are soldered on, so it is impossible to move individual
     * sensors without moving the other sensors in the cluster
     *
     * the sensor type identifies which sensor in the sensor cluster that data is
     * being requested for
     *
     * zoom is what data the user is looking at
     * - ZOOM_DEFAULT
     * - ZOOM_96_HOURS
     * - ZOOM_MONTH
     * - ZOOM_YEAR
     * fir is how much filtering the user wants
     * - FIR_DEFAULT
     * - FIR_OFF
     * - FIR_1_HOUR
     * - FIR_96_HOURS
     * - FIR_MONTH
     * - FIR_YEAR
     * start and end bound the `time` column, both ends are inclusive
     *
     * returns array([times], [readings]), or, when the constant
     * ARRAY_OF_TIME_READING_PAIRS is defined, a list of
     * array("time" => ..., "reading" => ...) ordered by time
     */
    static public function getCachedData($clusterName, $sensorType, $zoom = ZOOM_DEFAULT, $fir = FIR_DEFAULT, $start = 0, $end = PHP_INT_MAX) {
        $dbDisk = new SQLite3(self::DB_FILE);
        $diskSelect = $dbDisk->prepare(self::QUERY());
        /* cluster can either be a letter: 'A', 'B', 'C', etc
         * or the integer id of the sensor
         */
        $sensorId = Dataset::toSensorId($clusterName);
        /* sensor type can either be string: 'radon', 'celsius', etc
         * or mask:: RADON, CELSIUS, etc
         */
        $mask = is_int($sensorType)
            ? $sensorType
            : COL_HEADER_TO_TYPE_MASK::LUT($sensorType);
        $typeId = Dataset::getBit($mask);
        $datasetId = Dataset::generateDatasetId($sensorId, $typeId, $zoom, $fir);
        // update our SQL statement
        $diskSelect->bindValue("dataset", $datasetId);
        $diskSelect->bindValue("start", $start);
        $diskSelect->bindValue("end", $end);
        // run it
        $results = $diskSelect->execute();
        // PHP does not do so well storing non-scalar values
        $rows = array(array(/* times */), array(/* readings */));
        /* get results
         * cached database values are not aware of MySQL integer based storage
         * scheme, where integers are used in place of floating point values
         * by multiplying the float by 100 and then removing all decimal points
         *
         * the cached database stores FIR values of the above MySQL values
         * which reintroduces unnecessary precision (any precision beyond two
         * decimal points) and stores those values as float
         *
         * [!] make sure to get rid of the unnecessary precision, it is
         *     wasteful and misleading - readings are truncated to integers
         *
         * only named columns are read, so skip the numeric duplicates that
         * the default fetch mode would also build for every row
         */
        while($row = $results->fetchArray(SQLITE3_ASSOC)) {
            $rows[0][] = $row["time"];
            $rows[1][] = intval($row["reading"], 0);
        }
        // done unless we need to support older frontends
        if(false === defined("ARRAY_OF_TIME_READING_PAIRS")) return $rows;
        /* CACHE_DB uses reduced memory: array([time array], [reading array])
         * the frontend was written before this optimization was found, so that
         * nothing needs to be changed in the frontend code we put the results
         * back into the format it expects
         *
         * both arrays are emptied from the back while the pairs are built so
         * the data is never held twice, the pairs come out newest first and
         * are flipped back into time order at the end
         */
        $arr = array();
        for($i = count($rows[0]); $i--;) {
            $arr[] = array(
                "time" => array_pop($rows[0]),
                "reading" => array_pop($rows[1])
            );
        }
        return array_reverse($arr);
    }

    /* (re)builds the cache, either all of it or a part of it
     *
     * $startTime, $endTime  window of readings to process, both are passed
     *                       through intval(), a start time of 0 means a full
     *                       rebuild, anything else is treated as a partial
     *                       update
     * $reset                true removes the cache file before starting
     * $clusterName          false works on every cluster, otherwise only the
     *                       datasets of that cluster are updated (letter or
     *                       sensor id, see Dataset::toSensorId)
     *
     * [?] 20 sensors, about 10 months of data takes about 25 minutes to process on
     *     an 800MHz CPU and 16 Gigabyte RAM, produces a 17 Megabyte file
     *
     *     After testing to see if one can see the difference between compressed
     *     data and uncompressed data and determining if there was value in
     *     filtering input data we needed a way to validate if it was possible to
     *     bring these features to the frontend. Here we create all the different
     *     outputs from our input data and store them to disk in a cache. Various
     *     ways of processing and storing the data are explored. Currently it takes
     *     a bit too long to generate all the data, with optimizations it went from
     *     25 minutes to 13 minutes.
     *
     *     A feature was added to partially update the cache, this makes it so one
     *     can update a single sensor in less than a second. Still pretty slow.
     *
     *     The time needed to store data is dominated by the need to update, with
     *     updates, actually the DELETE taking almost all the CPU time. Processing
     *     data is also slow, with the PHP implementation of the algorithm being
     *     really slow.
     *
     *     some notes about SQLite3 database optimizations - adding indices
     *     increases disk space usage:
     *         36 Megabytes with UNIQUE constraint on dataset and time
     *         48 Megabytes with INDEX on dataset
     *         43 Megabytes with INDEX on time and dataset (no UNIQUE constraint)
     *
     *     the fourth parameter, clusterName, was added to allow a single cluster
     *     to do a partial update on just its sensors. It is important to set
     *     reasonable start and end times when doing a partial update, this is
     *     somewhat complicated to figure out and currently not implemented well
     *     as one would probably want to vary the start and end times based on
     *     the level of finite impulse response filtering (this is kinda handled
     *     internally, but you still need to provide a good start time)
     *
     *     We added an additional Finite Impulse Response filter, FIR_OFF, this
     *     means there are more datasets, the disk requirements doubled and I
     *     think we are sitting at just under 100 Megabytes for the cache after
     *     one year of recording
     */
    static public function createCacheDb($startTime = 0, $endTime = PHP_INT_MAX, $reset = false, $clusterName = false) {
        /* feature toggles are global constants, they are only defined when
         * missing so this routine can run more than once per process without
         * tripping over "constant already defined"
         */
        //define("PRINT_DATA_SOURCE_SQL", NULL);
        /* this seems to make things worse, possibly because larger epsilons passed
         * to RDP algorithm are taking longer to compute
         *
         * this makes the database bigger (possibly)
         * and does not improve performance
         */
        //define("USE_SMALLER_FLOATS", NULL);
        /* this does not seem to help much: 23 minutes
         * without we get nearly 25 minutes
         */
        //define("USE_INTEGER_MATHS", NULL);
        //define("VERBOSE_DB_INSERT_LOGGING", NULL);
        /* storing rows in memory between look-ups saves a very small amount of
         * time when compared to the amount of time doing other work
         *
         * investigated when storing 12 datasets for each of all sensors was taking
         * between 25 minutes and 45 minutes to complete
         *
         * (the time difference seems to be the chosen epsilon and RDP algorithm
         * where larger epsilons are taking longer to compute)
         *
         * might save a minute or two
         */
        defined("USE_CACHED_ROWS") or define("USE_CACHED_ROWS", NULL);
        defined("USE_CACHED_FIR") or define("USE_CACHED_FIR", NULL);
        /* improves DELETE which is used a bit when doing partial updates to
         * database. It does increase disk space used by 30% but gives a big
         * boost to speed. I am not sure why the DELETE was not using the UNIQUE
         * constraint index, but whatever. Normally DELETE does not use a large
         * portion of time, but in the case where we are doing a partial update
         * and deleting two rows and adding a couple the DELETE operation was
         * consuming 98% of the CPU time.
         */
        defined("USE_INDEX_ON_DATASET") or define("USE_INDEX_ON_DATASET", NULL);
        /* the unique constraint gives an SQL index when queries use the `time`
         * and `dataset` columns. We pay a price for this when writing new
         * entries to the database, but gain speed whenever we read
         *
         * also we never query the :memory: database so creating indices does not
         * make sense, only do this for the disk backed database
         */
        //define("USE_CONSTRAINT", NULL);
        defined("USE_INDEX_ON_TIME") or define("USE_INDEX_ON_TIME", NULL);
        defined("FLUSH_DB_AT_ONCE") or define("FLUSH_DB_AT_ONCE", NULL); // faster, uses more RAM

        /* [?] maybe print a warning later if database exists and user wants to
         *     perform a full cache refresh (this is the most probable use, but
         *     also the most wasteful)
         *
         * [??] regarding disk backed using magnetic VS ssd VS ramdisk
         *      `sudo mount -t tmpfs -o rw,size=64M tmpfs tmpfs` did not improve
         *      SQLite3 performance, so we are hitting CPU before disk
         */
        // only remove what is there, unlink() warns about missing files
        if((true === $reset) and file_exists(self::DB_FILE)) unlink(self::DB_FILE);
        // after a reset this is only true when the file could not be removed
        $fileExists = file_exists(self::DB_FILE);
        $dbDisk = new SQLite3(self::DB_FILE);
        $db = new SQLite3(":memory:");
        /* enable the write ahead log for disk backed storage
         * this does not help the :memory: backed storage as it cannot use `wal`
         * after testing, these optimizations do not seem to help
         */
        //$dbDisk->exec("PRAGMA journal_mode = wal;");
        //$dbDisk->exec("PRAGMA synchronous = NORMAL;");
        // double speed on write, but power failure during write will corrupt db
        $dbDisk->exec("PRAGMA synchronous = OFF;");
        // set 64Mb cache (up from 2Mb default) - this does not seem to help
        //$dbDisk->exec("PRAGMA cache_size = -64000;");
        $db->exec(self::CREATE());
        $dbDisk->exec(self::CREATE(/* unique index */ defined("USE_CONSTRAINT")));
        if(defined("USE_INDEX_ON_DATASET")) $dbDisk->exec(self::DATASET_INDEX);
        if(defined("USE_INDEX_ON_TIME")) $dbDisk->exec(self::TIME_INDEX);
        // temporarily give memory db access to the disk db
        $db->exec("ATTACH DATABASE './" . self::DB_FILE . "' AS disk");
        // prepare all queries, use standard prepared interface
        $diskSelect = $dbDisk->prepare(self::QUERY(/* use defaults */));
        $diskDelete = $dbDisk->prepare(self::QUERY("DELETE"));
        $diskDeleteCheck = $dbDisk->prepare(self::QUERY("SELECT COUNT(*)"));
        // setup the column mapping for inserts
        $arr = array("time", "dataset", "reading");
        $cols = implode(", ", $arr);
        $bindings = implode(", ", array_map(function ($v) { return ":{$v}"; }, $arr));
        /* [!] careful, importing data uses PHP bound variables, the statement
         *     reads whatever these hold at the moment execute() is called
         */
        $boundTime = 0;
        $boundDataset = 0;
        $boundReading = 0.0;
        $stmt = $db->prepare("INSERT INTO sensorLog({$cols}) VALUES({$bindings})");
        $stmt->bindParam(":time", $boundTime, SQLITE3_INTEGER);
        $stmt->bindParam(":dataset", $boundDataset, SQLITE3_INTEGER);
        $stmt->bindParam(":reading", $boundReading, SQLITE3_FLOAT);
        // TheInternet has a list of all the sensor clusters
        $internet = new TheInternet();
        // expand the list into all possible views of the data (Datasets)
        $datasets = Dataset::makeAllDatasetsFromInternet($internet);
        // see: USE_CACHED_ROWS
        $cache = new CachedDatasetData();
        $previous = array(NULL, NULL, NULL);
        // work on all datasets -- unless profiling for speed optimizations
        //define("PROFILE_S_SLOWEST", NULL);
        // allow partial updates for just a single cluster of sensors
        if(false !== $clusterName) $clusterName = Dataset::toSensorId($clusterName);
        foreach($datasets as $dataset) {
            /* make sure start and end are integers before we go stuffing those
             * values into SQL
             * [?] also since start time is modified, re-initialize it every
             *     iteration
             */
            $start = intval($startTime);
            $end = intval($endTime);
            /* NOTE(review): this is seeded with the start time from *before*
             * the backtracking below, so rows inside the backtracked window
             * are only deleted once the comparison loop has moved past them -
             * confirm against Dataset::retrieveReducedData whether that can
             * leave duplicate rows behind
             */
            $lastSameTimestamp = $start;
            // partial update: only touch datasets of the requested cluster
            if((false !== $clusterName) and ($clusterName !== $dataset->getInternalSensorId())) {
                //echo "skipping..." . PHP_EOL;
                continue;
            }
            /* profiling: only work on cluster 'S', this filter must not apply
             * to regular partial updates or no other cluster could be updated
             */
            if(defined("PROFILE_S_SLOWEST") and ('S' !== $dataset->parentSensorCluster->name)) {
                continue;
            }
            //if(FIR_DEFAULT !== $dataset->getFIR()) continue;
            //if(FIR_YEAR !== $dataset->getFIR()) continue;
            /* we should backtrack the time a little bit to try to reduce
             * discontinuity when doing a partial update to the cache
             *
             * this is a minimum, it is more than likely that the database will
             * not have a reading at this timestamp, and there is a somewhat
             * likely chance that the reading after this timestamp will be so far
             * away that the amount of data going into the filter would not be
             * enough to fill it
             *
             * TODO:
             * we should make sure the filter knows our start time so it can start
             * there...
             */
            if(0 !== $start) {
                $seconds = FIR_DELAY_MULTIPLIER::LUT($dataset->getFIR()) *
                    60 /* minutes in an hour */ *
                    60 /* seconds in a minute */
                ;
                //var_dump("need to go back {$seconds} seconds before {$start}");
                if($start >= $seconds) $start -= $seconds;
            }
            $filterDepth = $dataset->parentSensorCluster->options->getSampleDepth();
            $filterDepth *= FIR_DELAY_MULTIPLIER::LUT($dataset->getFIR());
            /* using a row cache means we can skip SQL reads when a dataset uses
             * the same rows
             *
             * [?] if we are careful about how the datasets are organized we can
             *     hit this optimization more often, worse case we might miss this
             *     optimization every time ::frown face::
             *
             * use some truth tables to determine if we can use cached data
             */
            $previousCluster = $dataset->getInternalSensorId() === $previous[0];
            $previousType = $dataset->getTypeName() === $previous[1];
            $previousDepth = $filterDepth === $previous[2];
            if(defined("USE_CACHED_ROWS") and NULL !== $cache->rows) {
                $cache->rows = ($previousCluster and $previousType)
                    ? /* no change needed */ $cache->rows
                    : /* need to read SQL */ NULL;
                if(NULL !== $cache->rows) {
                    if(defined("VERBOSE_DB_INSERT_LOGGING"))
                        echo "[?] using cached rows..." . PHP_EOL;
                }
            } else $cache->rows = NULL;
            if(defined("USE_CACHED_FIR") and NULL !== $cache->filtered) {
                $cache->filtered = ($previousCluster and $previousType and $previousDepth)
                    ? /* no change needed */ $cache->filtered
                    : /* need to read SQL */ NULL;
                if(NULL !== $cache->filtered) {
                    if(defined("VERBOSE_DB_INSERT_LOGGING"))
                        echo "[?] using cached fir data..." . PHP_EOL;
                }
            } else $cache->filtered = NULL;
            // record the previous SQL read markers, in-case we want to use caching
            $previous[0] = $dataset->getInternalSensorId();
            $previous[1] = $dataset->getTypeName();
            $previous[2] = $filterDepth;
            $t0 = microtime(true);
            // store some data to later run a health-check on what we are doing
            $data = Dataset::retrieveReducedData($dataset, $start, $end, $cache);
            //var_dump($dataset->stats);
            /* flipped here because the insert loop further down consumes the
             * array from its end, the keys (timestamps) have to survive
             */
            $data = array_reverse($data, /* keep keys */ true);
            $t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
            //if(empty($data)) { var_dump("no records found"); }
            if(defined("VERBOSE_DB_INSERT_LOGGING")) var_dump(
                "{$t0} done processing cluster {$dataset->parentSensorCluster->name} " .
                "input " . count($cache->filtered[0]) . " rows " .
                "output " . count($data) . " rows " .
                "sensor: {$dataset->getTypeName()} " .
                "datasetId:{$dataset->datasetId} " .
                "filter depth:{$filterDepth}"
            );
            $t0 = microtime(true);
            $keptRows = 0;
            /* checking before delete takes some time, but on average ends up
             * saving a bit of time, go figure
             *
             * I cannot find out why this is, SQLite3 documentation and internet
             * people say DELETE is expensive so if we can avoid it we save time
             */
            if(0 !== $start) {
                /* try to reduce the amount of writing to the database, find the
                 * timestamp when values start to change and only write those
                 */
                $diskSelect->bindValue("dataset", $dataset->datasetId);
                $diskSelect->bindValue("start", $start);
                $diskSelect->bindValue("end", $end);
                $results = $diskSelect->execute();
                while($row = $results->fetchArray(SQLITE3_ASSOC)) {
                    if(isset($data[$row["time"]])) {
                        // compare if float-point values are similar
                        $a = $row["reading"];
                        $b = $data[$row["time"]];
                        //var_dump("compare [{$a}] and [{$b}]");
                        if(abs($a - $b) > .00001) break;
                        // same value on disk already, no need to write it again
                        $keptRows++;
                        $lastSameTimestamp = $row["time"] + 1;
                        //var_dump("keeping {$lastSameTimestamp}");
                        unset($data[$row["time"]]);
                    }
                }
                //var_dump("keeping [{$keptRows}] rows");
                $diskSelect->reset();
            }
            $t0 = "(select cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
            if(defined("VERBOSE_DB_INSERT_LOGGING")) var_dump(
                "{$t0} done checking for extra writes" .
                " avoided {$keptRows} rows"
            );
            $t0 = microtime(true);
            // prepare to remove all entries from the disk
            if(0 === count($data)) {
                //var_dump("skipping DELETE operation, nothing to add");
            } else {
                $diskDeleteCheck->bindValue("dataset", $dataset->datasetId);
                $diskDeleteCheck->bindValue("start", $lastSameTimestamp);
                $diskDeleteCheck->bindValue("end", $end);
                $res = $diskDeleteCheck->execute();
                $res = $res->fetchArray(SQLITE3_ASSOC);
                $rowsWouldDelete = $res["COUNT(*)"];
                //var_dump("rows caught by delete: {$rowsWouldDelete}");
                if(0 === $rowsWouldDelete) {
                    //var_dump("skipping DELETE operation, nothing would delete");
                } else {
                    $diskDelete->bindValue("dataset", $dataset->datasetId);
                    $diskDelete->bindValue("start", $lastSameTimestamp);
                    $diskDelete->bindValue("end", $end);
                    $diskDeleteCheck->reset();
                    $diskDelete->execute();
                    $diskDelete->reset();
                }
            }
            $t0 = "(delete cpu time " .
                number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
            if(defined("VERBOSE_DB_INSERT_LOGGING")) var_dump(
                "{$t0} done clearing old data"
            );
            $t0 = microtime(true);
            $boundDataset = intval($dataset->datasetId);
            //$db->exec("BEGIN TRANSACTION;");
            // consume $data from its end, one (timestamp => reading) at a time
            while($row = array_slice($data, -1, 1, true)) {
                // remove sliced indice
                array_pop($data);
                // load the database
                $boundTime = intval(key($row));
                $boundReading = (float) array_pop($row);
                $stmt->execute();
                $stmt->reset();
            }
            //$db->exec("END TRANSACTION;");
            if(!defined("FLUSH_DB_AT_ONCE")) {
                // flush memory to disk on every dataset to prevent using too much memory
                $db->exec(
                    "INSERT INTO disk.sensorLog ({$cols}) SELECT {$cols} FROM sensorLog;");
                $db->exec("DELETE FROM sensorLog;");
            }
            $t0 = "(cpu time " . number_format(1000 * (microtime(true) - $t0), 2) . "ms)";
            if(defined("VERBOSE_DB_INSERT_LOGGING")) var_dump(
                "{$t0} done saving cluster {$dataset->parentSensorCluster->name} " .
                "rdp: {$dataset->stats->rdpCount} " .
                "sensor: {$dataset->getTypeName()} " .
                "datasetId:{$dataset->datasetId}"
            );
            if(defined("VERBOSE_DB_INSERT_LOGGING")) echo PHP_EOL;
        }
        if(defined("FLUSH_DB_AT_ONCE")) {
            // everything was collected in memory, write it to disk in one go
            $db->exec("INSERT INTO disk.sensorLog ({$cols}) SELECT {$cols} FROM sensorLog;");
        }
        // a reset was requested but the old file survived the unlink above
        if((true === $reset) and (true === $fileExists)) {
            trigger_error(
                "[?] removing `" .
                self::DB_FILE .
                "` would be significantly faster " .
                "for full cache refreshing",
                E_USER_WARNING
            );
        }
    }
}
- /* unnecessary cruft, automatically names stuff, sits in-between the routines
- * that get the data and the code that draws stuff
- *
- * the code that was supplying the highcharts wanted data, names, colors and
- * then on top of that there were a bunch of sensors that have names like
- * 'A', 'B', 'C', and nothing taking responsibility for these things
- *
- * user wants a highchart series
- *
- * maybe user wants to specify some stuff, color, time range, etc, try to
- * accommodate those things
- *
- * [?] availableZoom and availableFIR can be combined to represent multiple
- * views of some data. A data-set is cached for each combination of zoom
- * and FIR, when this was written there were 3 available zooms and 4
- * available FIR meaning there would be 12 data-sets cached per sensor.
- * a sensor cluster, like the BOSCH_BME680, with its 5 sensors will have
- * 60 cached data-sets
- */
class TheInternet {
    // forwarded (negated) to makeSeries() as its last argument
    public $inline = false;
    // Series objects collected by iotSmartAlexaSiriCloud()
    private $output = array();
    // all known sensor clusters, keyed by their name
    private $sensors = array();
    // maps what a user asks for ("temperature") to a Sensor getter name
    private $whatUsersWant;
    // duplicates (the *_DEFAULT aliases) are removed in the constructor
    public $availableZoom = array(
        ZOOM_DEFAULT,
        ZOOM_96_HOURS,
        ZOOM_MONTH,
        ZOOM_YEAR,
    );
    public $availableFIR = array(
        FIR_OFF,
        FIR_DEFAULT,
        FIR_1_HOUR,
        FIR_96_HOURS,
        FIR_MONTH,
        FIR_YEAR,
    );

    public function __construct() {
        /* [?] notes about the values saved here
         *
         * the sensors that we monitor are all different and in general try
         * their best to report accurate information. They all have their own
         * way of maintaining precision, some, I've made some mistakes and
         * thrown away, for example the temperature sensors, I am pretty
         * sure I lost resolution trying save space on the server. (I should
         * have multiplied the values by a few magnitudes and saved integer
         * values to avoid floating point storage) Anyways, all the sensors
         * have very little memory and either make great use of that memory,
         * or not at all. Either way, the filters on the sensors are limited.
         *
         * We can make really big filters, adjust for phase shift, down sample,
         * all the things - because we have lots of memory and processing power
         *
         * `TheInternet` abstracts some of the complexities of the sensors
         * so that users can ask for things without specifying specific details
         * that are kinda necessary for what they are asking for. There is no
         * place for these things in the underlying routines either, so they
         * end up here, in the middle. For example, the Ramer-Douglas-Peucker
         * algorithm that is used for line simplification wants an `epsilon`
         * value so it knows how tightly to keep its output to the input it
         * is given.
         *
         * notes about specific values:
         * squish factor: see frontend/tests-and-demos/rdp-algo.php
         * sample depth: resolution should be 1 hour for most the sensors
         * sample period: read the datasheets for each sensor
         */
        // this is just a list of the sensors in one place
        $this->sensors['H'] = new Sensor('H', "DEADBEEF", BOSCH_BME680);
        $this->sensors['I'] = new Sensor('I', "DEADPORK", BOSCH_BME680);
        $this->sensors['J'] = new Sensor('J', "HELLOCAT", BOSCH_BME680);
        $this->sensors['A'] = new Sensor('A', "A", AIR_THINGS, 106.0 / 100);
        $this->sensors['B'] = new Sensor('B', "B", AIR_THINGS, 58.0 / 100);
        $this->sensors['C'] = new Sensor('C', "C", AIR_THINGS, 335.0 / 100);
        $this->sensors['R'] = new Sensor('R', "R", RADON_EYE, 62.0 / 100);
        $this->sensors['S'] = new Sensor('S', "S", RADON_EYE, 24.0 / 100);
        $this->sensors["monkey"] = new Sensor("Ohhh Ahhhh!");
        /* inform sensor API about the timing of the automated sensors
         * sample depth is the number of samples per hour
         * sample period is the number of seconds between two samples
         */
        $timing = array(
            'R' => array(/*every  5 min is*/ 12 /*per hour*/,  5 * 60 /* seconds */),
            'S' => array(/*every  5 min is*/ 12 /*per hour*/,  5 * 60 /* seconds */),
            'H' => array(/*every 15 min is*/  4 /*per hour*/, 15 * 60 /* seconds */),
            'I' => array(/*every 15 min is*/  4 /*per hour*/, 15 * 60 /* seconds */),
            'J' => array(/*every 15 min is*/  4 /*per hour*/, 15 * 60 /* seconds */),
        );
        foreach($timing as $name => $t) {
            $this->sensors[$name]->options->setSampleDepth($t[0]);
        }
        foreach($timing as $name => $t) {
            $this->sensors[$name]->options->setSamplePeriod($t[1]);
        }

        // user does not want Fahrenheit, they want temperature, translate request
        $this->whatUsersWant = TheCutePaletteAndSomeOtherStuff::wordWords();
        // available zoom and FIR might have unnecessary defaults
        $this->availableZoom = array_unique($this->availableZoom);
        $this->availableFIR = array_unique($this->availableFIR);
    }

    /* builds a new Sensor from the public fields of an existing one
     *
     * clone would only do a shallow copy and we would lose SensorOptions
     * instead, try to create a new sensor and provide it all the same
     * parameters as the original
     *
     * NOTE(review): the options object itself is handed over as-is, whether
     * the Sensor constructor copies it cannot be seen from here
     */
    private function copySensor($sensor) {
        return new Sensor(
            $sensor->name,
            $sensor->serial,
            $sensor->sensors,
            $sensor->options
        );
    }

    // returns a list with a *copy* of every sensor, the names are not kept as keys
    public function cloneAllSensors() {
        $arr = array();
        foreach($this->sensors as $sensor) $arr[] = $this->copySensor($sensor);
        return $arr;
    }

    /* [!] returns a copy of the sensor, or NULL when no sensor has that name
     *
     * [alternatively we can give the user our sensor, but that might get
     * a bit out of hand, maybe this does, idontknow]
     */
    public function getSensor($letter) {
        return isset($this->sensors[$letter])
            ? $this->copySensor($this->sensors[$letter])
            : NULL;
    }

    /* The front end does not currently know what sensors are available, nor
     * does it provide an interface to select which sensors are of interest
     * to the end user.
     *
     * Names, colors, or general user options are not set, or known, when
     * requests are made, they tend to ask for things that the backend cannot
     * provide
     *
     * This interface protects the backend from these unknowns and tries to pick
     * things that make sense. In a way, it is kind of a dumpster fire with
     * nobody taking responsibility for these kinds of things, but it does serve
     * a purpose and will exist until it doesn't
     *
     * $em    name of the sensor cluster ('H', 'A', ...)
     * $what  what the user wants to see, a key of wordWords() ("radon", ...)
     * $other a bunch of parameters that are jumbled together, some of them
     *        are for highcharts, some are for running last minute routines
     *        on the data, some that affect what data is queried:
     *          "auxFn"        passed to the sensor getter, also used as title
     *          "queryOptions" a QueryOptions object, anything else is ignored
     *          "dataFn"       callback run on every data entry (array_walk)
     *        the whole array is also handed to makeSeries()
     *
     * the generated series is stored for getInlineJS_objs(), nothing is
     * returned, requests for an unknown sensor or an unknown kind of data
     * raise an E_USER_WARNING and do not generate a series
     */
    public function iotSmartAlexaSiriCloud($em = 'H', $what = "radon", $other = array()) {
        // the backend cannot provide what it does not know about
        if(!isset($this->sensors[$em]) or !isset($this->whatUsersWant[$what])) {
            trigger_error(
                "[!] no series generated, unknown sensor or data type requested",
                E_USER_WARNING
            );
            return;
        }
        $auxFn = isset($other["auxFn"]) ?$other["auxFn"] :NULL;
        // manual read auxiliary measurements have different titles
        $title = isset($other["auxFn"]) ?ucfirst($other["auxFn"]) :"{$em}_{$what}";
        /* check if we were passed query options, otherwise assign none
         * instanceof is safe for values that are not objects, get_class()
         * would throw on those when running under PHP 8
         */
        $queryOptions = (
            isset($other["queryOptions"]) and
            ($other["queryOptions"] instanceof QueryOptions)
        ) ? $other["queryOptions"] : NULL;
        // radon detectors get special id
        $title = "radon" === $what ?"detector{$em}" :$title;
        // switch out implicit "temperature" for explicit "Fahrenheit"
        $get = $this->whatUsersWant[$what];
        $td1 = microtime(true);
        // read data from disk or wherever it comes from
        $data = $this->sensors[$em]->$get($queryOptions, $auxFn);
        $td2 = microtime(true);
        // run a user supplied function on the data before presentation
        if(isset($other["dataFn"])) { array_walk($data, $other["dataFn"]); }
        $td3 = microtime(true);
        error_log("data - i1: ".(round(($td2-$td1)*1000))."ms, i2: ".(round(($td3-$td2)*1000))."ms");
        // [!] this prints to STDOUT, do not let the assignment fool you
        // generate the highcharts series
        $this->output[] = new Series(
            $em,
            $get,
            makeSeries($title, $data, $other, false === $this->inline)
        );
    }

    /* returns JavaScript source, one line per generated series, each line
     * pushes a `new Series(...)` onto the JavaScript array named by $var
     */
    public function getInlineJS_objs($var) {
        $arr = array();
        foreach($this->output as $o) {
            $arr[] = "{$var}.push(new Series('{$o->em}','{$o->type}',{$o->data}));";
        }
        return implode(PHP_EOL, $arr);
    }
}
/* one generated chart series: which sensor cluster it came from, what kind of
 * data it holds and the data itself, mirrored by a JavaScript class of the
 * same name (see asJavaScript)
 */
class Series {
    public $em;
    public $type;
    public $data;

    /* all three fields stay NULL when no data is given, there is no need to
     * initialize anything when not used
     */
    public function __construct($em = 'H', $type = 'radon', $data = NULL) {
        if(NULL !== $data) {
            $this->em = $em;
            $this->type = $type;
            $this->data = $data;
        }
    }

    /* returns the source of the matching JavaScript class on a single line,
     * every line of the template is trimmed and the lines are joined by a
     * single space
     */
    static public function asJavaScript() {
        $template = "class Series {
            constructor(em, type, data) {
                this.em = em;
                this.type=type;
                this.data=data;
            }
        };";
        $lines = explode(PHP_EOL, $template);
        $lines = array_map("trim", $lines);
        return implode(' ', $lines);
    }
}
- /* scheme to override the color choices made for highcharts
- * and provide some optional UI functionality like grouping similar graph
- * data, for example all radon measurements are of the same class, they could
- * be averaged, turned off together, whatever
- *
- * after reading documentation, highcharts says that yes chart can be modified
- * at runtime using highchart::chart api as well as highchart::series api
- *
- * charts API for adding series at runtime, do not use at render time
- * highcharts::addSeries(options, [,redraw] [,animation]) returns highcharts::Series
- * highcharts::get(id) returns series or whatever by id
- *
- * also provide a common reference for translating user requested data types
- * and actual datatypes, for example, mapping temperature to Fahrenheit
- *
- */
- require_once("test-pattern.php");
class TheCutePaletteAndSomeOtherStuff {
    /* user does not want Fahrenheit, they want temperature, translate request
     *
     * returns a map from the word a user asks for to the name of the getter
     * that provides it
     */
    static public function wordWords() {
        // order these in the same order you want to pull from colors
        $words = array();
        $words["radon"] = "getRadon";
        $words["humidity"] = "getHumidity";
        $words["temperature"] = "getFahrenheit";
        $words["indoor_air_quality"] = "getIaq";
        $words["pressure"] = "getUkrumayl";
        $words["wesDust"] = "getWesDust";
        $words["aux"] = "getAuxiliary";
        return $words;
    }

    // placeholder, intentionally does nothing
    public function activateJavaScriptSuperPowers() {
    }
}
|