Part 1: Sensor Dataset

We use the San Antonio Smart Sensor weather data collected in the Medical Center, Downtown, and Brooks zones.

## Rows: 898098 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Sensor_id, Zone
## dbl  (5): LAT, LONG, Temp_F, Humidity, Pressure_Pa
## dttm (1): DateTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

##     DateTime                       Temp_F           Humidity      
##  Min.   :2021-04-20 00:00:06   Min.   :-999.00   Min.   :-999.00  
##  1st Qu.:2021-06-11 16:25:45   1st Qu.:  74.00   1st Qu.:  46.00  
##  Median :2021-07-25 09:01:14   Median :  79.00   Median :  68.00  
##  Mean   :2021-07-26 20:14:00   Mean   :  77.96   Mean   :  62.18  
##  3rd Qu.:2021-09-10 22:28:09   3rd Qu.:  88.00   3rd Qu.:  85.00  
##  Max.   :2021-10-27 23:57:22   Max.   : 381.00   Max.   : 146.00  
##   Pressure_Pa     
##  Min.   : -999.0  
##  1st Qu.:  939.0  
##  Median :  980.0  
##  Mean   :  596.5  
##  3rd Qu.:  989.0  
##  Max.   :33751.0

Part 2: Data Cleaning

2.1 After Removing NA Values

2.2 After Removing Values Per Expert Judgement

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   71.00   77.00   81.00   84.37   91.00  112.00
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   27.00   55.00   78.00   71.89   88.00  100.00

Part 3: Line Graphs

3.1 Raw Temperature and Humidity Over Time (By Sensor)

3.2 Daily Minimum and Maximum Temperature and Humidity Over Time (By Sensor)

# Check in: How many times does each Sensor_id come up?
# sensors_clean %>% count(Sensor_id)

# Summarise at the day level: one row per sensor per day holding the daily
# maximum and minimum of temperature and humidity.
#
# Zone is part of the grouping key so that it is carried into the result
# while summarise() still returns exactly one row per group. Returning one
# row per input row and collapsing with distinct() is deprecated since
# dplyr 1.1.0.
# NOTE(review): assumes each sensor reports a single Zone on a given day --
# confirm against the cleaned data.
sensors_day <- sensors_clean %>%
  group_by(Sensor_id, Day, Zone) %>%
  summarise(
    maxTemp = max(Temp_F),
    maxHumidity = max(Humidity),
    minTemp = min(Temp_F),
    minHumidity = min(Humidity),
    .groups = "drop" # return an ungrouped tibble, no trailing ungroup() needed
  )

# Daily max and min temperature, one colour per sensor.
ggplot(sensors_day, aes(x = Day, color = Sensor_id)) +
  theme_classic() +
  geom_line(aes(y = maxTemp)) +
  geom_line(aes(y = minTemp))

# Daily max and min humidity, one colour per sensor.
ggplot(sensors_day, aes(x = Day, color = Sensor_id)) +
  theme_classic() +
  geom_line(aes(y = maxHumidity)) +
  geom_line(aes(y = minHumidity))

# Same temperature series, but coloured by measure (max vs. min) rather than
# by sensor; `group` keeps one line per sensor within each colour.
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  theme_classic() +
  geom_line(aes(y = maxTemp, color = "maxTemp")) +
  geom_line(aes(y = minTemp, color = "minTemp"))

# Same humidity series, coloured by measure (max vs. min).
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  theme_classic() +
  geom_line(aes(y = maxHumidity, color = "maxHumidity")) +
  geom_line(aes(y = minHumidity, color = "minHumidity"))

3.3 Applying Moving Averages to Smooth Data

Part 4: Classify Sensors into Groups

4.1 Run K Means Grouping Algorithm on Sensors

## [1] "Temperature K Means Groups:"
## [1] 3 3 5
##       [,1]      [,2]   [,3]      [,4]  [,5]     [,6]     [,7]     [,8]     [,9]
## 1 104.3333 101.33333 102.00 103.00000 96.00 90.66667 89.00000 88.00000 78.33333
## 2  93.2600  93.72333  94.34  91.64333 89.93 82.22333 81.81667 83.59333 78.43667
## 3 106.6000 106.20000 104.40 105.00000 99.00 92.00000 92.20000 89.00000 78.60000
##      [,10]     [,11]  [,12] [,13]     [,14]     [,15]     [,16]     [,17]
## 1 91.00000  96.66667 101.00 95.00 100.00000  99.33333  97.33333  99.66667
## 2 85.03667  89.35000  91.66 88.28  90.04333  90.84333  91.29333  91.99333
## 3 92.60000 100.00000 105.20 97.20 101.40000 102.20000 102.80000 102.60000
##       [,18]    [,19]  [,20]    [,21]    [,22]     [,23]    [,24]     [,25]
## 1 103.66667 105.3333  99.00 99.00000 93.33333  99.33333 103.6667 105.33333
## 2  93.25667  94.5100  89.93 87.72333 86.52333  91.58000  92.8600  95.03333
## 3 108.00000 106.2000 102.00 99.80000 97.00000 102.20000 108.2000 108.80000
##      [,26]     [,27]     [,28]     [,29]     [,30]
## 1 104.6667 103.33333 103.33333 105.66667 105.66667
## 2  94.3600  94.01667  94.09333  95.14667  95.70667
## 3 108.2000 107.20000 107.60000 109.80000 108.00000
## [1] "Humidity K Means Groups:"
## [1] 8 2 1
##     [,1]   [,2]   [,3]   [,4]   [,5]   [,6]  [,7]   [,8]  [,9] [,10]  [,11]
## 1 92.250 90.625 88.625 91.375 95.500 97.875 91.75 93.750 98.00 96.00 92.625
## 2 97.965 90.975 90.920 91.480 93.975 96.970 95.48 93.495 96.95 95.45 94.495
## 3 96.960 88.940 90.940 94.960 95.980 96.980 96.96 94.970 96.02 94.96 96.980
##    [,12]  [,13]  [,14]  [,15]  [,16]  [,17] [,18] [,19]  [,20]  [,21] [,22]
## 1 91.625 90.875 92.250 93.500 90.625 89.875 92.25  91.5 92.125 83.125  96.5
## 2 93.950 95.490 95.475 97.445 93.500 93.935 95.97  95.0 95.500 91.500  98.0
## 3 94.980 91.070 93.960 92.070 89.960 89.070 92.05  92.0 93.000 84.000  96.0
##    [,23] [,24]  [,25] [,26] [,27]  [,28] [,29] [,30]
## 1 96.875  92.5 91.375 91.75 83.25 84.125 84.25  89.5
## 2 98.000  47.0 82.500 95.50 90.00 94.500 91.50  94.0
## 3 98.000  46.0 81.000 93.00 87.00 86.000 93.00  91.0

4.2 Plot Each Sensor by its K Means Group

## Plot the results
# Moving-average temperature: one line per sensor, coloured by the sensor's
# temperature k-means group. `n` is the moving-average window length, defined
# earlier in the analysis.
ggplot(sensors_day_av, aes(x = Day, y = avgT, group = Sensor_id)) +
  geom_line(aes(color = kmeansTemp)) +
  labs(
    x = "Day",
    y = paste0("Max Temperature: ", n, "-Day Moving Avg"),
    title = "Temp in San Antonio"
  ) +
  theme_classic()

# Moving-average humidity: one line per sensor, coloured by the sensor's
# humidity k-means group. `n` is the moving-average window length, defined
# earlier in the analysis.
ggplot(sensors_day_av) +
  geom_line(aes(x = Day, y = avgH, color = kmeansHumid, group = Sensor_id)) +
  theme_classic() +
  xlab("Day") +
  # The y-axis label previously read "Max Temperature" (copied from the
  # temperature plot); this plot shows humidity.
  # NOTE(review): "Max" mirrors the temperature label -- confirm avgH is the
  # moving average of daily maximum humidity.
  ylab(paste0("Max Humidity: ", n, "-Day Moving Avg")) +
  ggtitle("Humidity in San Antonio")

## These lines look chopped off because a moving average has no value on the
## days where the n-day window is incomplete.

Part 5: Do Sensors in the Same Neighborhood (Zone) Collect Similar Data?

5.1 Map of Zones and Maps of the K Means Results

### Map the Neighborhoods (Zones)

# Turn the per-sensor table into an sf point layer. `coords` is given in
# (x, y) order, i.e. longitude then latitude, and EPSG:4326 is WGS 84
# longitude/latitude.
sensorsSF <- st_as_sf(
  Individualsensors,
  coords = c("LONG", "LAT"),
  crs = 4326
)

# Switch tmap to its interactive viewing mode for the maps that follow.
tmap_mode("view")
## tmap mode set to interactive viewing
# One dot per sensor, coloured by its Zone.
tm_shape(sensorsSF) + tm_dots(col = "Zone")

### Let's see how the sensors were clustered by temperature
# Build the point layer once and reuse it for both cluster maps. It was
# previously rebuilt with an identical st_as_sf() call before each map.
tempt <- st_as_sf(Individualsensors, coords = c("LONG", "LAT"), crs = 4326)
tm_shape(tempt) + tm_dots(col = "kmeansTemp")

### Let's see how the sensors were clustered by humidity
tm_shape(tempt) + tm_dots(col = "kmeansHumid")

5.2 Confusion Matrix

### How did our classifier do?
# Cross-tabulate each sensor's Zone (rows) against its temperature k-means
# group (columns). Group numbers are arbitrary cluster labels, so a column
# does not correspond to any particular zone.
table(
  Individualsensors[["Zone"]],
  Individualsensors[["kmeansTemp"]]
)
##                 
##                  1 2 3
##   Brooks         2 1 1
##   Downtown       0 1 2
##   Medical Center 1 1 2
# Cross-tabulate each sensor's Zone (rows) against its humidity k-means
# group (columns).
table(
  Individualsensors[["Zone"]],
  Individualsensors[["kmeansHumid"]]
)
##                 
##                  1 2 3
##   Brooks         3 1 0
##   Downtown       2 1 0
##   Medical Center 3 0 1
### Note: if you don't have zones, you can run k-means clustering on the longitude and latitude to create spatial groups instead!