Part 1: Sensor Dataset

We use the San Antonio Smart Sensor weather data collected in three zones: Medical Center, Downtown, and Brooks.

## Rows: 898098 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (2): Sensor_id, Zone
## dbl  (5): LAT, LONG, Temp_F, Humidity, Pressure_Pa
## dttm (1): DateTime
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
##     DateTime                       Temp_F           Humidity      
##  Min.   :2021-04-20 00:00:06   Min.   :-999.00   Min.   :-999.00  
##  1st Qu.:2021-06-11 16:25:45   1st Qu.:  74.00   1st Qu.:  46.00  
##  Median :2021-07-25 09:01:14   Median :  79.00   Median :  68.00  
##  Mean   :2021-07-26 20:14:00   Mean   :  77.96   Mean   :  62.18  
##  3rd Qu.:2021-09-10 22:28:09   3rd Qu.:  88.00   3rd Qu.:  85.00  
##  Max.   :2021-10-27 23:57:22   Max.   : 381.00   Max.   : 146.00  
##   Pressure_Pa     
##  Min.   : -999.0  
##  1st Qu.:  939.0  
##  Median :  980.0  
##  Mean   :  596.5  
##  3rd Qu.:  989.0  
##  Max.   :33751.0

Part 2: Data Cleaning

2.1 After Removing NA Values

2.2 After Removing Values Per Expert Judgement

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.34   71.00   76.10   77.66   84.00  112.00
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   50.00   67.00   65.84   86.00   99.00

Part 3: Line Graphs

3.1 Raw Temperature and Humidity Over Time (By Sensor)

3.2 Daily Minimum and Maximum Temperature and Humidity Over Time (By Sensor)

# Check in: How many times does each Sensor_id come up?
# sensors_clean %>%  count(Sensor_id)

# Add variable to summarise at day level

# Collapse the cleaned readings to one row per sensor per day, keeping the
# daily maximum and minimum of temperature and humidity.
#
# Zone is carried through as a grouping key instead of `summarise(Zone = Zone)`:
# that form returns one row per reading (deprecated in dplyr >= 1.1) and then
# needs distinct() to collapse the duplicates again.
# NOTE(review): assumes each Sensor_id belongs to exactly one Zone -- confirm.
sensors_day <- sensors_clean %>%
  group_by(Sensor_id, Day, Zone) %>%
  summarise(
    maxTemp = max(Temp_F),
    maxHumidity = max(Humidity),
    minTemp = min(Temp_F),
    minHumidity = min(Humidity),
    .groups = "drop" # return an ungrouped tibble; no trailing ungroup() needed
  )

# Daily temperature extremes: each sensor gets its own color, and its daily
# max and daily min are drawn as two separate lines in that color.
ggplot(sensors_day, aes(x = Day, color = Sensor_id)) +
  geom_line(aes(y = maxTemp)) +
  geom_line(aes(y = minTemp)) +
  theme_classic()

# Same layout for the daily humidity extremes.
ggplot(sensors_day, aes(x = Day, color = Sensor_id)) +
  geom_line(aes(y = maxHumidity)) +
  geom_line(aes(y = minHumidity)) +
  theme_classic()

# Daily temperature extremes colored by series (max vs. min) rather than by
# sensor. `group = Sensor_id` still draws one line per sensor; the constant
# strings passed to `color` become the two legend entries.
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  geom_line(aes(y = maxTemp, color = "maxTemp")) +
  geom_line(aes(y = minTemp, color = "minTemp")) +
  theme_classic()

# Same layout for the daily humidity extremes.
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  geom_line(aes(y = maxHumidity, color = "maxHumidity")) +
  geom_line(aes(y = minHumidity, color = "minHumidity")) +
  theme_classic()

3.3 Applying Moving Averages to Smooth Data

Part 4: Classify Sensors into Groups

4.1 Run K Means Grouping Algorithm on Sensors

## [1] "Temperature K Means Groups:"
## [1] 3 7 2
##       [,1]     [,2]     [,3]     [,4]      [,5]      [,6]      [,7]      [,8]
## 1 84.80000 87.20333 90.35333 90.61667  91.25333  90.31667  90.65000  90.27667
## 2 91.71429 95.42857 98.14286 99.57143 100.42857 100.42857  99.71429 101.28571
## 3 94.50000 98.50000 99.50000 99.00000 102.00000 102.50000 100.00000 104.00000
##       [,9]    [,10]    [,11]    [,12]    [,13]     [,14]    [,15]    [,16]
## 1 89.26667 92.26667 85.06667 88.17667 88.62667  88.21667 90.42667 75.91667
## 2 96.85714 96.71429 95.14286 90.00000 89.85714  97.42857 97.14286 83.57143
## 3 99.00000 99.00000 96.00000 90.50000 91.50000 101.50000 98.00000 84.00000
##      [,17]    [,18]    [,19]    [,20]    [,21]    [,22]    [,23]    [,24]
## 1 74.71667 79.58667 83.60000 82.36667 84.80000 84.76333 86.49000 85.78000
## 2 86.57143 87.71429 89.71429 91.28571 94.71429 94.00000 94.85714 95.42857
## 3 89.50000 91.00000 92.50000 92.50000 97.50000 98.50000 98.50000 99.50000
##       [,25]    [,26]    [,27]
## 1  89.90000 87.24000 79.44000
## 2  99.14286 92.28571 84.57143
## 3 107.00000 95.50000 86.50000
## [1] "Humidity K Means Groups:"
## [1] 6 2 4
##       [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]  [,9]
## 1 93.33333 91.66667 85.33333 68.16667 59.83333 49.66667 79.16667 79.66667 91.00
## 2 91.50000 95.00000 93.00000 81.50000 78.50000 70.50000 88.50000 91.00000 96.00
## 3 95.00000 93.75000 91.50000 76.00000 78.75000 62.50000 87.75000 88.50000 93.75
##   [,10]    [,11]    [,12]    [,13]    [,14] [,15]    [,16] [,17] [,18]    [,19]
## 1  92.0 91.16667 94.83333 94.16667 97.83333    94 57.83333  57.0 66.00 80.33333
## 2  94.0 89.50000 94.50000 91.50000 95.00000    95 74.00000  80.0 84.50 88.50000
## 3  94.5 91.00000 97.00000 95.00000 98.25000    96 59.25000  70.5 75.25 86.25000
##      [,20]    [,21]    [,22] [,23]    [,24] [,25]    [,26] [,27]
## 1 87.33333 89.83333 88.16667 94.00 94.16667 94.00 92.33333 94.50
## 2 92.50000 96.50000 96.50000 96.50 96.00000 97.00 95.50000 91.00
## 3 92.50000 92.00000 92.00000 95.75 94.50000 96.25 95.25000 94.75

4.2 Plot Each Sensor by its K Means Group

##Plot the results
# One line per sensor showing the smoothed temperature series (avgT), colored
# by the sensor's temperature k-means group. `n` is the moving-average window
# length and is interpolated into the y-axis label.
ggplot(
  sensors_day_av,
  aes(x = Day, y = avgT, color = kmeansTemp, group = Sensor_id)
) +
  geom_line() +
  labs(
    x = "Day",
    y = paste0("Max Temperature: ", n, "-Day Moving Avg"),
    title = "Temp in San Antonio"
  ) +
  theme_classic()

# One line per sensor showing the smoothed humidity series (avgH), colored by
# the sensor's humidity k-means group. `n` is the moving-average window length
# and is interpolated into the y-axis label.
ggplot(sensors_day_av) +
  geom_line(aes(x = Day, y = avgH, color = kmeansHumid, group = Sensor_id)) +
  theme_classic() +
  xlab("Day") +
  # The label previously read "Max Temperature" (copied from the temperature
  # plot) even though this chart shows humidity.
  # NOTE(review): "Max" assumes avgH averages the daily maximum humidity, in
  # parallel with the temperature plot -- confirm against where avgH is built.
  ylab(paste0("Max Humidity: ", n, "-Day Moving Avg")) +
  ggtitle("Humidity in San Antonio")

## These lines look chopped off because a moving average has no value for days that lack a full window of observations.

Part 5: Do Sensors in the Same Neighborhood (Zone) Collect Similar Data?

5.1 Map of Zones and Maps of the K Means Results

###Map the Neighborhoods (Zones)

# Convert the per-sensor table to an sf point layer once (LONG/LAT columns as
# x/y coordinates, CRS EPSG:4326) and reuse it for all three maps. The
# original re-ran the identical st_as_sf() call before every map.
sensorsSF <- st_as_sf(Individualsensors, coords = c("LONG", "LAT"), crs = 4326)
# `tempt` is kept as an alias in case code elsewhere still refers to it.
tempt <- sensorsSF

tmap_mode("view")
## tmap mode set to interactive viewing
# Sensors colored by their Zone
tm_shape(sensorsSF) + tm_dots(col = "Zone")
### Let's see how the sensors were clustered by temperature
tm_shape(sensorsSF) + tm_dots(col = "kmeansTemp")
### Let's see how the sensors were clustered by humidity
tm_shape(sensorsSF) + tm_dots(col = "kmeansHumid")

5.2 Confusion Matrix

### How did our classifier do?
# Confusion Matrix for Temperature
# Rows are zones, columns are the temperature k-means group of each sensor;
# each cell counts the sensors with that zone/group combination.
table(Individualsensors[["Zone"]], Individualsensors[["kmeansTemp"]])
##                 
##                  1 2 3
##   Brooks         1 2 1
##   Downtown       1 3 0
##   Medical Center 1 2 1
# Confusion Matrix for Humidity
# Rows are zones, columns are the humidity k-means group of each sensor;
# each cell counts the sensors with that zone/group combination.
table(Individualsensors[["Zone"]], Individualsensors[["kmeansHumid"]])
##                 
##                  1 2 3
##   Brooks         0 1 3
##   Downtown       3 1 0
##   Medical Center 3 0 1
### Note: if you don't have zones, you can run k-means clustering on the longitude and latitude instead!