## Rows: 898098 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): Sensor_id, Zone
## dbl (5): LAT, LONG, Temp_F, Humidity, Pressure_Pa
## dttm (1): DateTime
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## DateTime Temp_F Humidity
## Min. :2021-04-20 00:00:06 Min. :-999.00 Min. :-999.00
## 1st Qu.:2021-06-11 16:25:45 1st Qu.: 74.00 1st Qu.: 46.00
## Median :2021-07-25 09:01:14 Median : 79.00 Median : 68.00
## Mean :2021-07-26 20:14:00 Mean : 77.96 Mean : 62.18
## 3rd Qu.:2021-09-10 22:28:09 3rd Qu.: 88.00 3rd Qu.: 85.00
## Max. :2021-10-27 23:57:22 Max. : 381.00 Max. : 146.00
## Pressure_Pa
## Min. : -999.0
## 1st Qu.: 939.0
## Median : 980.0
## Mean : 596.5
## 3rd Qu.: 989.0
## Max. :33751.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.34 71.00 76.10 77.66 84.00 112.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 50.00 67.00 65.84 86.00 99.00
# Check in: How many times does each Sensor_id come up?
# sensors_clean %>% count(Sensor_id)
# Add variable to summarise at day level
# Collapse the cleaned readings to one row per sensor per day, keeping the
# sensor's zone plus the daily extremes of temperature and humidity.
# Every summary below is a single value per group, so summarise() yields
# exactly one row per (Sensor_id, Day). The earlier `Zone = Zone` returned one
# row per reading (a pattern deprecated in dplyr 1.1.0) and depended on
# distinct() to collapse the duplicates again.
# NOTE(review): assumes Zone is constant within a sensor -- confirm.
sensors_day <- sensors_clean %>%
  group_by(Sensor_id, Day) %>%
  summarise(
    Zone = first(Zone),
    maxTemp = max(Temp_F),
    maxHumidity = max(Humidity),
    minTemp = min(Temp_F),
    minHumidity = min(Humidity),
    .groups = "drop" # return an ungrouped tibble; no trailing ungroup() needed
  )
# Daily temperature extremes: max and min traces, one colour per sensor.
ggplot(data = sensors_day, mapping = aes(x = Day)) +
  geom_line(mapping = aes(y = maxTemp, color = Sensor_id)) +
  geom_line(mapping = aes(y = minTemp, color = Sensor_id)) +
  theme_classic()

# Daily humidity extremes: max and min traces, one colour per sensor.
ggplot(data = sensors_day, mapping = aes(x = Day)) +
  geom_line(mapping = aes(y = maxHumidity, color = Sensor_id)) +
  geom_line(mapping = aes(y = minHumidity, color = Sensor_id)) +
  theme_classic()
# Same temperature traces, but coloured by series (max vs. min) instead of by
# sensor; `group = Sensor_id` still draws a separate line for every sensor.
ggplot(data = sensors_day, mapping = aes(x = Day)) +
  geom_line(
    mapping = aes(y = maxTemp, color = "maxTemp", group = Sensor_id)
  ) +
  geom_line(
    mapping = aes(y = minTemp, color = "minTemp", group = Sensor_id)
  ) +
  theme_classic()

# Same humidity traces, coloured by series (max vs. min), one line per sensor.
ggplot(data = sensors_day, mapping = aes(x = Day)) +
  geom_line(
    mapping = aes(y = maxHumidity, color = "maxHumidity", group = Sensor_id)
  ) +
  geom_line(
    mapping = aes(y = minHumidity, color = "minHumidity", group = Sensor_id)
  ) +
  theme_classic()
## [1] "Temperature K Means Groups:"
## [1] 3 7 2
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## 1 84.80000 87.20333 90.35333 90.61667 91.25333 90.31667 90.65000 90.27667
## 2 91.71429 95.42857 98.14286 99.57143 100.42857 100.42857 99.71429 101.28571
## 3 94.50000 98.50000 99.50000 99.00000 102.00000 102.50000 100.00000 104.00000
## [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
## 1 89.26667 92.26667 85.06667 88.17667 88.62667 88.21667 90.42667 75.91667
## 2 96.85714 96.71429 95.14286 90.00000 89.85714 97.42857 97.14286 83.57143
## 3 99.00000 99.00000 96.00000 90.50000 91.50000 101.50000 98.00000 84.00000
## [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## 1 74.71667 79.58667 83.60000 82.36667 84.80000 84.76333 86.49000 85.78000
## 2 86.57143 87.71429 89.71429 91.28571 94.71429 94.00000 94.85714 95.42857
## 3 89.50000 91.00000 92.50000 92.50000 97.50000 98.50000 98.50000 99.50000
## [,25] [,26] [,27]
## 1 89.90000 87.24000 79.44000
## 2 99.14286 92.28571 84.57143
## 3 107.00000 95.50000 86.50000
## [1] "Humidity K Means Groups:"
## [1] 6 2 4
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## 1 93.33333 91.66667 85.33333 68.16667 59.83333 49.66667 79.16667 79.66667 91.00
## 2 91.50000 95.00000 93.00000 81.50000 78.50000 70.50000 88.50000 91.00000 96.00
## 3 95.00000 93.75000 91.50000 76.00000 78.75000 62.50000 87.75000 88.50000 93.75
## [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19]
## 1 92.0 91.16667 94.83333 94.16667 97.83333 94 57.83333 57.0 66.00 80.33333
## 2 94.0 89.50000 94.50000 91.50000 95.00000 95 74.00000 80.0 84.50 88.50000
## 3 94.5 91.00000 97.00000 95.00000 98.25000 96 59.25000 70.5 75.25 86.25000
## [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27]
## 1 87.33333 89.83333 88.16667 94.00 94.16667 94.00 92.33333 94.50
## 2 92.50000 96.50000 96.50000 96.50 96.00000 97.00 95.50000 91.00
## 3 92.50000 92.00000 92.00000 95.75 94.50000 96.25 95.25000 94.75
##Plot the results
# Moving-average temperature (avgT) over time: one line per sensor, coloured
# by the sensor's temperature k-means cluster. `n` is the window length in
# days and is interpolated into the y-axis label.
ggplot(data = sensors_day_av) +
  geom_line(
    mapping = aes(x = Day, y = avgT, color = kmeansTemp, group = Sensor_id)
  ) +
  theme_classic() +
  labs(
    x = "Day",
    y = paste0("Max Temperature: ", n, "-Day Moving Avg"),
    title = "Temp in San Antonio"
  )
# Moving-average humidity (avgH) over time: one line per sensor, coloured by
# the sensor's humidity k-means cluster. `n` is the window length in days.
# The y-axis label used to read "Max Temperature" (copied from the temperature
# plot) even though the plotted series is avgH.
# NOTE(review): "Max Humidity" mirrors the temperature label and presumes avgH
# averages the daily maximum humidity -- confirm where avgH is computed.
ggplot(sensors_day_av) +
  geom_line(aes(x = Day, y = avgH, color = kmeansHumid, group = Sensor_id)) +
  theme_classic() +
  xlab("Day") +
  ylab(paste0("Max Humidity: ", n, "-Day Moving Avg")) +
  ggtitle("Humidity in San Antonio")
## The lines look chopped off at the edges because we are plotting a moving average, which has no value until a full window of days is available.
###Map the Neighborhoods (Zones)
# Convert Individualsensors to an sf point layer, reading coordinates from the
# LONG/LAT columns and tagging them with CRS 4326.
sensorsSF <- st_as_sf(
  Individualsensors,
  coords = c("LONG", "LAT"),
  crs = 4326
)
# Switch tmap to its interactive ("view") mode for the maps that follow.
tmap_mode("view")
## tmap mode set to interactive viewing
# Dot map of the sensors, coloured by Zone.
tm_shape(sensorsSF) +
  tm_dots(col = "Zone")
### Map the sensors coloured by their temperature k-means cluster
tempt <- st_as_sf(
  Individualsensors,
  coords = c("LONG", "LAT"),
  crs = 4326
)
tm_shape(tempt) +
  tm_dots(col = "kmeansTemp")
### Map the sensors coloured by their humidity k-means cluster
tempt <- st_as_sf(
  Individualsensors,
  coords = c("LONG", "LAT"),
  crs = 4326
)
tm_shape(tempt) +
  tm_dots(col = "kmeansHumid")
### How did our classifier do?
# Confusion Matrix for Temperature: cross-tabulate each sensor's Zone (rows)
# against its temperature k-means cluster (columns).
table(Individualsensors[["Zone"]], Individualsensors[["kmeansTemp"]])
##
## 1 2 3
## Brooks 1 2 1
## Downtown 1 3 0
## Medical Center 1 2 1
# Confusion Matrix for Humidity: cross-tabulate each sensor's Zone (rows)
# against its humidity k-means cluster (columns).
table(Individualsensors[["Zone"]], Individualsensors[["kmeansHumid"]])
##
## 1 2 3
## Brooks 0 1 3
## Downtown 3 1 0
## Medical Center 3 0 1
### Note: if you don't have zones, you can run k-means clustering on the longitude and latitude instead!