## Rows: 898098 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Sensor_id, Zone
## dbl (5): LAT, LONG, Temp_F, Humidity, Pressure_Pa
## dttm (1): DateTime
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## DateTime Temp_F Humidity
## Min. :2021-04-20 00:00:06 Min. :-999.00 Min. :-999.00
## 1st Qu.:2021-06-11 16:25:45 1st Qu.: 74.00 1st Qu.: 46.00
## Median :2021-07-25 09:01:14 Median : 79.00 Median : 68.00
## Mean :2021-07-26 20:14:00 Mean : 77.96 Mean : 62.18
## 3rd Qu.:2021-09-10 22:28:09 3rd Qu.: 88.00 3rd Qu.: 85.00
## Max. :2021-10-27 23:57:22 Max. : 381.00 Max. : 146.00
## Pressure_Pa
## Min. : -999.0
## 1st Qu.: 939.0
## Median : 980.0
## Mean : 596.5
## 3rd Qu.: 989.0
## Max. :33751.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 71.00 77.00 81.00 84.37 91.00 112.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27.00 55.00 78.00 71.89 88.00 100.00
# Check in: How many times does each Sensor_id come up?
# sensors_clean %>% count(Sensor_id)
# Add variable to summarise at day level
# Collapse the cleaned readings to one row per sensor per day, keeping the
# daily maximum and minimum of temperature and humidity.
# Zone is reduced with first() so that summarise() yields exactly one row per
# (Sensor_id, Day) group. Passing the whole Zone column through summarise()
# returns one row per raw reading (deprecated since dplyr 1.1.0) and then
# needs a distinct() over every reading to undo it.
# NOTE(review): assumes each sensor sits in a single Zone -- confirm.
sensors_day <- sensors_clean %>%
  group_by(Sensor_id, Day) %>%
  summarise(
    Zone = first(Zone),
    maxTemp = max(Temp_F),
    maxHumidity = max(Humidity),
    minTemp = min(Temp_F),
    minHumidity = min(Humidity),
    .groups = "drop"
  )
# Daily max and min temperature over time; each sensor gets its own colour.
ggplot(
  data = sensors_day,
  mapping = aes(x = Day, colour = Sensor_id)
) +
  geom_line(mapping = aes(y = maxTemp)) +
  geom_line(mapping = aes(y = minTemp)) +
  theme_classic()
# Daily max and min humidity over time; each sensor gets its own colour.
ggplot(
  data = sensors_day,
  mapping = aes(x = Day, colour = Sensor_id)
) +
  geom_line(mapping = aes(y = maxHumidity)) +
  geom_line(mapping = aes(y = minHumidity)) +
  theme_classic()
# Same temperature series, but coloured by which extreme is drawn (max vs min).
# group = Sensor_id keeps one line per sensor within each colour.
ggplot(
  data = sensors_day,
  mapping = aes(x = Day, group = Sensor_id)
) +
  geom_line(mapping = aes(y = maxTemp, colour = "maxTemp")) +
  geom_line(mapping = aes(y = minTemp, colour = "minTemp")) +
  theme_classic()
# Same humidity series, but coloured by which extreme is drawn (max vs min).
# group = Sensor_id keeps one line per sensor within each colour.
ggplot(
  data = sensors_day,
  mapping = aes(x = Day, group = Sensor_id)
) +
  geom_line(mapping = aes(y = maxHumidity, colour = "maxHumidity")) +
  geom_line(mapping = aes(y = minHumidity, colour = "minHumidity")) +
  theme_classic()
## [1] "Temperature K Means Groups:"
## [1] 3 3 5
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## 1 104.3333 101.33333 102.00 103.00000 96.00 90.66667 89.00000 88.00000 78.33333
## 2 93.2600 93.72333 94.34 91.64333 89.93 82.22333 81.81667 83.59333 78.43667
## 3 106.6000 106.20000 104.40 105.00000 99.00 92.00000 92.20000 89.00000 78.60000
## [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
## 1 91.00000 96.66667 101.00 95.00 100.00000 99.33333 97.33333 99.66667
## 2 85.03667 89.35000 91.66 88.28 90.04333 90.84333 91.29333 91.99333
## 3 92.60000 100.00000 105.20 97.20 101.40000 102.20000 102.80000 102.60000
## [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## 1 103.66667 105.3333 99.00 99.00000 93.33333 99.33333 103.6667 105.33333
## 2 93.25667 94.5100 89.93 87.72333 86.52333 91.58000 92.8600 95.03333
## 3 108.00000 106.2000 102.00 99.80000 97.00000 102.20000 108.2000 108.80000
## [,26] [,27] [,28] [,29] [,30]
## 1 104.6667 103.33333 103.33333 105.66667 105.66667
## 2 94.3600 94.01667 94.09333 95.14667 95.70667
## 3 108.2000 107.20000 107.60000 109.80000 108.00000
## [1] "Humidity K Means Groups:"
## [1] 8 2 1
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## 1 92.250 90.625 88.625 91.375 95.500 97.875 91.75 93.750 98.00 96.00 92.625
## 2 97.965 90.975 90.920 91.480 93.975 96.970 95.48 93.495 96.95 95.45 94.495
## 3 96.960 88.940 90.940 94.960 95.980 96.980 96.96 94.970 96.02 94.96 96.980
## [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22]
## 1 91.625 90.875 92.250 93.500 90.625 89.875 92.25 91.5 92.125 83.125 96.5
## 2 93.950 95.490 95.475 97.445 93.500 93.935 95.97 95.0 95.500 91.500 98.0
## 3 94.980 91.070 93.960 92.070 89.960 89.070 92.05 92.0 93.000 84.000 96.0
## [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30]
## 1 96.875 92.5 91.375 91.75 83.25 84.125 84.25 89.5
## 2 98.000 47.0 82.500 95.50 90.00 94.500 91.50 94.0
## 3 98.000 46.0 81.000 93.00 87.00 86.000 93.00 91.0
##Plot the results
# One line per sensor of avgT over time, coloured by the sensor's kmeansTemp
# cluster label.
ggplot(
  data = sensors_day_av,
  mapping = aes(x = Day, y = avgT, group = Sensor_id)
) +
  geom_line(mapping = aes(colour = kmeansTemp)) +
  labs(
    x = "Day",
    y = paste0("Max Temperature: ", n, "-Day Moving Avg"),
    title = "Temp in San Antonio"
  ) +
  theme_classic()
# One line per sensor of avgH over time, coloured by the sensor's kmeansHumid
# cluster label.
# The y-axis label previously read "Max Temperature" (copied from the
# temperature plot); it now names the quantity actually plotted.
# NOTE(review): assumes avgH is the n-day moving average of daily max
# humidity, mirroring avgT -- confirm against where avgH is computed.
ggplot(sensors_day_av) +
  geom_line(aes(x = Day, y = avgH, color = kmeansHumid, group = Sensor_id)) +
  theme_classic() +
  xlab("Day") +
  ylab(paste0("Max Humidity: ", n, "-Day Moving Avg")) +
  ggtitle("Humidity in San Antonio")
## The lines look truncated at the ends because a moving average has no value until a full n-day window of data is available.
###Map the Neighborhoods (Zones)
# Turn the per-sensor table into point geometries from its LONG/LAT columns
# (CRS given as EPSG code 4326), then draw them on an interactive map with
# dots coloured by Zone.
sensorsSF <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
tm_shape(shp = sensorsSF) +
  tm_dots(col = "Zone")
### Let's see how the sensors were clustered by temperature
# Rebuild the point layer from LONG/LAT and colour each sensor by its
# kmeansTemp cluster label.
tempt <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)
tm_shape(shp = tempt) +
  tm_dots(col = "kmeansTemp")
### Let's see how the sensors were clustered by humidity
# Rebuild the point layer from LONG/LAT and colour each sensor by its
# kmeansHumid cluster label.
tempt <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)
tm_shape(shp = tempt) +
  tm_dots(col = "kmeansHumid")
### How did our classifier do?
# Confusion Matrix for Temperature: sensor counts with Zone in the rows and
# the kmeansTemp cluster label in the columns.
table(Individualsensors[["Zone"]], Individualsensors[["kmeansTemp"]])
##
## 1 2 3
## Brooks 2 1 1
## Downtown 0 1 2
## Medical Center 1 1 2
# Confusion Matrix for Humidity: sensor counts with Zone in the rows and
# the kmeansHumid cluster label in the columns.
table(Individualsensors[["Zone"]], Individualsensors[["kmeansHumid"]])
##
## 1 2 3
## Brooks 3 1 0
## Downtown 2 1 0
## Medical Center 3 0 1
### Note: if you don't have zones, you can run k-means clustering on the longitude and latitude instead!