## Rows: 898098 Columns: 8
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): Sensor_id, Zone
## dbl (5): LAT, LONG, Temp_F, Humidity, Pressure_Pa
## dttm (1): DateTime
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## DateTime Temp_F Humidity
## Min. :2021-04-20 00:00:06 Min. :-999.00 Min. :-999.00
## 1st Qu.:2021-06-11 16:25:45 1st Qu.: 74.00 1st Qu.: 46.00
## Median :2021-07-25 09:01:14 Median : 79.00 Median : 68.00
## Mean :2021-07-26 20:14:00 Mean : 77.96 Mean : 62.18
## 3rd Qu.:2021-09-10 22:28:09 3rd Qu.: 88.00 3rd Qu.: 85.00
## Max. :2021-10-27 23:57:22 Max. : 381.00 Max. : 146.00
## Pressure_Pa
## Min. : -999.0
## 1st Qu.: 939.0
## Median : 980.0
## Mean : 596.5
## 3rd Qu.: 989.0
## Max. :33751.0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.22 71.04 76.00 77.68 83.00 109.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.0 58.0 79.0 72.8 88.0 100.0
# Check in: How many times does each Sensor_id come up?
# sensors_clean %>% count(Sensor_id)
# Collapse the readings to one row per sensor per day, keeping the daily
# extremes of temperature and humidity.
sensors_day <- sensors_clean %>%
  group_by(Sensor_id, Day) %>%
  summarise(
    # Reduce Zone to a single value so summarise() yields exactly one row per
    # group; returning the whole column produced one row per reading (a
    # deprecated use of summarise()) that then had to be removed by distinct().
    # NOTE(review): assumes a sensor stays in one Zone within a day - confirm.
    Zone = first(Zone),
    maxTemp = max(Temp_F),
    maxHumidity = max(Humidity),
    minTemp = min(Temp_F),
    minHumidity = min(Humidity),
    # Return an ungrouped result, replacing the trailing ungroup().
    .groups = "drop"
  )
# Daily temperature range per sensor: the daily maximum and minimum are drawn
# as two lines sharing the sensor's colour.
ggplot(sensors_day, aes(x = Day, colour = Sensor_id)) +
  geom_line(aes(y = maxTemp)) +
  geom_line(aes(y = minTemp)) +
  theme_classic()
# Daily humidity range per sensor: the daily maximum and minimum are drawn as
# two lines sharing the sensor's colour.
ggplot(sensors_day, aes(x = Day, colour = Sensor_id)) +
  geom_line(aes(y = maxHumidity)) +
  geom_line(aes(y = minHumidity)) +
  theme_classic()
# Same temperature series, but coloured by statistic (max vs. min) rather than
# by sensor; `group` keeps one line per sensor within each colour.
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  geom_line(aes(y = maxTemp, colour = "maxTemp")) +
  geom_line(aes(y = minTemp, colour = "minTemp")) +
  theme_classic()
# Same humidity series, but coloured by statistic (max vs. min) rather than by
# sensor; `group` keeps one line per sensor within each colour.
ggplot(sensors_day, aes(x = Day, group = Sensor_id)) +
  geom_line(aes(y = maxHumidity, colour = "maxHumidity")) +
  geom_line(aes(y = minHumidity, colour = "minHumidity")) +
  theme_classic()
## [1] "Temperature K Means Groups:"
## [1] 3 5 3
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## 1 101.3333 105.0 91.66667 94.33333 97.66667 96.33333 97.33333 103.6667
## 2 103.4000 108.2 93.60000 97.60000 100.00000 99.20000 99.40000 106.6000
## 3 91.9600 95.3 83.86333 82.44000 87.16333 87.31333 90.46333 91.4800
## [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
## 1 96.33333 93.00 84.66667 85.33333 92.33333 85.00 91.66667 92.66667 92.00
## 2 98.40000 95.60 87.00000 87.20000 94.80000 89.00 95.20000 95.80000 93.40
## 3 88.77333 86.04 75.42667 73.96333 82.36667 74.53 82.40000 84.67333 85.28
## [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## 1 97.00 95.66667 84.00000 84.66667 94.33333 96.00 98.66667 96.33333
## 2 100.80 99.60000 86.60000 87.60000 96.60000 100.80 102.00000 100.60000
## 3 85.31 86.26333 79.40667 77.42000 85.09667 89.55 89.85667 90.51667
## [,26] [,27] [,28] [,29]
## 1 96.33333 95.33333 91.33333 91.33333
## 2 97.60000 98.80000 93.60000 94.00000
## 3 87.54667 84.09333 83.29000 86.31667
## [1] "Humidity K Means Groups:"
## [1] 1 5 5
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## 1 88.0 95.0 95.0 66.0 69.0 87.0 87.0 87.0 93.0 93.0 91.0 76.0 82.0 96.0
## 2 98.2 95.2 91.2 75.0 70.8 81.6 86.6 84.2 92.4 94.6 90.8 78.2 79.8 95.6
## 3 97.2 95.6 92.8 79.6 83.0 87.8 90.4 87.4 92.8 93.6 91.8 82.4 87.8 93.8
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## 1 95.0 90.0 93.000 81.000 92.000 96.000 93.000 98.000 93.000 91.000 90.000
## 2 96.0 89.6 90.600 90.600 89.200 94.600 91.600 97.600 90.800 88.200 87.400
## 3 95.8 92.0 91.886 93.036 92.332 93.054 93.074 96.774 92.728 90.838 89.798
## [,26] [,27] [,28] [,29]
## 1 95.000 98.00 92.000 92.000
## 2 94.000 97.20 91.200 89.800
## 3 92.474 94.93 94.624 91.702
##Plot the results
# Smoothed temperature per sensor, coloured by the temperature cluster the
# sensor was assigned to. `n` is the moving-average window length in days and
# is interpolated into the y-axis label.
ggplot(
  sensors_day_av,
  aes(x = Day, y = avgT, colour = kmeansTemp, group = Sensor_id)
) +
  geom_line() +
  theme_classic() +
  labs(
    x = "Day",
    y = paste0("Max Temperature: ", n, "-Day Moving Avg"),
    title = "Temp in San Antonio"
  )
# Smoothed humidity per sensor, coloured by the humidity cluster the sensor
# was assigned to. `n` is the moving-average window length in days and is
# interpolated into the y-axis label.
ggplot(sensors_day_av) +
  geom_line(aes(x = Day, y = avgH, color = kmeansHumid, group = Sensor_id)) +
  theme_classic() +
  xlab("Day") +
  # The label previously read "Max Temperature" (copied from the temperature
  # plot) although the y aesthetic is the humidity average.
  # NOTE(review): "Max" assumes avgH averages the daily maximum humidity, in
  # parallel with the temperature plot - confirm against where avgH is built.
  ylab(paste0("Max Humidity: ", n, "-Day Moving Avg")) +
  ggtitle("Humidity in San Antonio")
## The lines look truncated because a moving average is undefined wherever a full n-day window is not available.
###Map the Neighborhoods (Zones)
# Turn the per-sensor table into point features; LONG/LAT are read as
# longitude/latitude in EPSG:4326 (WGS 84).
sensorsSF <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)

# Switch tmap to its interactive viewer before drawing any maps.
tmap_mode("view")
## tmap mode set to interactive viewing

# One dot per sensor, coloured by its Zone.
tm_shape(sensorsSF) +
  tm_dots(col = "Zone")
### Let's see how the sensors were clustered by temperature
# Rebuild the point layer and colour each sensor by its temperature cluster.
tempt <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)

tm_shape(tempt) +
  tm_dots(col = "kmeansTemp")
### Let's see how the sensors were clustered by humidity
# Rebuild the point layer and colour each sensor by its humidity cluster.
tempt <- Individualsensors %>%
  st_as_sf(coords = c("LONG", "LAT"), crs = 4326)

tm_shape(tempt) +
  tm_dots(col = "kmeansHumid")
### How did our classifier do?
# Cross-tabulate each sensor's Zone (rows) against its temperature cluster
# (columns) to see how well the clusters line up with the zones.
table(
  Individualsensors[["Zone"]],
  Individualsensors[["kmeansTemp"]]
)
##
## 1 2 3
## Brooks 2 1 1
## Downtown 0 2 1
## Medical Center 1 2 1
# Cross-tabulate each sensor's Zone (rows) against its humidity cluster
# (columns) to see how well the clusters line up with the zones.
table(
  Individualsensors[["Zone"]],
  Individualsensors[["kmeansHumid"]]
)
##
## 1 2 3
## Brooks 1 0 3
## Downtown 0 2 1
## Medical Center 0 3 1
### Note: if you don't have zones, you can run a k-means classifier on the longitude and latitude instead!
library (NbClust)
library (cluster)
library (clustertend)
library (factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Partition the humidity data into 3 clusters with PAM (partitioning around
# medoids, i.e. k-medoids - an alternative to k-means), using Euclidean
# distance on the values as given (no standardisation).
pam.res3 <- pam(
  x = SensorHumid,
  k = 3,
  metric = "euclidean",
  stand = FALSE
)
# Visualizing the Results
# Scatter plot of the PAM clusters, one colour per cluster, with Euclidean
# ellipses, lines from each point to its cluster centre (star.plot) and
# non-overlapping labels (repel).
# The former `data = data` argument was dropped: no `data` object is created
# in this section (the bare name resolves to the utils::data function), and
# fviz_cluster() only requires `data` for kmeans/dbscan results - a pam result
# already carries the data it was fitted on.
fviz_cluster(
  pam.res3,
  palette = c("#FC4E07", "#00AFBB", "#E7B800"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme_minimal()
)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
#Validating the Cluster
# Silhouette plot for the PAM solution; the call also prints each cluster's
# size and average silhouette width.
fviz_silhouette(
  pam.res3,
  palette = "jco",
  ggtheme = theme_classic()
)
## cluster size ave.sil.width
## 1 1 2 0.46
## 2 2 6 0.40
## 3 3 3 0.22