# Importing the packages and reading the data set
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Read the comma-delimited Telangana 2018 weather file into `my_data`.
my_data <- read_delim(
  file = "C:/Users/user/Documents/Statistics/Telangana_2018_complete_weather_data.csv",
  delim = ","
)
## Rows: 230384 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): District, Mandal, Location, Date
## dbl (6): row_id, temp_min, temp_max, humidity_min, humidity_max, wind_speed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# creating a variable_set_1
# One output row per input row: the two temperature columns plus their mean.
# This is a row-wise transformation, so select() + mutate() is used instead of
# group_by() + summarise(): summarise() is meant to return one row per group,
# and returning several rows per group is deprecated since dplyr 1.1.0.
# The result is an ungrouped tibble in the original row order.
variable_set_1 <- my_data %>%
  select(temp_min, temp_max) %>%
  mutate(avg_temp = (temp_min + temp_max) / 2)
summary(variable_set_1)
## temp_min temp_max avg_temp
## Min. : 5.00 Min. :22.00 Min. :17.05
## 1st Qu.:20.20 1st Qu.:31.80 1st Qu.:26.15
## Median :23.50 Median :34.60 Median :28.60
## Mean :22.51 Mean :34.76 Mean :28.63
## 3rd Qu.:25.40 3rd Qu.:37.70 3rd Qu.:31.10
## Max. :34.60 Max. :45.40 Max. :39.80
# Adding new column to variable_set_1
# Bin avg_temp into three ordered classes. cut() uses right-closed intervals
# by default, so the bins are (-Inf, 26], (26, 31] and (31, Inf).
variable_set_1[["Temperature_Range"]] <- cut(
  variable_set_1[["avg_temp"]],
  breaks = c(-Inf, 26, 31, Inf),
  labels = c("Low_temp", "Moderate_Temp", "High_Temp"),
  ordered_result = TRUE
)
view(variable_set_1)
# creating a variable_set_2
# One output row per input row: the two humidity columns plus their mean.
# This is a row-wise transformation, so select() + mutate() is used instead of
# group_by() + summarise(): summarise() is meant to return one row per group,
# and returning several rows per group is deprecated since dplyr 1.1.0.
# The result is an ungrouped tibble in the original row order.
variable_set_2 <- my_data %>%
  select(humidity_min, humidity_max) %>%
  mutate(avg_humid = (humidity_min + humidity_max) / 2)
summary(variable_set_2)
## humidity_min humidity_max avg_humid
## Min. : 0.1 Min. : 4.00 Min. :11.35
## 1st Qu.:24.6 1st Qu.: 70.90 1st Qu.:48.95
## Median :36.9 Median : 83.10 Median :60.25
## Mean :41.3 Mean : 81.08 Mean :61.19
## 3rd Qu.:55.5 3rd Qu.: 94.70 3rd Qu.:73.35
## Max. :99.9 Max. :100.00 Max. :99.95
# Adding new column to variable_set_2
# Bin avg_humid into three ordered classes. cut() uses right-closed intervals
# by default, so the bins are (-Inf, 49], (49, 73] and (73, Inf).
variable_set_2[["humidity_Range"]] <- cut(
  variable_set_2[["avg_humid"]],
  breaks = c(-Inf, 49, 73, Inf),
  labels = c("Low_Humidity", "Moderate_Humidity", "High_Humidity"),
  ordered_result = TRUE
)
view(variable_set_2)
# creating a variable_set_3
# Keep only the wind_speed column, one row per input row. The original
# summarise(wind_speed = wind_speed / 1) divided by 1 (a no-op) and relied on
# summarise() returning many rows, which is deprecated since dplyr 1.1.0;
# select() expresses the same result directly.
variable_set_3 <- my_data %>%
  select(wind_speed)
summary(variable_set_3$wind_speed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.10 6.80 10.20 10.82 14.30 69.00
# Adding new column to variable_set_3
# Bin wind_speed into three ordered classes. cut() uses right-closed intervals
# by default, so the bins are (-Inf, 7], (7, 14] and (14, Inf).
variable_set_3[["wind_speed_Range"]] <- cut(
  variable_set_3[["wind_speed"]],
  breaks = c(-Inf, 7, 14, Inf),
  labels = c("Low_Wind", "Moderate_Wind", "High_Wind"),
  ordered_result = TRUE
)
view(variable_set_3)
# Scatter plot for Temperature_Range vs. avg_temp
# Points are coloured by their temperature class; the title is set through
# labs(), which is what ggtitle() does.
ggplot(
  data = variable_set_1,
  mapping = aes(x = avg_temp, y = Temperature_Range, fill = Temperature_Range)
) +
  geom_point(mapping = aes(color = Temperature_Range)) +
  labs(
    title = "Temperature Range vs. Average Temperature",
    x = "Average Temperature",
    y = "Temperature Range"
  )
# Scatter plot for humidity_Range vs. avg_humid
# Points are coloured by their humidity class; the title is set through
# labs(), which is what ggtitle() does.
ggplot(
  data = variable_set_2,
  mapping = aes(x = avg_humid, y = humidity_Range, fill = humidity_Range)
) +
  geom_point(mapping = aes(color = humidity_Range)) +
  labs(
    title = "Humidity Range vs. Average Humidity",
    x = "Average Humidity",
    y = "Humidity Range"
  )
# Scatter plot for wind_speed_Range vs. wind_speed
# Points are coloured by their wind-speed class; the title is set through
# labs(), which is what ggtitle() does.
ggplot(
  data = variable_set_3,
  mapping = aes(x = wind_speed, y = wind_speed_Range, fill = wind_speed_Range)
) +
  geom_point(mapping = aes(color = wind_speed_Range)) +
  labs(
    title = "Wind Speed Range vs. Wind Speed",
    x = "Wind Speed",
    y = "Wind Speed Range"
  )
# Pearson correlation between each measurement and the integer codes of its
# own ordered range factor (as.numeric() on the factor yields 1, 2, 3).
with(
  variable_set_1,
  cor(avg_temp, as.numeric(Temperature_Range), method = "pearson")
)
## [1] 0.9138537
with(
  variable_set_2,
  cor(avg_humid, as.numeric(humidity_Range), method = "pearson")
)
## [1] 0.9126292
with(
  variable_set_3,
  cor(wind_speed, as.numeric(wind_speed_Range), method = "pearson")
)
## [1] 0.8506889
# Confidence interval for Average Temperature
# One-sample t-test on avg_temp; its conf.int element is the interval for the
# mean at t.test()'s default 95% confidence level.
temp_range <- variable_set_1[["avg_temp"]]
conf_interval_temp_range <- t.test(x = temp_range)[["conf.int"]]
conf_interval_temp_range
## [1] 28.61777 28.64753
## attr(,"conf.level")
## [1] 0.95
# Confidence interval for Average Humidity
# One-sample t-test on avg_humid; its conf.int element is the interval for the
# mean at t.test()'s default 95% confidence level.
humid_range <- variable_set_2[["avg_humid"]]
conf_interval_humid_range <- t.test(x = humid_range)[["conf.int"]]
conf_interval_humid_range
## [1] 61.12038 61.25417
## attr(,"conf.level")
## [1] 0.95
# Confidence interval for Wind Speed
# One-sample t-test on wind_speed; its conf.int element is the interval for
# the mean at t.test()'s default 95% confidence level.
# Note: this vector holds the raw wind speeds, not the wind_speed_Range factor
# column of variable_set_3, despite sharing its name.
wind_speed_Range <- variable_set_3[["wind_speed"]]
conf_interval_wind_speed_Range <- t.test(x = wind_speed_Range)[["conf.int"]]
conf_interval_wind_speed_Range
## [1] 10.79247 10.84307
## attr(,"conf.level")
## [1] 0.95