# Importing the packages and reading the data set
library(readr)
library(stats)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(pwr)
library(effsize)
# Read the comma-delimited Telangana 2018 weather file into `my_data`.
my_data <- read_delim(
  file = "C:/Users/user/Documents/Statistics/Telangana_2018_complete_weather_data.csv",
  delim = ","
)
## Rows: 230384 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): District, Mandal, Location, Date
## dbl (6): row_id, temp_min, temp_max, humidity_min, humidity_max, wind_speed
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Response Variable(continuous column) : humidity_max.
Explanatory variable(categorical column) : District.
Null Hypothesis (H0): The means of “humidity_max” are equal for all districts.
Alternative Hypothesis (H1): At least one district has a mean “humidity_max” that differs from the others.
# One-way ANOVA: response humidity_max, grouping factor District.
anova_result <- aov(
  formula = humidity_max ~ District,
  data = my_data
)
# Print the ANOVA table (Df, sums of squares, F value, p-value).
summary(anova_result)
## Df Sum Sq Mean Sq F value Pr(>F)
## District 30 4112839 137095 640.1 <2e-16 ***
## Residuals 230353 49339602 214
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Decide the omnibus ANOVA hypothesis for humidity_max by District.
#
# H0 (all district means are equal) is tested by the ANOVA F-test, so the
# reject / fail-to-reject decision is based on the F-test p-value taken from
# the ANOVA table. Tukey's HSD is a post-hoc procedure that identifies WHICH
# pairs of districts differ; it is still computed and kept available, but it
# no longer drives the decision about H0.
alpha <- 0.05

# summary() of an aov fit is a list whose first element is the ANOVA table;
# its first row is the District term, its last row the residuals.
anova_table <- summary(anova_result)[[1]]
anova_p_value <- anova_table[["Pr(>F)"]][1]

# Post-hoc pairwise comparisons. The adjusted p-values are selected by column
# name ("p adj") rather than by position so the intent is explicit.
tukey_result <- TukeyHSD(anova_result)
p_values <- tukey_result$District[, "p adj"]
# na.rm guards against undefined adjusted p-values turning the count into NA.
significant_comparisons <- sum(p_values < alpha, na.rm = TRUE)

if (anova_p_value < alpha) {
  cat("Reject the null hypothesis. At least one district has a significantly different mean 'humidity_max'.\n")
} else {
  cat("Fail to reject the null hypothesis. There is no significant difference in mean 'humidity_max' among districts.\n")
}
## Reject the null hypothesis. At least one district has a significantly different mean 'humidity_max'.
Another Response variable(Continuous Column) : humidity_min
Null Hypothesis (H0): The regression coefficient is equal to zero, indicating that there is no linear relationship between “humidity_min” and “humidity_max.”
Alternative Hypothesis (H1): The regression coefficient of “humidity_max” is not equal to zero, suggesting that there is a linear relationship between “humidity_min” and “humidity_max.”
# Simple linear regression of humidity_min on humidity_max.
model <- lm(
  formula = humidity_min ~ humidity_max,
  data = my_data
)
# Print coefficient estimates, standard errors and fit statistics.
summary(model)
##
## Call:
## lm(formula = humidity_min ~ humidity_max, data = my_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.866 -11.392 -0.729 11.140 115.780
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -28.840649 0.184191 -156.6 <2e-16 ***
## humidity_max 0.865069 0.002233 387.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.32 on 230382 degrees of freedom
## Multiple R-squared: 0.3945, Adjusted R-squared: 0.3945
## F-statistic: 1.501e+05 on 1 and 230382 DF, p-value: < 2.2e-16
# Test the humidity_max slope at the 5% significance level.
alpha <- 0.05
# p-value of the t-test on the humidity_max coefficient.
p_value <- coef(summary(model))["humidity_max", "Pr(>|t|)"]
if (p_value >= alpha) {
  cat("Fail to reject the null hypothesis. There is no significant linear relationship between humidity_min and humidity_max.\n")
} else {
  cat("Reject the null hypothesis. There is a significant linear relationship between humidity_min and humidity_max.\n")
}
## Reject the null hypothesis. There is a significant linear relationship between humidity_min and humidity_max.
From the above hypothesis test, the p-value is less than the significance level (alpha = 0.05), so we reject the null hypothesis and conclude that there is a significant linear relationship between humidity_max and humidity_min.
# Normal Q-Q plot of the regression residuals, with a reference line.
qqnorm(resid(model))
qqline(resid(model))
# Residuals-vs-fitted diagnostic plot (first of the standard lm plots).
plot(model, which = 1)
One other variable (used in the first ANOVA): District
Adding an interaction term between “humidity_max” and “District” allows me to assess whether the relationship between “humidity_min” and “humidity_max” varies by district.
# Regression of humidity_min on humidity_max, District and their interaction,
# giving each district its own intercept and slope.
interaction_model <- lm(
  formula = humidity_min ~ humidity_max * District,
  data = my_data
)
# Print the coefficient table and overall fit statistics.
summary(interaction_model)
##
## Call:
## lm(formula = humidity_min ~ humidity_max * District, data = my_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58.922 -11.172 -0.562 10.897 137.032
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -28.844621 0.868712 -33.204
## humidity_max 0.938602 0.011473 81.810
## DistrictBhadradri-Kothagudem -5.618858 2.082859 -2.698
## DistrictHyderabad 5.245200 1.488243 3.524
## DistrictJagtial -16.681932 1.268538 -13.151
## DistrictJangaon 11.860816 1.738170 6.824
## DistrictJayashankar-Bhupalpally -13.821078 1.544230 -8.950
## DistrictJogulamba-Gadwal 6.712413 1.491295 4.501
## DistrictKamareddy -8.304384 1.249201 -6.648
## DistrictKarimnagar -12.233676 1.479667 -8.268
## DistrictKhammam 26.778334 1.628915 16.439
## DistrictKumuram Bheem - Asifabad -10.820434 1.457355 -7.425
## DistrictMahabubabad 4.062261 1.840136 2.208
## DistrictMahabubnagar -5.328168 1.168851 -4.558
## DistrictMancherial -22.462853 1.472347 -15.256
## DistrictMedak -10.045564 1.201180 -8.363
## DistrictMedchal-Malkajgiri 10.891883 1.434804 7.591
## DistrictNagarkurnool -3.667367 1.309563 -2.800
## DistrictNalgonda 22.563203 1.209731 18.651
## DistrictNirmal -10.744748 1.254666 -8.564
## DistrictNizamabad -3.377754 1.135197 -2.975
## DistrictPeddapalli -14.312275 1.669918 -8.571
## DistrictRajanna-Siricilla -4.764673 1.374759 -3.466
## DistrictRangareddy -0.860372 1.142903 -0.753
## DistrictSangareddy -2.884698 1.167676 -2.470
## DistrictSiddipet -2.440627 1.252282 -1.949
## DistrictSuryapet 24.068200 1.588841 15.148
## DistrictVikarabad -4.887130 1.203608 -4.060
## DistrictWanaparthy -2.942661 1.476243 -1.993
## DistrictWarangal (R) -6.833910 1.913256 -3.572
## DistrictWarangal (U) -7.369207 2.027299 -3.635
## DistrictYadadri-Bhongir 18.867377 1.507579 12.515
## humidity_max:DistrictBhadradri-Kothagudem -0.038724 0.023917 -1.619
## humidity_max:DistrictHyderabad -0.102320 0.019496 -5.248
## humidity_max:DistrictJagtial 0.141910 0.015973 8.885
## humidity_max:DistrictJangaon -0.235722 0.021393 -11.019
## humidity_max:DistrictJayashankar-Bhupalpally 0.087651 0.018743 4.676
## humidity_max:DistrictJogulamba-Gadwal -0.140676 0.019678 -7.149
## humidity_max:DistrictKamareddy 0.059778 0.016070 3.720
## humidity_max:DistrictKarimnagar 0.064720 0.018150 3.566
## humidity_max:DistrictKhammam -0.397150 0.019118 -20.774
## humidity_max:DistrictKumuram Bheem - Asifabad 0.127120 0.019009 6.687
## humidity_max:DistrictMahabubabad -0.145657 0.022138 -6.580
## humidity_max:DistrictMahabubnagar 0.007429 0.015174 0.490
## humidity_max:DistrictMancherial 0.230288 0.018429 12.496
## humidity_max:DistrictMedak 0.042453 0.015267 2.781
## humidity_max:DistrictMedchal-Malkajgiri -0.220198 0.018326 -12.015
## humidity_max:DistrictNagarkurnool -0.024717 0.016818 -1.470
## humidity_max:DistrictNalgonda -0.361243 0.015172 -23.810
## humidity_max:DistrictNirmal 0.101949 0.016224 6.284
## humidity_max:DistrictNizamabad -0.022474 0.014617 -1.538
## humidity_max:DistrictPeddapalli 0.102514 0.020469 5.008
## humidity_max:DistrictRajanna-Siricilla -0.028918 0.017454 -1.657
## humidity_max:DistrictRangareddy -0.052521 0.014759 -3.559
## humidity_max:DistrictSangareddy -0.029264 0.015161 -1.930
## humidity_max:DistrictSiddipet -0.067168 0.015769 -4.260
## humidity_max:DistrictSuryapet -0.370164 0.018929 -19.555
## humidity_max:DistrictVikarabad 0.002182 0.015519 0.141
## humidity_max:DistrictWanaparthy -0.006976 0.019260 -0.362
## humidity_max:DistrictWarangal (R) -0.011609 0.022884 -0.507
## humidity_max:DistrictWarangal (U) -0.013969 0.024034 -0.581
## humidity_max:DistrictYadadri-Bhongir -0.329670 0.018456 -17.862
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## humidity_max < 2e-16 ***
## DistrictBhadradri-Kothagudem 0.006983 **
## DistrictHyderabad 0.000424 ***
## DistrictJagtial < 2e-16 ***
## DistrictJangaon 8.89e-12 ***
## DistrictJayashankar-Bhupalpally < 2e-16 ***
## DistrictJogulamba-Gadwal 6.76e-06 ***
## DistrictKamareddy 2.98e-11 ***
## DistrictKarimnagar < 2e-16 ***
## DistrictKhammam < 2e-16 ***
## DistrictKumuram Bheem - Asifabad 1.13e-13 ***
## DistrictMahabubabad 0.027274 *
## DistrictMahabubnagar 5.16e-06 ***
## DistrictMancherial < 2e-16 ***
## DistrictMedak < 2e-16 ***
## DistrictMedchal-Malkajgiri 3.18e-14 ***
## DistrictNagarkurnool 0.005104 **
## DistrictNalgonda < 2e-16 ***
## DistrictNirmal < 2e-16 ***
## DistrictNizamabad 0.002926 **
## DistrictPeddapalli < 2e-16 ***
## DistrictRajanna-Siricilla 0.000529 ***
## DistrictRangareddy 0.451574
## DistrictSangareddy 0.013495 *
## DistrictSiddipet 0.051303 .
## DistrictSuryapet < 2e-16 ***
## DistrictVikarabad 4.90e-05 ***
## DistrictWanaparthy 0.046225 *
## DistrictWarangal (R) 0.000355 ***
## DistrictWarangal (U) 0.000278 ***
## DistrictYadadri-Bhongir < 2e-16 ***
## humidity_max:DistrictBhadradri-Kothagudem 0.105423
## humidity_max:DistrictHyderabad 1.54e-07 ***
## humidity_max:DistrictJagtial < 2e-16 ***
## humidity_max:DistrictJangaon < 2e-16 ***
## humidity_max:DistrictJayashankar-Bhupalpally 2.92e-06 ***
## humidity_max:DistrictJogulamba-Gadwal 8.77e-13 ***
## humidity_max:DistrictKamareddy 0.000199 ***
## humidity_max:DistrictKarimnagar 0.000363 ***
## humidity_max:DistrictKhammam < 2e-16 ***
## humidity_max:DistrictKumuram Bheem - Asifabad 2.28e-11 ***
## humidity_max:DistrictMahabubabad 4.73e-11 ***
## humidity_max:DistrictMahabubnagar 0.624423
## humidity_max:DistrictMancherial < 2e-16 ***
## humidity_max:DistrictMedak 0.005424 **
## humidity_max:DistrictMedchal-Malkajgiri < 2e-16 ***
## humidity_max:DistrictNagarkurnool 0.141654
## humidity_max:DistrictNalgonda < 2e-16 ***
## humidity_max:DistrictNirmal 3.31e-10 ***
## humidity_max:DistrictNizamabad 0.124163
## humidity_max:DistrictPeddapalli 5.49e-07 ***
## humidity_max:DistrictRajanna-Siricilla 0.097556 .
## humidity_max:DistrictRangareddy 0.000373 ***
## humidity_max:DistrictSangareddy 0.053584 .
## humidity_max:DistrictSiddipet 2.05e-05 ***
## humidity_max:DistrictSuryapet < 2e-16 ***
## humidity_max:DistrictVikarabad 0.888206
## humidity_max:DistrictWanaparthy 0.717182
## humidity_max:DistrictWarangal (R) 0.611937
## humidity_max:DistrictWarangal (U) 0.561095
## humidity_max:DistrictYadadri-Bhongir < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.07 on 230322 degrees of freedom
## Multiple R-squared: 0.4133, Adjusted R-squared: 0.4132
## F-statistic: 2660 on 61 and 230322 DF, p-value: < 2.2e-16
# Normal Q-Q plot of the interaction model's residuals, with a reference line.
qqnorm(resid(interaction_model))
qqline(resid(interaction_model))
# Residuals-vs-fitted diagnostic plot for the interaction model.
plot(interaction_model, which = 1)