Nishant Dudhwala (S3752868)
Last updated: 24 October, 2019
\[ H_0: \text{The variables in the data set do not fit a linear regression model} \]
\[ H_A: \text{The variables in the data set fit a linear regression model} \]
# Descriptive statistics for Population_of_India: five-number summary,
# mean, standard deviation and row count, ignoring missing values.
# Columns are referenced through the summarise() data mask; Road_accident
# is assumed to be ungrouped (the recorded output is a single row).
t1 <- Road_accident %>%
  summarise(
    Min = min(Population_of_India, na.rm = TRUE),
    Q1 = quantile(Population_of_India, probs = 0.25, na.rm = TRUE),
    Median = median(Population_of_India, na.rm = TRUE),
    Q3 = quantile(Population_of_India, probs = 0.75, na.rm = TRUE),
    Max = max(Population_of_India, na.rm = TRUE),
    Mean = mean(Population_of_India, na.rm = TRUE),
    SD = sd(Population_of_India, na.rm = TRUE),
    n = n()
  )
knitr::kable(t1)

| Min | Q1 | Median | Q3 | Max | Mean | SD | n |
|---|---|---|---|---|---|---|---|
| 1014825 | 1083268 | 1152774 | 1220234 | 1283601 | 1152031 | 85878.86 | 18 |
# Outlier screen for Population_of_India.
#   Lobound / Upbound     : Q1 - 1.5 * IQR and Q3 + 1.5 * IQR fences
#   NumLoOuts / NumUpOuts : number of observations outside each fence
#   SD_Min2Mean           : (mean - min) / sd
#   SD_Mean2Max           : (max - mean) / sd
# Missing values are dropped consistently in every statistic.
t2 <- Road_accident %>%
  summarise(
    Lobound = quantile(Population_of_India, probs = 0.25, na.rm = TRUE) -
      1.5 * IQR(Population_of_India, na.rm = TRUE),
    # Lobound/Upbound are the columns created just above in this same call.
    NumLoOuts = sum(Population_of_India < Lobound, na.rm = TRUE),
    SD_Min2Mean = (mean(Population_of_India, na.rm = TRUE) -
      min(Population_of_India, na.rm = TRUE)) /
      sd(Population_of_India, na.rm = TRUE),
    Upbound = quantile(Population_of_India, probs = 0.75, na.rm = TRUE) +
      1.5 * IQR(Population_of_India, na.rm = TRUE),
    NumUpOuts = sum(Population_of_India > Upbound, na.rm = TRUE),
    Mean = mean(Population_of_India, na.rm = TRUE),
    # Fix: the original wrote `Road_accident & Population_of_India`
    # (element-wise AND) instead of `$`, so the mean - and therefore this
    # whole column - came out as NA.
    SD_Mean2Max = (max(Population_of_India, na.rm = TRUE) -
      mean(Population_of_India, na.rm = TRUE)) /
      sd(Population_of_India, na.rm = TRUE),
    n = n()
  )
knitr::kable(t2)

| Lobound | NumLoOuts | SD_Min2Mean | Upbound | NumUpOuts | Mean | SD_Mean2Max | n |
|---|---|---|---|---|---|---|---|
| 877819.5 | 0 | 1.597673 | 1425683 | 0 | 1152031 | NA | 18 |
# Descriptive statistics for Total_Number_of_Road_Accident: five-number
# summary, mean, standard deviation and row count, ignoring missing values.
# Columns are referenced through the summarise() data mask; Road_accident
# is assumed to be ungrouped (the recorded output is a single row).
t3 <- Road_accident %>%
  summarise(
    Min = min(Total_Number_of_Road_Accident, na.rm = TRUE),
    Q1 = quantile(Total_Number_of_Road_Accident, probs = 0.25, na.rm = TRUE),
    Median = median(Total_Number_of_Road_Accident, na.rm = TRUE),
    Q3 = quantile(Total_Number_of_Road_Accident, probs = 0.75, na.rm = TRUE),
    Max = max(Total_Number_of_Road_Accident, na.rm = TRUE),
    Mean = mean(Total_Number_of_Road_Accident, na.rm = TRUE),
    SD = sd(Total_Number_of_Road_Accident, na.rm = TRUE),
    n = n()
  )
knitr::kable(t3)

| Min | Q1 | Median | Q3 | Max | Mean | SD | n |
|---|---|---|---|---|---|---|---|
| 391449 | 432246.2 | 479934 | 488669 | 501423 | 461236.4 | 37479.06 | 18 |
# Outlier screen for Total_Number_of_Road_Accident.
#   Lobound / Upbound     : Q1 - 1.5 * IQR and Q3 + 1.5 * IQR fences
#   NumLoOuts / NumUpOuts : number of observations outside each fence
#   SD_Min2Mean           : (mean - min) / sd
#   SD_Mean2Max           : (max - mean) / sd
# Missing values are dropped consistently in every statistic.
t4 <- Road_accident %>%
  summarise(
    Lobound = quantile(Total_Number_of_Road_Accident, probs = 0.25, na.rm = TRUE) -
      1.5 * IQR(Total_Number_of_Road_Accident, na.rm = TRUE),
    # Lobound/Upbound are the columns created just above in this same call.
    NumLoOuts = sum(Total_Number_of_Road_Accident < Lobound, na.rm = TRUE),
    SD_Min2Mean = (mean(Total_Number_of_Road_Accident, na.rm = TRUE) -
      min(Total_Number_of_Road_Accident, na.rm = TRUE)) /
      sd(Total_Number_of_Road_Accident, na.rm = TRUE),
    Upbound = quantile(Total_Number_of_Road_Accident, probs = 0.75, na.rm = TRUE) +
      1.5 * IQR(Total_Number_of_Road_Accident, na.rm = TRUE),
    NumUpOuts = sum(Total_Number_of_Road_Accident > Upbound, na.rm = TRUE),
    Mean = mean(Total_Number_of_Road_Accident, na.rm = TRUE),
    # Fix: the original wrote `Road_accident & Total_Number_of_Road_Accident`
    # (element-wise AND) instead of `$`, so the mean - and therefore this
    # whole column - came out as NA.
    SD_Mean2Max = (max(Total_Number_of_Road_Accident, na.rm = TRUE) -
      mean(Total_Number_of_Road_Accident, na.rm = TRUE)) /
      sd(Total_Number_of_Road_Accident, na.rm = TRUE),
    n = n()
  )
knitr::kable(t4)

| Lobound | NumLoOuts | SD_Min2Mean | Upbound | NumUpOuts | Mean | SD_Mean2Max | n |
|---|---|---|---|---|---|---|---|
| 347612.1 | 0 | 1.862038 | 573303.1 | 0 | 461236.4 | NA | 18 |
# Side-by-side boxplots of the two series.
# NOTE(review): the division by 2.7 looks like an ad-hoc rescaling so both
# boxes fit on one y-axis - confirm the intent; the y-axis label still says
# "Population of India" although the axis is shared by both series.
# Group labels are passed through `names` so they replace the default
# "1"/"2" tick labels instead of being drawn on top of them by a second
# axis() call.
boxplot(
  Road_accident$Population_of_India / 2.7,
  Road_accident$Total_Number_of_Road_Accident,
  names = c("Population_of_India", "Total_Number_of_Road_Accident"),
  ylab = "Population of India",
  xlab = "Population of India and Total_Number_of_Road_Accident"
)

# Paired two-sided t-test on the per-row difference between the two columns
# at the 95% confidence level. `alternative` is spelled out in full rather
# than relying on partial matching of "two.side".
t.test(
  Road_accident$Population_of_India,
  Road_accident$Total_Number_of_Road_Accident,
  paired = TRUE,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## Paired t-test
##
## data: Road_accident$Population_of_India and Road_accident$Total_Number_of_Road_Accident
## t = 50.514, df = 17, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 661942.3 719647.4
## sample estimates:
## mean of the differences
## 690794.9
# Hand-computed simple linear regression of Population_of_India (response)
# on Total_Number_of_Road_Accident (predictor) - the same regression that
# lm() fits as model1 at the end of this block.
# Naming: suffix A = accidents (predictor), suffix B = population (response).
# A and B now follow that convention (the original had them swapped relative
# to sum_A / sum_B).
A <- Road_accident$Total_Number_of_Road_Accident^2
B <- Road_accident$Population_of_India^2
AB <- Road_accident$Population_of_India *
  Road_accident$Total_Number_of_Road_Accident

sum_A <- sum(Road_accident$Total_Number_of_Road_Accident)
sum_B <- sum(Road_accident$Population_of_India)
# Fix: these must be the SUM OF SQUARES, sum(x^2). The original computed the
# square of the sum, sum(x)^2, which made LAA and LBB meaningless.
sum_A_sq <- sum(A)
sum_B_sq <- sum(B)
sum_AB <- sum(AB)
n <- length(Road_accident$Total_Number_of_Road_Accident)

# Corrected sums of squares and cross-products.
LAA <- sum_A_sq - (sum_A^2) / n
LBB <- sum_B_sq - (sum_B^2) / n
LAB <- sum_AB - (sum_A * sum_B) / n

# Least-squares slope and intercept: b = LAB / LAA, a = mean(y) - b * mean(x).
# Fix: the original intercept used mean(accidents) - b * mean(population),
# i.e. the opposite regression direction from the slope.
b <- LAB / LAA
a <- mean(Road_accident$Population_of_India) -
  b * mean(Road_accident$Total_Number_of_Road_Accident)

# Scatter plot with the hand-computed line (blue) and the lm() line overlaid.
# The plot and overlay use the same response ~ predictor direction as a, b
# and model1 so the two lines are comparable.
plot(Population_of_India ~ Total_Number_of_Road_Accident, data = Road_accident)
abline(a = a, b = b, col = "Blue")
abline(lm(Population_of_India ~ Total_Number_of_Road_Accident, data = Road_accident))

model1 <- lm(Population_of_India ~ Total_Number_of_Road_Accident, data = Road_accident)
# Print the full regression summary for model1.
summary(model1)
##
## Call:
## lm(formula = Population_of_India ~ Total_Number_of_Road_Accident,
## data = Road_accident)
##
## Residuals:
## Min 1Q Median 3Q Max
## -58156 -33497 -7503 21035 124491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.632e+05 1.434e+05 1.836 0.0851 .
## Total_Number_of_Road_Accident 1.927e+00 3.100e-01 6.217 1.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47900 on 16 degrees of freedom
## Multiple R-squared: 0.7072, Adjusted R-squared: 0.6889
## F-statistic: 38.65 on 1 and 16 DF, p-value: 1.233e-05
# R-squared from the hand-computed slope b and the corrected sums LAB, LBB.
R2 <- b * LAB / LBB
# NOTE(review): the recorded value below (8.0e-08) disagrees with the
# Multiple R-squared of 0.7072 printed by summary(model1); check how b, LAB
# and LBB are computed and re-knit.
R2
## [1] 8.009119e-08
# Upper-tail probability of F = 38.65 on (1, 16) degrees of freedom.
pf(38.65, df1 = 1, df2 = 16, lower.tail = FALSE)
## [1] 1.233201e-05
model1 %>% anova()

The p-value of the F-test (1.233e-05) is below the 0.05 significance level. Hence we reject \(H_0\), which means the data fit a linear regression model.
# Coefficient table of model1 (estimate, standard error, t value, p-value).
coef(summary(model1))
## Estimate Std. Error t value
## (Intercept) 2.632418e+05 1.434104e+05 1.835584
## Total_Number_of_Road_Accident 1.926972e+00 3.099610e-01 6.216819
## Pr(>|t|)
## (Intercept) 8.507555e-02
## Total_Number_of_Road_Accident 1.233415e-05
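\[H_0: \beta = 0 \] \[H_A: \beta \ne 0 \]
This hypothesis test gives the following result for the slope of Total_Number_of_Road_Accident: \[ t = 6.216819 \] \[ p < .001 \] The slope is statistically significant at the 0.05 level. This means there is statistically significant evidence that the slope is not 0. (The constant, with \(t = 1.835584\) and \(p = 0.085\), is not statistically significant at the 0.05 level.)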
# Draw the diagnostic plots for the fitted linear model model1
# (plot() dispatches on the lm object returned by lm()).
plot(model1)