# Load the project data set; the relative path is resolved against the
# current working directory.
df <- read.csv(file = "Project.csv")
Linear Regression
Simple Linear Regression
# Simple linear regression: Fatalities explained by Injured alone.
linear <- lm(formula = Fatalities ~ Injured, data = df)
# Auto-print the fitted object (call and coefficients only).
linear
##
## Call:
## lm(formula = Fatalities ~ Injured, data = df)
##
## Coefficients:
## (Intercept) Injured
## 3.7118 0.1173
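The fitted model has an intercept of 3.7118 and a slope of 0.1173 for Injured, i.e. Fatalities ≈ 3.7118 + 0.1173 × Injured: each additional injured person is associated with about 0.12 more fatalities on average.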
Summary of Linear regression
# Full model summary: residual quantiles, coefficient table, R-squared, F-test.
summary(object = linear)
##
## Call:
## lm(formula = Fatalities ~ Injured, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.550 -3.005 -0.829 1.288 39.069
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.711791 0.261691 14.18 <2e-16 ***
## Injured 0.117339 0.008587 13.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.606 on 321 degrees of freedom
## Multiple R-squared: 0.3678, Adjusted R-squared: 0.3658
## F-statistic: 186.7 on 1 and 321 DF, p-value: < 2.2e-16
Plot of the Linear regression
# plot() on an lm object draws the standard regression diagnostic plots.
plot(x = linear)
library(ggplot2)
Scatter plot of Fatalities against Injured with the fitted regression line (ggplot2)
# Scatter of Fatalities against Injured with the fitted least-squares line
# and its confidence band.
# geom_jitter() adds random noise to the point positions, so the exact
# placement of points differs from run to run.
# The smoother's formula is given explicitly so that geom_smooth() does not
# emit its "using formula 'y ~ x'" message on every run.
ggplot(df, aes(x = Injured, y = Fatalities)) +
  geom_jitter() +
  geom_smooth(method = "lm", formula = y ~ x)
# Multiple regression: Total.victims explained by Injured and Fatalities.
# NOTE(review): both fitted slopes are close to 1 and R-squared is 0.9997,
# which suggests Total.victims is (nearly) Injured + Fatalities -- confirm
# that regressing a total on its own components is intended.
multiple <- lm(formula = Total.victims ~ Injured + Fatalities, data = df)
Summary of the Multiple Regression
# Coefficient table and fit statistics for the two-predictor model.
summary(object = multiple)
##
## Call:
## lm(formula = Total.victims ~ Injured + Fatalities, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6154 -0.5924 0.2417 0.3171 3.2146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.222644 0.039270 -5.67 3.2e-08 ***
## Injured 1.002671 0.001271 789.13 < 2e-16 ***
## Fatalities 0.967611 0.006567 147.35 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5419 on 320 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 6.212e+05 on 2 and 320 DF, p-value: < 2.2e-16
Plot of the multiple regression
# Regression diagnostic plots for the two-predictor model.
plot(x = multiple)
Regression on the Injured × Fatalities interaction term only
# Interaction-only model: `Injured:Fatalities` fits the product term alone,
# without the Injured and Fatalities main effects (`*` would include them).
multiple2 <- lm(formula = Total.victims ~ Injured:Fatalities, data = df)
summary(object = multiple2)
##
## Call:
## lm(formula = Total.victims ~ Injured:Fatalities, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.811 -3.829 -2.773 0.755 58.368
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.7728020 0.4271457 18.20 <2e-16 ***
## Injured:Fatalities 0.0188801 0.0002457 76.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.655 on 321 degrees of freedom
## Multiple R-squared: 0.9485, Adjusted R-squared: 0.9483
## F-statistic: 5906 on 1 and 321 DF, p-value: < 2.2e-16
Let's look at the diagnostic plots for this interaction-only model.
# Regression diagnostic plots for the interaction-only model.
# NOTE(review): this call emits "NaNs produced" warnings from
# sqrt(crit * p * (1 - hh)/hh); presumably this comes from drawing the
# Cook's distance contours on the leverage plot -- confirm whether very
# high-leverage observations are responsible.
plot(x = multiple2)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
Summary of the regression using all of my variables (Injured, Fatalities, Age and Gender)
# Additive model using every predictor; Gender enters as a categorical term
# (the summary reports it as GenderMale).
multiple3 <- lm(
  formula = Total.victims ~ Injured + Fatalities + Age + Gender,
  data = df
)
summary(object = multiple3)
##
## Call:
## lm(formula = Total.victims ~ Injured + Fatalities + Age + Gender,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7578 -0.5394 0.2080 0.3371 3.1446
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.110378 0.117268 0.941 0.3473
## Injured 1.002572 0.001260 795.669 <2e-16 ***
## Fatalities 0.971265 0.006608 146.988 <2e-16 ***
## Age -0.005970 0.002719 -2.195 0.0289 *
## GenderMale -0.183619 0.093361 -1.967 0.0501 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.536 on 318 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 3.175e+05 on 4 and 318 DF, p-value: < 2.2e-16
Plot of the multiple variable regression
# Regression diagnostic plots for the four-predictor model.
plot(x = multiple3)