# Load the project data set from the CSV file in the working directory.
df <- read.csv(file = "Project.csv")
                      Linear Regression

Simple Linear Regression

# Simple linear regression: Fatalities as the response, Injured as the
# single predictor, both taken from `df`.
linear <- lm(formula = Fatalities ~ Injured, data = df)
# Show the call and the estimated coefficients.
print(linear)
## 
## Call:
## lm(formula = Fatalities ~ Injured, data = df)
## 
## Coefficients:
## (Intercept)      Injured  
##      3.7118       0.1173

The fitted model has an intercept of 3.7118 and a slope of 0.1173 for Injured, i.e. Fatalities = 3.7118 + 0.1173 * Injured.

Summary of the linear regression

# Full model summary for `linear`: residuals, coefficient table and fit statistics.
summary(object = linear)
## 
## Call:
## lm(formula = Fatalities ~ Injured, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.550 -3.005 -0.829  1.288 39.069 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.711791   0.261691   14.18   <2e-16 ***
## Injured     0.117339   0.008587   13.66   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.606 on 321 degrees of freedom
## Multiple R-squared:  0.3678, Adjusted R-squared:  0.3658 
## F-statistic: 186.7 on 1 and 321 DF,  p-value: < 2.2e-16

Plot of the Linear regression

# Draw the base-R plots for the fitted model `linear`.
plot(x = linear)

library(ggplot2)

Scatter plot with fitted regression line (ggplot2)

# Jittered scatter of Fatalities against Injured, overlaid with a smoother
# fitted by lm.
ggplot(data = df, mapping = aes(x = Injured, y = Fatalities)) +
  geom_jitter() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Multiple regression: Total.victims modelled on Injured and Fatalities.
multiple <- lm(formula = Total.victims ~ Injured + Fatalities, data = df)

Summary of the Multiple Regression

# Full model summary for `multiple`: residuals, coefficient table and fit statistics.
summary(object = multiple)
## 
## Call:
## lm(formula = Total.victims ~ Injured + Fatalities, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6154 -0.5924  0.2417  0.3171  3.2146 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.222644   0.039270   -5.67  3.2e-08 ***
## Injured      1.002671   0.001271  789.13  < 2e-16 ***
## Fatalities   0.967611   0.006567  147.35  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5419 on 320 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 6.212e+05 on 2 and 320 DF,  p-value: < 2.2e-16

Plot of the multiple regression

# Draw the base-R plots for the fitted model `multiple`.
plot(x = multiple)

Regression on the Injured:Fatalities interaction term

# Model Total.victims on the Injured:Fatalities interaction term only
# (no main effects are included in this formula).
multiple2 <- lm(formula = Total.victims ~ Injured:Fatalities, data = df)
summary(object = multiple2)
## 
## Call:
## lm(formula = Total.victims ~ Injured:Fatalities, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.811 -3.829 -2.773  0.755 58.368 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.7728020  0.4271457   18.20   <2e-16 ***
## Injured:Fatalities 0.0188801  0.0002457   76.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.655 on 321 degrees of freedom
## Multiple R-squared:  0.9485, Adjusted R-squared:  0.9483 
## F-statistic:  5906 on 1 and 321 DF,  p-value: < 2.2e-16

Let's get a more visual representation of this model fitted to my dataset.

# Draw the base-R plots for the fitted model `multiple2`.
plot(x = multiple2)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

Summary of the regression using all of my variables

# Multiple regression of Total.victims on Injured, Fatalities, Age and Gender.
multiple3 <- lm(
  formula = Total.victims ~ Injured + Fatalities + Age + Gender,
  data = df
)
summary(object = multiple3)
## 
## Call:
## lm(formula = Total.victims ~ Injured + Fatalities + Age + Gender, 
##     data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7578 -0.5394  0.2080  0.3371  3.1446 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.110378   0.117268   0.941   0.3473    
## Injured      1.002572   0.001260 795.669   <2e-16 ***
## Fatalities   0.971265   0.006608 146.988   <2e-16 ***
## Age         -0.005970   0.002719  -2.195   0.0289 *  
## GenderMale  -0.183619   0.093361  -1.967   0.0501 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.536 on 318 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9997 
## F-statistic: 3.175e+05 on 4 and 318 DF,  p-value: < 2.2e-16

Plot of the multiple variable regression

# Draw the base-R plots for the fitted model `multiple3`.
plot(x = multiple3)