## [1] 241 9
## [1] "Name" "Park" "Track" "Speed" "Height"
## [6] "Drop" "Length" "Duration" "Inversions"
Visualizing the variables of interest
# Scatterplot of Duration (y-axis) against Speed (x-axis) using all rows of x.
plot(Duration ~ Speed, data = x)
Setting aside two outlying coasters
# Set aside the two outlying coasters by name. `%in%` never returns NA, so a
# row whose Name is missing is kept unchanged instead of being turned into an
# all-NA row, which is what indexing with `!=` does when the comparison is NA.
xx <- x[!(x$Name %in% "Tower of Terror"), ]
xx2 <- xx[!(xx$Name %in% "Xcelerator"), ]

# Simple linear regression of Duration on Speed for the remaining coasters.
# Rows with a missing Duration or Speed are dropped by lm()'s NA handling.
m <- lm(Duration ~ Speed, data = xx2)
summary(m)
##
## Call:
## lm(formula = Duration ~ Speed, data = xx2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -90.884 -22.281 0.888 17.250 94.085
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.1193 11.3554 2.036 0.0435 *
## Speed 1.8034 0.1963 9.186 2.73e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.8 on 153 degrees of freedom
## (84 observations deleted due to missingness)
## Multiple R-squared: 0.3555, Adjusted R-squared: 0.3512
## F-statistic: 84.38 on 1 and 153 DF, p-value: 2.733e-16
# Redraw the scatterplot, this time from xx2 (the two named coasters removed).
plot(Duration ~ Speed, data = xx2)
abline(m) # overlay the fitted regression line from model m on the plot
Checking the regression assumptions with a residual plot
# Residual plot for m: residuals (y-axis) against fitted values (x-axis),
# drawn to inspect the regression assumptions.
plot(resid(m) ~ fitted(m))
Adding categorical variables to our regression
# Regression of Duration on Speed restricted to the rows of xx2 whose Track is
# "Wood". The subset expression is kept inline so the printed Call is unchanged.
m1 <- lm(Duration ~ Speed, data = xx2[xx2$Track == "Wood", ])
summary(m1)
##
## Call:
## lm(formula = Duration ~ Speed, data = xx2[xx2$Track == "Wood",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -72.695 -20.165 -2.616 16.066 66.257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.2167 54.3124 0.612 0.5474
## Speed 1.8600 0.9252 2.010 0.0574 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.6 on 21 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1614, Adjusted R-squared: 0.1215
## F-statistic: 4.041 on 1 and 21 DF, p-value: 0.05741
# Regression of Duration on Speed restricted to the rows of xx2 whose Track is
# "Steel". Summarise m2 (not m1) so that the Steel fit is what gets printed;
# any previously rendered output showing the Wood call here is stale and must
# be regenerated.
m2 <- lm(Duration ~ Speed, data = xx2[xx2$Track == "Steel", ])
summary(m2)
##
## Call:
## lm(formula = Duration ~ Speed, data = xx2[xx2$Track == "Wood",
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -72.695 -20.165 -2.616 16.066 66.257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.2167 54.3124 0.612 0.5474
## Speed 1.8600 0.9252 2.010 0.0574 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.6 on 21 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.1614, Adjusted R-squared: 0.1215
## F-statistic: 4.041 on 1 and 21 DF, p-value: 0.05741
Both m1 and m2 are roughly parallel (part of the linearity condition). Tip: this regression works only if m1 and m2 have similar slopes but different intercepts.
Setting an indicator (dummy) variable for Track: with R's default coding Steel is the baseline, so TrackWood = 1 if Wood and TrackWood = 0 if Steel
# Additive model: Duration on Speed plus Track, fitted to xx2. Track enters as
# a single indicator term, so the model has one Speed slope and a Track-specific
# shift in the intercept.
m3 <- lm(Duration ~ Speed + Track, data = xx2)
summary(m3)
##
## Call:
## lm(formula = Duration ~ Speed + Track, data = xx2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -88.314 -21.094 2.243 19.050 96.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.1399 11.2617 1.966 0.0511 .
## Speed 1.7791 0.1949 9.127 4.04e-16 ***
## TrackWood 15.7830 8.0305 1.965 0.0512 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 35.47 on 152 degrees of freedom
## (84 observations deleted due to missingness)
## Multiple R-squared: 0.3714, Adjusted R-squared: 0.3632
## F-statistic: 44.91 on 2 and 152 DF, p-value: 4.734e-16