Importing the Excel spreadsheet as an R data frame

library("readxl")
# Read the electric-car workbook into `df`
df <- read_excel("Electric cars.xlsx")
# Show the first rows to check that the import worked
head(df)
## # A tibble: 6 × 6
##    Year `BEV average price (USD)` `Global Sales Volume` Mileage `Lithium Ion B…`
##   <dbl>                     <dbl>                 <dbl>   <dbl>            <dbl>
## 1  2010                     64032                 50000     127             1191
## 2  2011                     51736                 60000     139              924
## 3  2012                     52084                 80000     160              726
## 4  2013                     56028                150000     189              668
## 5  2014                     44776                224700     210              592
## 6  2015                     42340                380100     211              384
## # … with 1 more variable: AveragePriceOfNewCar <dbl>

The average price of a new car is the response (y) variable, and the range (mileage) in kilometers is the predictor (x) variable.

# Copy the two columns used in the analysis into separate vectors so that
# later calls can refer to them by short names
x <- df[["Mileage"]]
y <- df[["AveragePriceOfNewCar"]]
print("Printing X values to test if pushed correctly")
## [1] "Printing X values to test if pushed correctly"
print(x)
##  [1] 127 139 160 189 210 211 233 267 304 336 338 349 400
print("Printing Y values to test if pushed correctly")
## [1] "Printing Y values to test if pushed correctly"
print(y)
##  [1] 37500 37311 36874 37826 37519 38240 38455 38350 38365 40546 44021 49185
## [13] 48000

Plotting the dataset

# Scatter plot of y against x with a title and axis labels
plot(
  x, y,
  main = "Electric Cars",
  xlab = "Range (Mileage) in Kilometers",
  ylab = "Average Price of New Car",
  col = "blue"
)

Running the cor function on the dataset to find the correlation between the two variables

# Correlation coefficient between x and y
cor(x, y)
## [1] 0.8155397

Running the lm function on the dataset to find the linear regression line

# Fit a linear model with y as the response and x as the only predictor
cars1 <- lm(y ~ x)
# Printing the model shows the call and the fitted intercept and slope
cars1
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##    30469.94        38.64

Plotting the linear regression line

# Redraw the scatter plot and overlay the fitted regression line
plot(
  x, y,
  main = "Electric Cars",
  xlab = "Range (Mileage) in Kilometers",
  ylab = "Average Price of New Car",
  col = "blue"
)
# Reuse the model already stored in `cars1` instead of refitting lm(y ~ x);
# abline() draws the line from the model's intercept and slope
abline(cars1)

Summary of the fitted regression model

# Summarise the fitted model once and reuse the result below
model_summary <- summary(cars1)
# Coefficient table: estimate, standard error, t value and p-value per term.
# The full element name is used; `$coef` only worked through partial matching.
coef_table <- model_summary$coefficients
coef_table
##                Estimate  Std. Error   t value     Pr(>|t|)
## (Intercept) 30469.94300 2190.055443 13.912864 2.509358e-08
## x              38.64013    8.267068  4.673982 6.781452e-04
rSquare <- model_summary$r.squared
outputVar <- paste(
  "The coefficient of determination is",
  rSquare,
  "so the model is moderatly positive. This means that the model is not very strong, but it is not weak either."
)
outputVar
## [1] "The coefficient of determination is 0.665104927228271 so the model is moderatly positive. This means that the model is not very strong, but it is not weak either."
# Look the estimates up by row and column name rather than by linear position
# in the matrix, so the lookup does not depend on the table's layout
intercept <- coef_table["(Intercept)", "Estimate"]
slope <- coef_table["x", "Estimate"]
outputVar <- paste(
  "The Equation of the regression line is: ŷ = b0 + b1x. p̂ = ",
  intercept, "+", slope, "x"
)
outputVar
## [1] "The Equation of the regression line is: ŷ = b0 + b1x. p̂ =  30469.9429989069 + 38.640129026727 x"

Calculating the ANOVA table for the linear model

# Analysis of variance table for the fitted model `cars1`: one row for the
# predictor x and one for the residuals
anova(cars1)
## Analysis of Variance Table
## 
## Response: y
##           Df    Sum Sq   Mean Sq F value    Pr(>F)    
## x          1 139323361 139323361  21.846 0.0006781 ***
## Residuals 11  70152400   6377491                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Anova Analysis

# Compute the ANOVA table once; both sums of squares are read from it
anova_table <- anova(cars1)
# Extract the column by its full name: `$Sum` only worked through partial
# matching of "Sum Sq"
sum_sq <- anova_table[["Sum Sq"]]
SSR <- sum_sq[1] # first row: regression on x
SSE <- sum_sq[2] # second row: residuals
SST <- SSR + SSE
output1 <- paste("The sum of squares for the regression is", SSR)
output2 <- paste("The sum of squares for the error is", SSE)
output3 <- paste("The total sum of squares is", SSR, "+", SSE, "=", SST)
output4 <- paste(
  "We can calculate the R^2 value from the total sum of squares and the sum of squares for the regression. R^2 = SSR/SST = ",
  SSR, "/", SST, "=", SSR / SST
)
output1
## [1] "The sum of squares for the regression is 139323360.827154"
output2
## [1] "The sum of squares for the error is 70152400.2497694"
output3
## [1] "The total sum of squares is 139323360.827154 + 70152400.2497694 = 209475761.076923"
output4
## [1] "We can calculate the R^2 value from the total sum of squares and the sum of squares for the regression. R^2 = SSR/SST =  139323360.827154 / 209475761.076923 = 0.665104927228272"

Plotting the final graph with the linear regression model

# Final scatter plot with the fitted regression line drawn on top
plot(x, y, main = "Range vs Average Price")
# coef() returns the model's intercept and slope without relying on partial
# matching of `$coef` to `$coefficients`
abline(coef = coef(cars1), lty = 1)