IMPORTING THE DATASET AND CHANGING THE COLUM NAMES TO HELP WITH CORRELATION MATRIX
EVP <- read.csv("C:/Users/Ozili Nwokobia/Downloads/Electric_Vehicle_Population_Data.csv", header=FALSE, stringsAsFactors=TRUE)
View(EVP)
str(EVP)
## 'data.frame': 181459 obs. of 17 variables:
## $ V1 : Factor w/ 11061 levels "1C4JJXN60P","1C4JJXN61P",..: 7382 8404 8432 3624 18 3407 32 902 3330 7922 ...
## $ V2 : Factor w/ 195 levels "","Ada","Adams",..: 42 82 175 175 175 192 175 84 163 82 ...
## $ V3 : Factor w/ 728 levels "","Aberdeen",..: 113 575 451 308 641 724 451 297 407 575 ...
## $ V4 : Factor w/ 45 levels "AE","AK","AL",..: 40 44 44 44 44 44 44 44 44 44 ...
## $ V5 : Factor w/ 873 levels "","01545","01731",..: 873 403 579 585 629 726 578 512 362 398 ...
## $ V6 : Factor w/ 23 levels "1997","1998",..: 23 15 16 15 19 18 21 15 18 20 ...
## $ V7 : Factor w/ 41 levels "ALFA ROMEO","AUDI",..: 24 2 2 36 18 36 18 7 36 2 ...
## $ V8 : Factor w/ 144 levels "330E","500","530E",..: 82 9 9 84 138 83 138 136 83 101 ...
## $ V9 : Factor w/ 3 levels "Battery Electric Vehicle (BEV)",..: 2 3 3 1 3 1 3 3 1 3 ...
## $ V10: Factor w/ 4 levels "Clean Alternative Fuel Vehicle (CAFV) Eligibility",..: 1 4 4 2 4 2 4 2 2 4 ...
## $ V11: Factor w/ 104 levels "0","10","100",..: 104 21 21 33 47 61 32 81 64 39 ...
## $ V12: Factor w/ 32 levels "0","102000","109000",..: 32 1 1 1 1 1 1 1 1 1 ...
## $ V13: Factor w/ 51 levels "","1","10","11",..: 51 29 16 16 14 7 16 17 2 31 ...
## $ V14: Factor w/ 181459 levels "100005","100021575",..: 181459 94962 99631 25259 25283 81396 75801 28622 179999 61426 ...
## $ V15: Factor w/ 872 levels "","POINT (-104.5164515 37.1682585)",..: 872 471 618 592 600 264 609 549 435 469 ...
## $ V16: Factor w/ 78 levels "","AVISTA CORP",..: 63 59 76 76 76 68 76 76 76 59 ...
## $ V17: Factor w/ 2126 levels "","01001020100",..: 181 886 1941 1971 1985 2078 1937 1241 1673 800 ...
new_column_names <- c(
"VIN", "County", "City", "State", "Postal_Code", "Model_Year",
"Make", "Model", "Electric_Vehicle_Type",
"CAFV_Eligibility",
"Electric Range", "Base MSRP", "Legislative_District",
"DOL_Vehicle_ID", "Vehicle_Location", "Electric_Utility",
"2020_Census_Tract"
)
names(EVP)<-new_column_names
str(EVP)
## 'data.frame': 181459 obs. of 17 variables:
## $ VIN : Factor w/ 11061 levels "1C4JJXN60P","1C4JJXN61P",..: 7382 8404 8432 3624 18 3407 32 902 3330 7922 ...
## $ County : Factor w/ 195 levels "","Ada","Adams",..: 42 82 175 175 175 192 175 84 163 82 ...
## $ City : Factor w/ 728 levels "","Aberdeen",..: 113 575 451 308 641 724 451 297 407 575 ...
## $ State : Factor w/ 45 levels "AE","AK","AL",..: 40 44 44 44 44 44 44 44 44 44 ...
## $ Postal_Code : Factor w/ 873 levels "","01545","01731",..: 873 403 579 585 629 726 578 512 362 398 ...
## $ Model_Year : Factor w/ 23 levels "1997","1998",..: 23 15 16 15 19 18 21 15 18 20 ...
## $ Make : Factor w/ 41 levels "ALFA ROMEO","AUDI",..: 24 2 2 36 18 36 18 7 36 2 ...
## $ Model : Factor w/ 144 levels "330E","500","530E",..: 82 9 9 84 138 83 138 136 83 101 ...
## $ Electric_Vehicle_Type: Factor w/ 3 levels "Battery Electric Vehicle (BEV)",..: 2 3 3 1 3 1 3 3 1 3 ...
## $ CAFV_Eligibility : Factor w/ 4 levels "Clean Alternative Fuel Vehicle (CAFV) Eligibility",..: 1 4 4 2 4 2 4 2 2 4 ...
## $ Electric Range : Factor w/ 104 levels "0","10","100",..: 104 21 21 33 47 61 32 81 64 39 ...
## $ Base MSRP : Factor w/ 32 levels "0","102000","109000",..: 32 1 1 1 1 1 1 1 1 1 ...
## $ Legislative_District : Factor w/ 51 levels "","1","10","11",..: 51 29 16 16 14 7 16 17 2 31 ...
## $ DOL_Vehicle_ID : Factor w/ 181459 levels "100005","100021575",..: 181459 94962 99631 25259 25283 81396 75801 28622 179999 61426 ...
## $ Vehicle_Location : Factor w/ 872 levels "","POINT (-104.5164515 37.1682585)",..: 872 471 618 592 600 264 609 549 435 469 ...
## $ Electric_Utility : Factor w/ 78 levels "","AVISTA CORP",..: 63 59 76 76 76 68 76 76 76 59 ...
## $ 2020_Census_Tract : Factor w/ 2126 levels "","01001020100",..: 181 886 1941 1971 1985 2078 1937 1241 1673 800 ...
CREATING THE CORRELATION MATRIX
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
summary(EVP)
## VIN County City State
## 7SAYGDEE6P: 1244 King :94460 Seattle : 30045 WA :181060
## 7SAYGDEE7P: 1242 Snohomish:21439 Bellevue : 9117 CA : 102
## 7SAYGDEE8P: 1199 Pierce :14043 Redmond : 6568 VA : 47
## 7SAYGDEE5P: 1191 Clark :10675 Vancouver: 6329 MD : 32
## 7SAYGDEEXP: 1184 Thurston : 6600 Bothell : 5961 TX : 26
## 7SAYGDEE9P: 1172 Kitsap : 5956 Kirkland : 5465 NC : 17
## (Other) :174227 (Other) :28286 (Other) :117974 (Other): 175
## Postal_Code Model_Year Make Model
## 98052 : 4637 2023 :58393 TESLA :80819 MODEL Y:37007
## 98012 : 3392 2022 :27922 NISSAN :14037 MODEL 3:30150
## 98033 : 3135 2021 :19034 CHEVROLET:13864 LEAF :13356
## 98188 : 3012 2018 :14291 FORD : 9527 MODEL S: 7731
## 98006 : 2908 2020 :11851 BMW : 7680 BOLT EV: 6935
## 98004 : 2885 2019 :10922 KIA : 7642 MODEL X: 5883
## (Other):161490 (Other):39046 (Other) :47890 (Other):80397
## Electric_Vehicle_Type
## Battery Electric Vehicle (BEV) :141973
## Electric Vehicle Type : 1
## Plug-in Hybrid Electric Vehicle (PHEV): 39485
##
##
##
##
## CAFV_Eligibility
## Clean Alternative Fuel Vehicle (CAFV) Eligibility : 1
## Clean Alternative Fuel Vehicle Eligible :66816
## Eligibility unknown as battery range has not been researched:94730
## Not eligible due to low battery range :19912
##
##
##
## Electric Range Base MSRP Legislative_District DOL_Vehicle_ID
## 0 :94730 0 :178146 41 : 11727 100005 : 1
## 215 : 6395 69900 : 1346 45 : 10937 100021575: 1
## 25 : 4186 31950 : 382 48 : 10003 10002338 : 1
## 220 : 4078 52900 : 221 1 : 7907 100024515: 1
## 32 : 4065 32250 : 134 11 : 7761 100025159: 1
## 238 : 3905 59900 : 130 5 : 7755 100039761: 1
## (Other):64100 (Other): 1100 (Other):125369 (Other) :181453
## Vehicle_Location
## POINT (-122.1207376 47.6705374): 4637
## POINT (-122.1873 47.820245) : 3392
## POINT (-122.20264 47.6785) : 3135
## POINT (-122.271716 47.452837) : 3012
## POINT (-122.16937 47.571015) : 2908
## POINT (-122.202397 47.619252) : 2885
## (Other) :161490
## Electric_Utility
## PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) :67180
## PUGET SOUND ENERGY INC :36705
## CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA) :32012
## BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF CLARK COUNTY - (WA) :10420
## BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PENINSULA LIGHT COMPANY: 7959
## PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM COUNTY : 4090
## (Other) :23093
## 2020_Census_Tract
## 53033028200: 2551
## 53033028500: 931
## 53033026200: 861
## 53033032321: 832
## 53033009300: 698
## 53067011200: 692
## (Other) :174894
numerical_data <- EVP[, c("Model_Year", "Make", "Electric Range", "Base MSRP", "Legislative_District", "DOL_Vehicle_ID", "2020_Census_Tract")]
numerical_data <- data.frame(
Model_Year = as.numeric(EVP$`Model_Year`),
Make = as.numeric(EVP$Make),
Electric_Range = as.numeric(EVP$`Electric Range`),
Base_MSRP = as.numeric(EVP$`Base MSRP`),
Legislative_District = as.numeric(EVP$`Legislative_District`),
DOL_Vehicle_ID = as.numeric(EVP$`DOL_Vehicle_ID`),
Census_Tract = as.numeric(EVP$`2020_Census_Tract`)
)
cor_matrix <- cor(numerical_data, use = "complete.obs")
print(cor_matrix)
## Model_Year Make Electric_Range Base_MSRP
## Model_Year 1.000000000 0.10604580 -0.631586234 -0.229088717
## Make 0.106045802 1.00000000 -0.235628109 0.023671795
## Electric_Range -0.631586234 -0.23562811 1.000000000 0.029286264
## Base_MSRP -0.229088717 0.02367180 0.029286264 1.000000000
## Legislative_District -0.011734536 0.05926066 -0.036061521 0.009837911
## DOL_Vehicle_ID 0.215340032 -0.01554491 -0.090602918 -0.038630819
## Census_Tract -0.009285861 -0.01660358 0.001267998 -0.001126856
## Legislative_District DOL_Vehicle_ID Census_Tract
## Model_Year -0.011734536 0.215340032 -0.009285861
## Make 0.059260664 -0.015544905 -0.016603581
## Electric_Range -0.036061521 -0.090602918 0.001267998
## Base_MSRP 0.009837911 -0.038630819 -0.001126856
## Legislative_District 1.000000000 -0.015431012 -0.073510279
## DOL_Vehicle_ID -0.015431012 1.000000000 0.004637161
## Census_Tract -0.073510279 0.004637161 1.000000000
corrplot(cor_matrix, method = "circle", title = "Circle Method")
corrplot(cor_matrix, method = "color", title = "Color Method")