IMPORTING THE DATASET AND CHANGING THE COLUM NAMES TO HELP WITH CORRELATION MATRIX

EVP <- read.csv("C:/Users/Ozili Nwokobia/Downloads/Electric_Vehicle_Population_Data.csv", header=FALSE, stringsAsFactors=TRUE)
View(EVP)
str(EVP)
## 'data.frame':    181459 obs. of  17 variables:
##  $ V1 : Factor w/ 11061 levels "1C4JJXN60P","1C4JJXN61P",..: 7382 8404 8432 3624 18 3407 32 902 3330 7922 ...
##  $ V2 : Factor w/ 195 levels "","Ada","Adams",..: 42 82 175 175 175 192 175 84 163 82 ...
##  $ V3 : Factor w/ 728 levels "","Aberdeen",..: 113 575 451 308 641 724 451 297 407 575 ...
##  $ V4 : Factor w/ 45 levels "AE","AK","AL",..: 40 44 44 44 44 44 44 44 44 44 ...
##  $ V5 : Factor w/ 873 levels "","01545","01731",..: 873 403 579 585 629 726 578 512 362 398 ...
##  $ V6 : Factor w/ 23 levels "1997","1998",..: 23 15 16 15 19 18 21 15 18 20 ...
##  $ V7 : Factor w/ 41 levels "ALFA ROMEO","AUDI",..: 24 2 2 36 18 36 18 7 36 2 ...
##  $ V8 : Factor w/ 144 levels "330E","500","530E",..: 82 9 9 84 138 83 138 136 83 101 ...
##  $ V9 : Factor w/ 3 levels "Battery Electric Vehicle (BEV)",..: 2 3 3 1 3 1 3 3 1 3 ...
##  $ V10: Factor w/ 4 levels "Clean Alternative Fuel Vehicle (CAFV) Eligibility",..: 1 4 4 2 4 2 4 2 2 4 ...
##  $ V11: Factor w/ 104 levels "0","10","100",..: 104 21 21 33 47 61 32 81 64 39 ...
##  $ V12: Factor w/ 32 levels "0","102000","109000",..: 32 1 1 1 1 1 1 1 1 1 ...
##  $ V13: Factor w/ 51 levels "","1","10","11",..: 51 29 16 16 14 7 16 17 2 31 ...
##  $ V14: Factor w/ 181459 levels "100005","100021575",..: 181459 94962 99631 25259 25283 81396 75801 28622 179999 61426 ...
##  $ V15: Factor w/ 872 levels "","POINT (-104.5164515 37.1682585)",..: 872 471 618 592 600 264 609 549 435 469 ...
##  $ V16: Factor w/ 78 levels "","AVISTA CORP",..: 63 59 76 76 76 68 76 76 76 59 ...
##  $ V17: Factor w/ 2126 levels "","01001020100",..: 181 886 1941 1971 1985 2078 1937 1241 1673 800 ...
new_column_names <- c(
  "VIN", "County", "City", "State", "Postal_Code", "Model_Year", 
  "Make", "Model", "Electric_Vehicle_Type", 
  "CAFV_Eligibility", 
  "Electric Range", "Base MSRP", "Legislative_District", 
  "DOL_Vehicle_ID", "Vehicle_Location", "Electric_Utility", 
  "2020_Census_Tract"
)
names(EVP)<-new_column_names
str(EVP)
## 'data.frame':    181459 obs. of  17 variables:
##  $ VIN                  : Factor w/ 11061 levels "1C4JJXN60P","1C4JJXN61P",..: 7382 8404 8432 3624 18 3407 32 902 3330 7922 ...
##  $ County               : Factor w/ 195 levels "","Ada","Adams",..: 42 82 175 175 175 192 175 84 163 82 ...
##  $ City                 : Factor w/ 728 levels "","Aberdeen",..: 113 575 451 308 641 724 451 297 407 575 ...
##  $ State                : Factor w/ 45 levels "AE","AK","AL",..: 40 44 44 44 44 44 44 44 44 44 ...
##  $ Postal_Code          : Factor w/ 873 levels "","01545","01731",..: 873 403 579 585 629 726 578 512 362 398 ...
##  $ Model_Year           : Factor w/ 23 levels "1997","1998",..: 23 15 16 15 19 18 21 15 18 20 ...
##  $ Make                 : Factor w/ 41 levels "ALFA ROMEO","AUDI",..: 24 2 2 36 18 36 18 7 36 2 ...
##  $ Model                : Factor w/ 144 levels "330E","500","530E",..: 82 9 9 84 138 83 138 136 83 101 ...
##  $ Electric_Vehicle_Type: Factor w/ 3 levels "Battery Electric Vehicle (BEV)",..: 2 3 3 1 3 1 3 3 1 3 ...
##  $ CAFV_Eligibility     : Factor w/ 4 levels "Clean Alternative Fuel Vehicle (CAFV) Eligibility",..: 1 4 4 2 4 2 4 2 2 4 ...
##  $ Electric Range       : Factor w/ 104 levels "0","10","100",..: 104 21 21 33 47 61 32 81 64 39 ...
##  $ Base MSRP            : Factor w/ 32 levels "0","102000","109000",..: 32 1 1 1 1 1 1 1 1 1 ...
##  $ Legislative_District : Factor w/ 51 levels "","1","10","11",..: 51 29 16 16 14 7 16 17 2 31 ...
##  $ DOL_Vehicle_ID       : Factor w/ 181459 levels "100005","100021575",..: 181459 94962 99631 25259 25283 81396 75801 28622 179999 61426 ...
##  $ Vehicle_Location     : Factor w/ 872 levels "","POINT (-104.5164515 37.1682585)",..: 872 471 618 592 600 264 609 549 435 469 ...
##  $ Electric_Utility     : Factor w/ 78 levels "","AVISTA CORP",..: 63 59 76 76 76 68 76 76 76 59 ...
##  $ 2020_Census_Tract    : Factor w/ 2126 levels "","01001020100",..: 181 886 1941 1971 1985 2078 1937 1241 1673 800 ...

CREATING THE CORRELATION MATRIX

library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
summary(EVP)
##          VIN               County             City            State       
##  7SAYGDEE6P:  1244   King     :94460   Seattle  : 30045   WA     :181060  
##  7SAYGDEE7P:  1242   Snohomish:21439   Bellevue :  9117   CA     :   102  
##  7SAYGDEE8P:  1199   Pierce   :14043   Redmond  :  6568   VA     :    47  
##  7SAYGDEE5P:  1191   Clark    :10675   Vancouver:  6329   MD     :    32  
##  7SAYGDEEXP:  1184   Thurston : 6600   Bothell  :  5961   TX     :    26  
##  7SAYGDEE9P:  1172   Kitsap   : 5956   Kirkland :  5465   NC     :    17  
##  (Other)   :174227   (Other)  :28286   (Other)  :117974   (Other):   175  
##   Postal_Code       Model_Year           Make           Model      
##  98052  :  4637   2023   :58393   TESLA    :80819   MODEL Y:37007  
##  98012  :  3392   2022   :27922   NISSAN   :14037   MODEL 3:30150  
##  98033  :  3135   2021   :19034   CHEVROLET:13864   LEAF   :13356  
##  98188  :  3012   2018   :14291   FORD     : 9527   MODEL S: 7731  
##  98006  :  2908   2020   :11851   BMW      : 7680   BOLT EV: 6935  
##  98004  :  2885   2019   :10922   KIA      : 7642   MODEL X: 5883  
##  (Other):161490   (Other):39046   (Other)  :47890   (Other):80397  
##                             Electric_Vehicle_Type
##  Battery Electric Vehicle (BEV)        :141973   
##  Electric Vehicle Type                 :     1   
##  Plug-in Hybrid Electric Vehicle (PHEV): 39485   
##                                                  
##                                                  
##                                                  
##                                                  
##                                                      CAFV_Eligibility
##  Clean Alternative Fuel Vehicle (CAFV) Eligibility           :    1  
##  Clean Alternative Fuel Vehicle Eligible                     :66816  
##  Eligibility unknown as battery range has not been researched:94730  
##  Not eligible due to low battery range                       :19912  
##                                                                      
##                                                                      
##                                                                      
##  Electric Range    Base MSRP      Legislative_District   DOL_Vehicle_ID  
##  0      :94730   0      :178146   41     : 11727       100005   :     1  
##  215    : 6395   69900  :  1346   45     : 10937       100021575:     1  
##  25     : 4186   31950  :   382   48     : 10003       10002338 :     1  
##  220    : 4078   52900  :   221   1      :  7907       100024515:     1  
##  32     : 4065   32250  :   134   11     :  7761       100025159:     1  
##  238    : 3905   59900  :   130   5      :  7755       100039761:     1  
##  (Other):64100   (Other):  1100   (Other):125369       (Other)  :181453  
##                         Vehicle_Location 
##  POINT (-122.1207376 47.6705374):  4637  
##  POINT (-122.1873 47.820245)    :  3392  
##  POINT (-122.20264 47.6785)     :  3135  
##  POINT (-122.271716 47.452837)  :  3012  
##  POINT (-122.16937 47.571015)   :  2908  
##  POINT (-122.202397 47.619252)  :  2885  
##  (Other)                        :161490  
##                                                                         Electric_Utility
##  PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA)                                  :67180  
##  PUGET SOUND ENERGY INC                                                         :36705  
##  CITY OF SEATTLE - (WA)|CITY OF TACOMA - (WA)                                   :32012  
##  BONNEVILLE POWER ADMINISTRATION||PUD NO 1 OF CLARK COUNTY - (WA)               :10420  
##  BONNEVILLE POWER ADMINISTRATION||CITY OF TACOMA - (WA)||PENINSULA LIGHT COMPANY: 7959  
##  PUGET SOUND ENERGY INC||PUD NO 1 OF WHATCOM COUNTY                             : 4090  
##  (Other)                                                                        :23093  
##    2020_Census_Tract 
##  53033028200:  2551  
##  53033028500:   931  
##  53033026200:   861  
##  53033032321:   832  
##  53033009300:   698  
##  53067011200:   692  
##  (Other)    :174894
numerical_data <- EVP[, c("Model_Year", "Make", "Electric Range", "Base MSRP", "Legislative_District", "DOL_Vehicle_ID", "2020_Census_Tract")]

numerical_data <- data.frame(
  Model_Year = as.numeric(EVP$`Model_Year`),
  Make = as.numeric(EVP$Make),
  Electric_Range = as.numeric(EVP$`Electric Range`),
  Base_MSRP = as.numeric(EVP$`Base MSRP`),
  Legislative_District = as.numeric(EVP$`Legislative_District`),
  DOL_Vehicle_ID = as.numeric(EVP$`DOL_Vehicle_ID`),
  Census_Tract = as.numeric(EVP$`2020_Census_Tract`)
)



cor_matrix <- cor(numerical_data, use = "complete.obs")
print(cor_matrix)
##                        Model_Year        Make Electric_Range    Base_MSRP
## Model_Year            1.000000000  0.10604580   -0.631586234 -0.229088717
## Make                  0.106045802  1.00000000   -0.235628109  0.023671795
## Electric_Range       -0.631586234 -0.23562811    1.000000000  0.029286264
## Base_MSRP            -0.229088717  0.02367180    0.029286264  1.000000000
## Legislative_District -0.011734536  0.05926066   -0.036061521  0.009837911
## DOL_Vehicle_ID        0.215340032 -0.01554491   -0.090602918 -0.038630819
## Census_Tract         -0.009285861 -0.01660358    0.001267998 -0.001126856
##                      Legislative_District DOL_Vehicle_ID Census_Tract
## Model_Year                   -0.011734536    0.215340032 -0.009285861
## Make                          0.059260664   -0.015544905 -0.016603581
## Electric_Range               -0.036061521   -0.090602918  0.001267998
## Base_MSRP                     0.009837911   -0.038630819 -0.001126856
## Legislative_District          1.000000000   -0.015431012 -0.073510279
## DOL_Vehicle_ID               -0.015431012    1.000000000  0.004637161
## Census_Tract                 -0.073510279    0.004637161  1.000000000
corrplot(cor_matrix, method = "circle", title = "Circle Method")

corrplot(cor_matrix, method = "color", title = "Color Method")