## 1a. Import the data set into R.
# The file name must be wrapped in plain ASCII double quotes; typographic
# ("curly") quotes are not valid R string delimiters and cause a parse error.
cardata <- read.csv("el_car_data_1.csv")
## Check the first 10 rows, the last 10 rows, and the total number of rows.
head(cardata, 10)
tail(cardata, 10)
nrow(cardata)
## 1b. Calculate two new variables (columns), pct_diesel and pct_gasoline:
## the percentages of diesel and gasoline cars in each municipality.
cardata$pct_diesel <- (cardata$diesel / cardata$total) * 100
cardata$pct_gasoline <- (cardata$gasoline / cardata$total) * 100
## 1c. Define zone as an R factor, giving the vector "levels" that correspond
## to the possible values. The levels are listed in the order they should
## appear in tables and plots.
cardata$zone <- factor(
  cardata$zone,
  levels = c("city", "below30", "30to60", "above60"),
  ordered = TRUE
)
## 1d. Split the data into one data frame per year (cardata12, cardata18).
## Each should contain 422 rows; the two row counts are shown for checking.
# which() keeps only rows where the comparison is TRUE, so rows with a
# missing year are left out.
cardata12 <- cardata[which(cardata$year == 2012), ]
cardata18 <- cardata[which(cardata$year == 2018), ]
nrow(cardata12)
nrow(cardata18)
## 1e. In 2018, check how many municipalities are in each zone, both with a
## table and with a barplot that visualizes the distribution.
# By table:
table(cardata18$zone)
# By barplot:
barplot(table(cardata18$zone), main = "Municipalities by Zone in 2018")
## 1f. In 2018, what were the maximum and minimum populations among the
## municipalities, and what are the names of those two municipalities?
## (See ?which.min.)
# Maximum and minimum population:
max_pop <- max(cardata18$population)
min_pop <- min(cardata18$population)
# Names of the municipalities with the maximum and minimum population.
# which.max() / which.min() return the row position of the extreme value.
max_muni <- cardata18$municip_name[which.max(cardata18$population)]
min_muni <- cardata18$municip_name[which.min(cardata18$population)]
# End each message with a newline so the two results print on separate lines.
cat("Max population:", max_pop, "in", max_muni, "\n")
cat("Min population:", min_pop, "in", min_muni, "\n")
## 1g. Mean of income_med_adj, computed separately for each year.
tapply(cardata$income_med_adj, cardata$year, mean)
## 1h. Municipalities with the highest values of pct_elcar in 2012.
# order() returns row positions sorted by pct_elcar, largest first.
S <- order(cardata12$pct_elcar, decreasing = TRUE)
# Look at the first 10 row positions:
S[1:10]
top10_2012 <- cardata12[S[1:10], ]
## 1i. Municipalities with the highest values of pct_elcar in 2018.
S18 <- order(cardata18$pct_elcar, decreasing = TRUE)
top10_2018 <- cardata18[S18[1:10], ]
# Show the 2018 top 10 with only municipality name, pct_elcar, and zone.
top10_2018[, c("municip_name", "pct_elcar", "zone")]
## 1j. Mean value of pct_elcar by zone in 2018.
with(cardata18, tapply(pct_elcar, zone, mean))
## 1k. Boxplot of pct_elcar by zone in 2018.
boxplot(
  pct_elcar ~ zone,
  data = cardata18,
  main = "Electric Car Percentage by Zone (2018)"
)
## 1l. National percentage of electric cars.
# The national figure is total electric cars divided by total cars, summed
# over all municipalities (not the mean of the municipal percentages).
# Percentage in 2012:
total_el_2012 <- sum(cardata12$el)
total_cars_2012 <- sum(cardata12$total)
national_pct_2012 <- (total_el_2012 / total_cars_2012) * 100
# Percentage in 2018:
total_el_2018 <- sum(cardata18$el)
total_cars_2018 <- sum(cardata18$total)
national_pct_2018 <- (total_el_2018 / total_cars_2018) * 100
# Show the percentage of electric cars in each year (2012 and 2018).
# End each message with a newline so the two results print on separate lines.
cat("National pct el cars 2012:", national_pct_2012, "%", "\n")
cat("National pct el cars 2018:", national_pct_2018, "%", "\n")
## 1m. Correlation and scatterplot: pct_expense vs pct_elcar (2018).
cor_expense_el <- cor(cardata18$pct_expense, cardata18$pct_elcar)
cat("Correlation:", cor_expense_el, "\n")
plot(
  cardata18$pct_expense, cardata18$pct_elcar,
  xlab = "Pct Expense",
  ylab = "Pct Electric Cars",
  main = "Pct Expense vs Electric Cars (2018)"
)
## 1n. Correlation and scatterplot: sent_index vs pct_elcar (2018).
cor_sent_el <- cor(cardata18$sent_index, cardata18$pct_elcar)
cat("Correlation:", cor_sent_el, "\n")
plot(
  cardata18$sent_index, cardata18$pct_elcar,
  xlab = "Sent Index",
  ylab = "Pct Electric Cars",
  main = "Sent Index vs Electric Cars (2018)"
)
## 1o. Correlation and scatterplot: pct_diesel vs pct_elcar (2018).
cor_diesel_el <- cor(cardata18$pct_diesel, cardata18$pct_elcar)
cat("Correlation:", cor_diesel_el, "\n")
plot(
  cardata18$pct_diesel, cardata18$pct_elcar,
  xlab = "pct_diesel",
  ylab = "Pct Electric Cars",
  main = "pct_diesel vs Electric Cars (2018)"
)
## 1p. Histogram of the 2018 municipal population distribution.
# breaks = 50 asks for roughly 50 bins.
hist(cardata18$population, breaks = 50, main = "Population Distribution 2018")