## 1a. Import the data set into RStudio.

# The file name is relative to the current working directory.
cardata <- read.csv("el_car_data_1.csv")

## Check the first 10 rows, the last 10 rows, and the total number of rows.

# First 10 rows
head(cardata, 10)

# Last 10 rows
tail(cardata, 10)

# Number of rows
nrow(cardata)

## 1b. Calculate two new variables (columns), pct_diesel and pct_gasoline:
## the percentages of diesel and gasoline cars in the municipalities.

# Each percentage is the fuel-type count divided by the row's total, times 100.
cardata$pct_diesel <- (cardata$diesel / cardata$total) * 100

cardata$pct_gasoline <- (cardata$gasoline / cardata$total) * 100

## 1c. It is best to have zone defined as an R factor. This means we give the
## vector "levels" corresponding to the possible values.

# The levels are listed explicitly so that tables and plots follow this order.
# Any value of zone that is not one of these four levels becomes NA.
cardata$zone <- factor(
  cardata$zone,
  levels = c("city", "below30", "30to60", "above60"),
  ordered = TRUE
)

## 1d. Make subsets cardata12 and cardata18 with data for the two years
## involved. Check that there are 422 rows of data in each.

# cardata12: rows of cardata where year is 2012
cardata12 <- subset(cardata, year == 2012)

# cardata18: rows of cardata where year is 2018
cardata18 <- subset(cardata, year == 2018)

# Check the number of rows (each is expected to be 422)
nrow(cardata12)

nrow(cardata18)

## 1e. In 2018, check how many municipalities are in the different zones by
## making a table, and use a barplot to visualize the distribution.

# By table
table(cardata18$zone)

# By barplot
barplot(table(cardata18$zone), main = "Municipalities by Zone in 2018")

## 1f. In 2018, what was the maximum and minimum population in municipalities,
## and what are the names of the two? (Can be solved with what we know, but
## you might want to check ?which.min)

# Maximum population
max_pop <- max(cardata18$population)

# Minimum population
min_pop <- min(cardata18$population)

# Names of the municipalities with the maximum and minimum population.
# which.max()/which.min() return the position of the first extreme value,
# which is then used to look up the matching municipality name.
max_muni <- cardata18$municip_name[which.max(cardata18$population)]

min_muni <- cardata18$municip_name[which.min(cardata18$population)]

# Print the maximum and minimum population together with the municipality
# names. Each message ends with a newline so the two do not run together.
cat("Max population:", max_pop, "in", max_muni, "\n")

cat("Min population:", min_pop, "in", min_muni, "\n")

## 1g. Mean adjusted income (income_med_adj) for each year.

# Group the income column by year and take the mean within each group.
tapply(cardata[["income_med_adj"]], cardata[["year"]], mean)

## 1h. Municipalities in cardata12 with the highest values of pct_elcar (2012).

# Row positions of cardata12, ordered from highest to lowest pct_elcar.
S <- order(cardata12[["pct_elcar"]], decreasing = TRUE)

# Look at the first 10 positions.
S[seq_len(10)]

# Keep the rows of the ten municipalities with the highest pct_elcar.
top10_2012 <- cardata12[S[seq_len(10)], ]

## 1i. Municipalities in cardata18 with the highest values of pct_elcar (2018).

# Row positions of cardata18, ordered from highest to lowest pct_elcar.
S18 <- order(cardata18$pct_elcar, decreasing = TRUE)

top10_2018 <- cardata18[S18[1:10], ]

# Show the top 10 of 2018 with only the municipality name, pct_elcar and zone.
top10_2018[, c("municip_name", "pct_elcar", "zone")]

## 1j. Find the mean value of pct_elcar by zone in 2018.

# Group pct_elcar by zone and take the mean within each zone.
tapply(cardata18[["pct_elcar"]], cardata18[["zone"]], mean)

## 1k. Boxplot of pct_elcar by zone in 2018.

# One box per zone, drawn from the 2018 subset.
with(
  cardata18,
  boxplot(pct_elcar ~ zone, main = "Electric Car Percentage by Zone (2018)")
)

## 1l. National percentage of electric cars.

# The national figure is the sum of electric cars over all municipalities
# divided by the sum of all cars, not the mean of the per-municipality
# percentages.

# Percentage in 2012
total_el_2012 <- sum(cardata12$el)

total_cars_2012 <- sum(cardata12$total)

national_pct_2012 <- (total_el_2012 / total_cars_2012) * 100

# Percentage in 2018
total_el_2018 <- sum(cardata18$el)

total_cars_2018 <- sum(cardata18$total)

national_pct_2018 <- (total_el_2018 / total_cars_2018) * 100

# Show the percentage of electric cars in each year (2012 and 2018).
# Each message ends with a newline so the two do not run together.
cat("National pct el cars 2012:", national_pct_2012, "%\n")

cat("National pct el cars 2018:", national_pct_2018, "%\n")

# As the statistics above show, the national percentage of electric cars in
# Norway increased from 0.33% in 2012 to 7.17% in 2018.

## 1m. Correlation and scatterplot: pct_expense vs pct_elcar (2018).

cor_expense_el <- cor(cardata18$pct_expense, cardata18$pct_elcar)

cat("Correlation:", cor_expense_el, "\n")

# Scatter plot
plot(
  cardata18$pct_expense,
  cardata18$pct_elcar,
  xlab = "Pct Expense",
  ylab = "Pct Electric Cars",
  main = "Pct Expense vs Electric Cars (2018)"
)

# As seen above, there is a positive correlation between pct_expense and
# pct_elcar. Where a high percentage of people reported travel expenses for
# tax deduction in their annual tax declaration, the incentive to buy an
# electric car is also high.

## 1n. Correlation between sent_index and pct_elcar. Also make a plot of them.

cor_sent_el <- cor(cardata18$sent_index, cardata18$pct_elcar)

cat("Correlation:", cor_sent_el, "\n")

# Scatter plot
plot(
  cardata18$sent_index,
  cardata18$pct_elcar,
  xlab = "Sent Index",
  ylab = "Pct Electric Cars",
  main = "Sent Index vs Electric Cars (2018)"
)

# As the plot and the correlation (0.629) show, there is a high correlation
# between the national index measuring the centrality of municipalities and
# the percentage of electric cars: the share of electric cars increases with
# the centrality index.

## 1o. Correlation between pct_diesel and pct_elcar. Also make a plot of them.

cor_diesel_el <- cor(cardata18$pct_diesel, cardata18$pct_elcar)

cat("Correlation:", cor_diesel_el, "\n")

# Scatter plot
plot(
  cardata18$pct_diesel,
  cardata18$pct_elcar,
  xlab = "pct_diesel",
  ylab = "Pct Electric Cars",
  main = "pct_diesel vs Electric Cars (2018)"
)

# As the plot and statistics above show, there is a strong negative
# correlation (-0.7259) between the percentage of electric cars and the
# percentage of diesel cars: municipalities with a high percentage of electric
# cars have a small percentage of diesel cars, and vice versa.

## 1p. We can add a histogram of the population distribution.

# breaks = 50 requests a fine binning of the 2018 population values.
hist(cardata18$population, breaks = 50, main = "Population Distribution 2018")