# Project 1
# Show the current working directory, then switch to the project folder.
# Console output from the original session:
#   [1] "C:/Users/ayomi/OneDrive/Documents"
# NOTE(review): a hard-coded setwd() ties the script to one machine; an
# RStudio project with relative paths would be more portable.
getwd()
setwd("C:/Users/ayomi/OneDrive/Documents")

# Import CarSurvey1 using the "Import Dataset" function

# Import CarSurvey2 using the "Import Dataset" function

# Import the datasets with readxl::read_excel()

library(readxl)

CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
View(CarSurvey1)
CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")
View(CarSurvey2)

# Inspect the structure and the first ten rows of each survey.
str(CarSurvey1)
head(CarSurvey1, n = 10)
str(CarSurvey2)
head(CarSurvey2, n = 10)

# Create a master data set

# Rename the unique-ID column (the first column) of CarSurvey2 to "Resp" so
# that it matches CarSurvey1.
names(CarSurvey2)[1] <- "Resp"
head(CarSurvey2, n = 10)

# Merge CarSurvey1 and CarSurvey2 into one dataset, matching rows on "Resp".
# With its default arguments merge() keeps only the respondents that appear
# in both surveys.
Car_total <- merge(CarSurvey1, CarSurvey2, by = "Resp")
str(Car_total)

# Read the first survey workbook.
# NOTE(review): the original note said to save the workbook as
# "CSV UTF-8 (Comma delimited)", but the path below is an .xlsx file read with
# read_excel(). The file name (CarSurvey_1.xlsx) also differs from the
# CarSurvey1.xlsx used elsewhere in this script - confirm both.
Car1 <- read_excel("C:/Sage/CarSurvey_1.xlsx")

# Display the structure of Car1 (data frame)
str(Car1)

# Display the first few rows of Car1 (data frame)
head(Car1, n = 5)

# Read the second survey workbook.
# NOTE(review): the original note said to save the workbook as
# "CSV UTF-8 (Comma delimited)", but the path below is an .xlsx file read with
# read_excel(). The file name (CarSurvey_2.xlsx) also differs from the
# CarSurvey2.xlsx used elsewhere in this script - confirm both.
Car2 <- read_excel("C:/Sage/CarSurvey_2.xlsx")

# Display the structure of Car2 (data frame)
str(Car2)

# Display the first few rows of Car2 (data frame)
head(Car2, n = 10)

# Save the merged data (Car_total) to a file

## Save as CSV
# row.names = FALSE stops R from writing the row names as an extra column.
# NOTE(review): the output file name has no ".csv" extension - confirm.
write.csv(Car_total, "Cartotal", row.names = FALSE)
View(Car_total)

# Read the Excel files

# Replace "file_path.xlsx" with the path to your file
CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")

# Replace the Missing Values

# Check the master dataset: summary() reports, per column, the distribution
# and the number of NA values.
summary(Car_total)

# Calculate the mean of Att_1. If the output is NA, the column contains at
# least one missing value.
# NOTE(review): the column is assumed to be named "Att_1" (the spelling used
# in the quoted column name further down); confirm with names(Car_total).
mean(Car_total$Att_1)

# Find the mean of Att_1 without NA values
meanAtt1 <- mean(Car_total$Att_1, na.rm = TRUE)
print(meanAtt1)

# Find the mean of Education without NA values.
# na.rm = TRUE drops the missing values; without it a single NA makes the
# result NA.
mean(Car_total$Education, na.rm = TRUE)

# Find the mean of Value Perception 1. If the output is NA, the column
# contains at least one missing value.
mean(Car_total$ValuPercp_1)

# Find the mean of ValuPercp_1 without NA values
meanValuPercp1 <- mean(Car_total$ValuPercp_1, na.rm = TRUE)
print(meanValuPercp1)

# Find the mean of Value Perception 2. If the output is NA, the column
# contains at least one missing value.
mean(Car_total$ValuPercp_2)

# Adjust for NA values when taking the mean of Value Perception 2
meanValuPercp2 <- mean(Car_total$ValuPercp_2, na.rm = TRUE)
print(meanValuPercp2)

# Replace NA values with the calculated mean of Att_1.
# The affected rows are recorded first: once the NAs are overwritten they
# can no longer be found with is.na().
na_rows <- Car_total[is.na(Car_total$Att_1), ]
Car_total[is.na(Car_total$Att_1), "Att_1"] <- meanAtt1

# Check that the rows that had NA now hold the calculated mean
Car_total[rownames(na_rows), ]

# Create a new column holding the mean of Att_1 and Att_2
Car_total$Att_Mean <- (Car_total$Att_1 + Car_total$Att_2) / 2

# Use this view to check the calculation.
View(Car_total[c("Att_1", "Att_2", "Att_Mean")])

# Check for NA values in Att_Mean. If zero rows are returned, there are no
# missing values.
na_rows <- Car_total[is.na(Car_total$Att_Mean), ]
print(na_rows)

# Adjust for NA values when taking the mean of Value Perception 2
# NOTE(review): this repeats a calculation made earlier in the script;
# confirm whether the repetition is intentional.
meanValuPercp2 <- mean(Car_total$ValuPercp_2, na.rm = TRUE)
print(meanValuPercp2)

# Replace NA values with the calculated mean of Att_1.
# NOTE(review): this repeats a replacement made earlier in the script;
# confirm whether the repetition is intentional.
# The affected rows are recorded first: once the NAs are overwritten they
# can no longer be found with is.na().
na_rows <- Car_total[is.na(Car_total$Att_1), ]
Car_total[is.na(Car_total$Att_1), "Att_1"] <- meanAtt1

# Check that the rows that had NA now hold the calculated mean
Car_total[rownames(na_rows), ]

# Create a new column holding the mean of Att_1 and Att_2
# NOTE(review): this repeats a calculation made earlier in the script;
# confirm whether the repetition is intentional.
Car_total$Att_Mean <- (Car_total$Att_1 + Car_total$Att_2) / 2

# Use this view to check the calculation.
View(Car_total[c("Att_1", "Att_2", "Att_Mean")])

# Demo_A: What is the distribution of cars across the regions?
# (Frequency count)

# Graph
library(ggplot2) # provides ggplot(), the geom_*() layers and the themes

# One bar per region, with the row count printed on top of each bar.
ggplot(Car_total, aes(x = Region, fill = Region)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(
    y = "Number of Cars",
    x = "Region",
    title = "Number of Cars by Region"
  )

# What is the distribution of cars across regions (percentage-wise)?
# prop.table() returns proportions between 0 and 1; multiply by 100 for
# percentages.
car_region_percentage <- prop.table(table(Car_total$Region))
print(car_region_percentage)

# What is the model distribution by region?

# Transform Model and Region into categorical variables (factors).
# as.factor() must be called on the column; assigning the bare function name
# does not convert anything.
Car_total$Model <- as.factor(Car_total$Model)
Car_total$Region <- as.factor(Car_total$Region)

# What is the distribution of owners and renters (percentage-wise)?
# prop.table() returns proportions between 0 and 1; multiply by 100 for
# percentages.
residence_percentage <- prop.table(table(Car_total$Residence))
print(residence_percentage)

# What is the model distribution by residence?

# Transform Model and Residence into categorical variables (factors).
# as.factor() must be called on the column; assigning the bare function name
# does not convert anything.
Car_total$Model <- as.factor(Car_total$Model)
Car_total$Residence <- as.factor(Car_total$Residence)

# Graph of residence types (pie-chart distribution)
# A pie chart is a single stacked bar drawn in polar coordinates: geom_bar()
# counts the rows for each Residence value and coord_polar(theta = "y") maps
# those counts to the angle. theta must be the string "x" or "y", not a
# column name.
ggplot(Car_total, aes(x = "Residence Type", fill = Residence)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# What is the make distribution by region?

# Group cars by make
library(stringr) # import library

# Separate the Model column into two, splitting at the first space:
# "Make" receives the text before it and "Model_v1" everything after it.
Car_total[c("Make", "Model_v1")] <- str_split_fixed(Car_total$Model, " ", 2)

# See the two new columns ("Make" and "Model_v1") in the Car_total data frame
View(Car_total)

# Check the values of the new columns
library(dplyr) # count() comes from dplyr

table(Car_total$Make)
# One row per Make/Model_v1 combination, with the row count in "Freq".
count(Car_total, Make, Model_v1, name = "Freq")

# Graph: stacked bars of the number of cars in each region, coloured by make
ggplot(Car_total, aes(x = Region, fill = Make)) +
  theme_bw() +
  geom_bar() +
  labs(y = "Number of Cars", title = "Number of Cars per make by Region")

# Group by parent company

# Install dplyr only when it is missing; it provides case_when() and mutate().
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr) # import library

# Call the data frame and create a new column with the new groupings.
# Any make that is not listed falls through to "Check" so it can be spotted.
Car_total <- Car_total %>%
  mutate(
    Parent = case_when(
      Make == "Buick" ~ "General Motors",
      Make == "Chevrolet" ~ "General Motors",
      Make == "Chrysler" ~ "Chrysler",
      Make == "Dodge" ~ "Chrysler",
      Make == "Fiat" ~ "Chrysler",
      Make == "Ford" ~ "Ford",
      Make == "Honda" ~ "Honda",
      Make == "Kia" ~ "Kia",
      Make == "Lincoln" ~ "Ford",
      Make == "Toyota" ~ "Toyota",
      TRUE ~ "Check"
    )
  )

# Check that the grouping is correct: one row per Make/Parent combination,
# with the row count in "Freq".
count(Car_total, Make, Parent, name = "Freq")
table(Car_total$Make)

# Graph: stacked bars of the number of cars in each region, coloured by
# parent company. The title typo ("Presnt Company") is corrected.
ggplot(Car_total, aes(x = Region, fill = Parent)) +
  theme_bw() +
  geom_bar() +
  labs(
    y = "number of cars",
    title = "Number of Cars per Parent Company by Region"
  )

# Graph: one panel per model, showing the number of cars in each region,
# coloured by parent company
ggplot(Car_total, aes(x = Region, fill = Parent)) +
  theme_bw() +
  facet_wrap(~Model) +
  geom_bar() +
  labs(y = "Number of Cars", title = "Number of Cars by Model and Region")

# What is the attitude mean by make and region?

# Create a table with the mean of Att_1 for each Parent/Region combination
brandregiontable <- aggregate(Att_1 ~ Parent + Region, Car_total, mean)
print(brandregiontable)

# Graph: one line per parent company across the regions
ggplot(brandregiontable, aes(x = Region, y = Att_1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(colour = Parent)) +
  labs(y = "Att1 Mean", title = "Attitude Mean by Parent and Region")

# What is the attitude mean for a specific make by region?

# Filter by a specific brand (Ford)
FordAtt1Mean <- brandregiontable %>%
  filter(Parent == "Ford")
head(FordAtt1Mean, n = 10)
table(FordAtt1Mean$Parent) # check that the new data frame only includes Ford

# Graph
# NOTE(review): the title mentions General Motors, but the data above is
# filtered to Ford only - confirm which is intended.
ggplot(FordAtt1Mean, aes(x = Region, y = Att_1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(color = Parent)) +
  scale_y_continuous(limits = c(3, 6)) + # fixed y-axis so it is consistent
  labs(
    y = "Att1 Mean",
    title = "Attitude Mean for Ford and General Motors by Region"
  )

# Find rows with NA in Residence
na_rows <- Car_total[is.na(Car_total$Residence), ]
print(na_rows)

# Find the mean of Residence without NA values.
# An earlier step is meant to turn Residence into a factor, and mean() does
# not work on factors. Going through character recovers the original codes
# rather than the factor's internal level numbers.
# NOTE(review): Residence looks like a category code; replacing NAs with the
# most frequent code may be more meaningful than the mean - confirm.
residence_codes <- as.numeric(as.character(Car_total$Residence))
meanResidence <- mean(residence_codes, na.rm = TRUE)
print(meanResidence)

# Replace NA values with the calculated mean of Residence.
# The column is stored back as numeric so that the mean is a valid value.
residence_codes[is.na(residence_codes)] <- meanResidence
Car_total$Residence <- residence_codes

# What is the distribution of consumers across the two types of residences?
# Let 1 represent Own and 2 represent Rent.
ggplot(Car_total, aes(x = Residence, fill = Residence)) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(
    y = "Number of Consumers",
    x = "Residence",
    title = "Number of Consumers who Rent and Own"
  )

# Find and graph how satisfied our respondents are, based on post-purchase
# feedback.
# The column name contains a hyphen, so it must be wrapped in backticks;
# unquoted, R reads Post-Satis as "Post minus Satis".
# NOTE(review): assumes the column is literally named "Post-Satis"; confirm
# with names(Car_total).
ggplot(Car_total, aes(x = `Post-Satis`, fill = `Post-Satis`)) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(y = "Level of Satisfaction")

# Export the new dataset to .csv (written to the current working directory)
write.csv(Car_total, "Project1FausatCartotal.csv")

# Create R Project HTML Link with Knit Document

# Step 1: Save R file as R Markdown
# Install rmarkdown only when it is missing, so re-running the script does
# not reinstall it every time.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}