# ---- Setup, import and merge ----
# Loads the two survey workbooks, aligns their key column, merges them into
# the master data frame `Cartotal`, and saves the merged result.

# Show the current working directory, then point it at the project folder.
# NOTE(review): setwd() with an absolute path ties the script to one machine;
# it is kept so the relative output path used below resolves as before.
getwd()
setwd("C:/Users/ayomi/OneDrive/Documents")

# Import CarSurvey1 using "Import Dataset" function

# Import CarSurvey2 using "Import Dataset" function

# Import Dataset using read_excel

library(readxl)

CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
View(CarSurvey1)
CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")
View(CarSurvey2)

# Inspect both imports (Car1 does not exist yet at this point, so the
# structure check has to target CarSurvey1).
str(CarSurvey1)
head(CarSurvey1, n = 10)
str(CarSurvey2)
head(CarSurvey2, n = 10)

# Create a master data set

# Rename Unique ID in CarSurvey2 to match CarSurvey1
names(CarSurvey2)[1] <- "Resp"
head(CarSurvey2, n = 10)

# Merge CarSurvey 1 and CarSurvey 2 into one dataset
Cartotal <- merge(CarSurvey1, CarSurvey2, by = "Resp")
str(Cartotal)

# Read the first Excel workbook
# NOTE(review): this path uses "CarSurvey_1.xlsx" while the import above uses
# "CarSurvey1.xlsx" - confirm which file name actually exists.
Car1 <- read_excel("C:/Sage/CarSurvey_1.xlsx")

# Display the structure of Car1 (data frame)
str(Car1)

# Display the first few rows of Car1 (data frame)
head(Car1, n = 5)

# Read the second Excel workbook
Car2 <- read_excel("C:/Sage/CarSurvey_2.xlsx")

# Display the structure of Car2 (data frame)
str(Car2)

# Display the first few rows of Car2 (data frame)
head(Car2, n = 10)

# Save the merged data (Cartotal) to a file

## Save as CSV
# row.names = FALSE keeps R from writing an extra row-name column.
write.csv(Cartotal, "Cartotal", row.names = FALSE)
View(Cartotal)

# Read the Excel file

# Replace the path below with the path to your file
CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")

Replace the Missing Values

Check the master dataset

# ---- Missing-value handling on the master data frame `Cartotal` ----
# NOTE(review): survey columns are written with underscores (Att_1, Att_2,
# ValuPercp_1, ValuPercp_2) as in the section headings - confirm against
# names(Cartotal) before running.

summary(Cartotal)

# Calculate mean of Att_1. If the output is NA, the column contains
# missing values.
mean(Cartotal$Att_1)

# Find mean of Att_1 without NA values
meanAtt1 <- mean(Cartotal$Att_1, na.rm = TRUE)
print(meanAtt1)

# Find the mean of Education without NA values
mean(Cartotal$Education, na.rm = TRUE)

# Find the mean of Value Perception 1. If the output is NA, the column
# contains missing values.
mean(Cartotal$ValuPercp_1)

# Find mean of ValuPercp_1 without NA values
meanValuPercp1 <- mean(Cartotal$ValuPercp_1, na.rm = TRUE)
print(meanValuPercp1)

# Find the mean of Value Perception 2.
mean(Cartotal$ValuPercp_2)

# Adjust for NA values regarding the mean of Value Perception 2
meanValuPercp2 <- mean(Cartotal$ValuPercp_2, na.rm = TRUE)
print(meanValuPercp2)

# Remember which rows are missing Att_1 BEFORE imputing; once the NAs are
# overwritten they can no longer be located.
att1_na_rows <- Cartotal[is.na(Cartotal$Att_1), ]

# Replace NA values with calculated mean of Att_1
Cartotal[is.na(Cartotal$Att_1), "Att_1"] <- meanAtt1

# Check to see if NA rows are replaced with calculated mean
Cartotal[rownames(att1_na_rows), ]

# Create a new column to calculate mean of Att_1 and Att_2
Cartotal$Att_Mean <- (Cartotal$Att_1 + Cartotal$Att_2) / 2
View(Cartotal[c("Att_1", "Att_2", "Att_Mean")]) # Use this view to check calc.

# Check for NA values in Att_Mean. If zero rows return, there are no
# missing values.
na_rows <- Cartotal[is.na(Cartotal$Att_Mean), ]
print(na_rows)

# Adjust for NA values regarding the mean of Value Perception 2
# (repeat of the step above)
meanValuPercp2 <- mean(Cartotal$ValuPercp_2, na.rm = TRUE)
print(meanValuPercp2)

# Replace NA values with calculated mean of Att_1
# (repeat; a no-op once the NAs were filled above)
Cartotal[is.na(Cartotal$Att_1), "Att_1"] <- meanAtt1

# Check to see if NA rows are replaced with calculated mean
Cartotal[rownames(att1_na_rows), ]

# Create a new column to calculate mean of Att_1 and Att_2
Cartotal$Att_Mean <- (Cartotal$Att_1 + Cartotal$Att_2) / 2
View(Cartotal[c("Att_1", "Att_2", "Att_Mean")]) # Use this view to check calc.

Demo_A What is the distribution of cars across the regions? (Frequency Count)

Graph

# ---- Distribution of cars by region and by residence type ----
# ggplot2 supplies ggplot(), the geoms, themes and coord_polar() used below.
library(ggplot2)

# Bar chart: number of cars per region, with the count printed on each bar.
ggplot(Cartotal, aes(x = Region, fill = Region)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 0) +
  labs(
    y = "Number of Cars",
    x = "Region",
    title = "Number of Cars by Region"
  )

# What is the distribution of cars across regions (percentage-wise)?
# prop.table() returns proportions in [0, 1]; multiply by 100 for percent.
car_region_percentage <- prop.table(table(Cartotal$Region))
print(car_region_percentage)

# What is the model distribution by region?
# Transform into categorical variables.
Cartotal$Model <- as.factor(Cartotal$Model)
Cartotal$Region <- as.factor(Cartotal$Region)

# What is the distribution of owners and renters (percentage-wise)?
residence_percentage <- prop.table(table(Cartotal$Residence))
print(residence_percentage)

# What is the model distribution by residence?
# Transform into categorical variables.
Cartotal$Model <- as.factor(Cartotal$Model)
Cartotal$Residence <- as.factor(Cartotal$Residence)

# Graph of residence types (pie chart distribution)
# A pie chart is a single stacked bar of counts bent onto polar coordinates,
# so the slices are driven by the row count per residence type.
ggplot(Cartotal, aes(x = "Residence Type", fill = Residence)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

What is the make distribution by region?

# ---- Group cars by make and by parent company ----
# stringr: str_split_fixed(); dplyr: %>%, mutate(), case_when(), count();
# ggplot2: plotting.
library(stringr)
# Install dplyr only when it is missing instead of on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(ggplot2)

# Separate the Model column into two, splitting on the first space:
# the text before it becomes Make, the remainder becomes Model_v1.
Cartotal[c("Make", "Model_v1")] <- str_split_fixed(Cartotal$Model, " ", 2)

# See the two new columns ("Make" and "Model_v1") in the Cartotal data frame
View(Cartotal)

# Check values of the new columns
table(Cartotal$Make)
count(Cartotal, Make, Model_v1, name = "Freq")

# Graph
ggplot(Cartotal, aes(x = Region, fill = Make)) +
  theme_bw() +
  geom_bar() +
  labs(y = "Number of Cars", title = "Number of Cars per make by Region")

# Group by parent company
# Any make not listed falls through to "Check" so it can be spotted in the
# frequency table below.
Cartotal <- Cartotal %>%
  mutate(
    Parent = case_when(
      Make == "Buick" ~ "General Motors",
      Make == "Chevrolet" ~ "General Motors",
      Make == "Chrysler" ~ "Chrysler",
      Make == "Dodge" ~ "Chrysler",
      Make == "Fiat" ~ "Chrysler",
      Make == "Ford" ~ "Ford",
      Make == "Honda" ~ "Honda",
      Make == "Kia" ~ "Kia",
      Make == "Lincoln" ~ "Ford",
      Make == "Toyota" ~ "Toyota",
      TRUE ~ "Check"
    )
  )

# Check if grouping is correct
count(Cartotal, Make, Parent, name = "Freq")
table(Cartotal$Make)

# Graph
ggplot(Cartotal, aes(x = Region, fill = Parent)) +
  theme_bw() +
  geom_bar() +
  labs(
    y = "number of cars",
    title = "Number of Cars per Parent Company by Region"
  )

# Graph
ggplot(Cartotal, aes(x = Region, fill = Parent)) +
  theme_bw() +
  facet_wrap(~Model) +
  geom_bar() +
  labs(y = "Number of Cars", title = "Number of Cars by Model and Region")

What is the attitude mean by make and region?

create contingency table

# ---- Attitude mean by parent company and region ----
# Expects Cartotal to already carry the Parent column.
# NOTE(review): the attitude column is written as Att_1, as in the section
# headings - confirm against names(Cartotal).
library(dplyr)
library(ggplot2)

# Mean of Att_1 for every Parent x Region combination (rows with NA in any
# of the three columns are dropped by aggregate()'s formula interface).
brandregiontable <- aggregate(Att_1 ~ Parent + Region, Cartotal, mean)
print(brandregiontable)

# Graph
ggplot(brandregiontable, aes(x = Region, y = Att_1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(colour = Parent)) +
  labs(y = "Att1 Mean", title = "Attitude Mean by Parent and Region")

# What is the attitude mean for a specific make by region?

# Filter by a specific brand (Ford)
FordAtt1Mean <- brandregiontable %>%
  filter(Parent == "Ford")
head(FordAtt1Mean, n = 10)
table(FordAtt1Mean$Parent) # Check that the new data frame only includes Ford

# Graph
# NOTE(review): the title mentions General Motors, but the data plotted here
# is filtered to Ford only - confirm which is intended.
ggplot(FordAtt1Mean, aes(x = Region, y = Att_1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(color = Parent)) +
  scale_y_continuous(limits = c(3, 6)) + # fixed y-axis so plots are comparable
  labs(
    y = "Att1 Mean",
    title = "Attitude Mean for Ford and General Motors by Region"
  )

Find rows with NA in Residence

# ---- Residence missing values and final charts ----
library(ggplot2)

# Find rows with NA in Residence
na_rows <- Cartotal[is.na(Cartotal$Residence), ]
print(na_rows)

# Find mean of Residence without NA values
# Residence is converted to a factor earlier in this script, and mean() of a
# factor is NA, so go back to the numeric codes first.
# NOTE(review): assumes Residence holds the codes 1 (Own) / 2 (Rent) - confirm.
# NOTE(review): for a categorical code the most frequent value is usually a
# better fill-in than the mean, which creates a fractional category.
Cartotal$Residence <- as.numeric(as.character(Cartotal$Residence))
meanResidence <- mean(Cartotal$Residence, na.rm = TRUE)
print(meanResidence)

# Replace NA values with calculated mean of Residence
Cartotal[is.na(Cartotal$Residence), "Residence"] <- meanResidence

# What is the distribution of consumers across the two types of residences?
# Let 1 represent Own and 2 represent Rent.
# factor() makes ggplot treat the codes as discrete categories, so each code
# gets its own bar and fill colour.
ggplot(Cartotal, aes(x = factor(Residence), fill = factor(Residence))) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 0) +
  labs(
    y = "Number of Consumers",
    x = "Residence",
    fill = "Residence",
    title = "Number of Consumers who Rent and Own"
  )

# Find and graph how satisfied our respondents are based on post-purchase
# feedback.
# The column name contains a hyphen, so it must be back-quoted; unquoted,
# R would parse it as `Post` minus `Satis`.
# NOTE(review): confirm the column is spelled "Post-Satis" in names(Cartotal).
ggplot(Cartotal, aes(x = `Post-Satis`, fill = `Post-Satis`)) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = 0) +
  labs(y = "Level of Satisfaction")

Import CarSurvey1 using "Import Dataset" function

Import CarSurvey2 using "Import Dataset" function

Import Dataset using read.csv

Create a master data set

Rename Unique ID in CarSurvey2 to match CarSurvey1

Merge CarSurvey 1 and CarSurvey 2 into one dataset

Read the Excel file (or save the Excel file as CSV UTF-8 (Comma delimited))

Display the structure of Car1 (data frame)

Display the first few rows of Car1 (data frame)

Read the Excel file (or save the Excel file as CSV UTF-8 (Comma delimited))

Display the structure of Car2 (data frame)

Display the first few rows of Car 2 (data frame)

Save the merged data (Cartotal) to a file

Read the Excel file

Replace "file_path.xlsx" with the path to your file

Replace the Missing Values

Check the master dataset

Calculate mean of Att_1. if output is NA, this means there is a null value in the column

Find mean of Att_1 without NA values

Find the mean of Education without NA values

Find the mean of Value Perception 1. If the output is NA, this means there is a null value

Find mean of ValuPercp1 without NA values

Find the mean of Value Perception 2.

Adjust for NA values regarding the mean of Value Perception 2

Replace NA values with calculated mean of Att_1

Check to see if NA rows are replaced with calculated mean

Create a new column to calculate mean of Att1 and Att2

Check for NA values in Att_Mean. If zero rows are returned, there are no null values

Adjust for NA values regarding the mean of Value Perception 2

Replace NA values with calculated mean of Att_1

Check to see if NA rows are replaced with calculated mean

Create a new column to calculate mean of Att1 and Att2

Demo_A What is the distribution of cars across the regions? (Frequency Count)

Graph

What is the distribution of cars across regions (percentage-wise)?

What is the model distribution by region?

What is the distribution of owners and renters (Percentage wise?)

What is the model distribution by residence?

Graph of residence types (pie chart distribution)

What is the make distribution by region?

Separate the Model column into two, delimited by a space

See the two new columns ("Make" and "Model_v1") in the Cartotal data frame

check values of new column

Graph

Group by parent company

check if grouping is correct

Graph

Graph

What is the attitude mean by make and region?

create contingency table

Graph

What is the attitude mean for a specific make by region?

Filter by a specific brand (Ford)

Graph

Find rows with NA in Residence

Find mean of Residence without NA values

Replace NA values with calculated mean of Residence

What is the distribution of consumers across the two types of residences? Let 1 represent Own and 2 represent Rent

Find and graph how satisfied our respondents are based on post-purchase feedback

Export New Dataset to .csv

Create R Project HTML Link with Knit Document

Step 1: Save R file as R Markdown