# Project 1
# Show the current working directory, then switch to the project folder.
# The console reported "C:/Users/ayomi/OneDrive/Documents" when this was run.
getwd()
# NOTE(review): hard-coded, machine-specific path; the relative write.csv()
# calls later in this script resolve against it.
setwd("C:/Users/ayomi/OneDrive/Documents")
# Import CarSurvey1 using the "Import Dataset" function
# Import CarSurvey2 using the "Import Dataset" function
# Import the dataset using read.csv
# Load both survey workbooks with readxl and take a first look at them.
library(readxl)

CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
View(CarSurvey1)

CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")
View(CarSurvey2)

# Inspect the structure and the first 10 rows of each survey.
str(CarSurvey1)
head(CarSurvey1, n = 10)
str(CarSurvey2)
head(CarSurvey2, n = 10)
# Rename the unique ID column in CarSurvey2 to match CarSurvey1
# Rename the first column of CarSurvey2 to "Resp", then check the result.
names(CarSurvey2)[1] <- "Resp"
head(CarSurvey2, n = 10)
# Merge CarSurvey1 and CarSurvey2 into one dataset
# Join the two surveys on the shared "Resp" column and inspect the result.
Cartotal <- merge(CarSurvey1, CarSurvey2, by = "Resp")
str(Cartotal)
# Read the Excel/CSV file (save the Excel file as "CSV UTF-8 (Comma delimited)")
# Read the two source workbooks into Car1 and Car2 and inspect each one.
# Requires readxl to be attached (library(readxl)).
Car1 <- read_excel("C:/Sage/CarSurvey_1.xlsx")
str(Car1)
head(Car1, n = 5)

Car2 <- read_excel("C:/Sage/CarSurvey_2.xlsx")
str(Car2)
head(Car2, n = 10)
## Save as CSV
# row.names = FALSE prevents R from writing the row names as an extra column.
write.csv(Cartotal, "Cartotal", row.names = FALSE)
View(Cartotal)
# Re-read both survey workbooks (requires readxl to be attached).
CarSurvey1 <- read_excel("C:/Sage/CarSurvey1.xlsx")
CarSurvey2 <- read_excel("C:/Sage/CarSurvey2.xlsx")
# Summary statistics for every column of the merged data.
summary(Cartotal)

# mean() returns NA when a column contains missing values, so each mean is
# also computed with na.rm = TRUE and stored.
# NOTE(review): column names are assumed to be Att1 / ValuPercp1 / ValuPercp2
# (no underscore) - confirm against names(Cartotal).
mean(Cartotal$Att1)
meanAtt1 <- mean(Cartotal$Att1, na.rm = TRUE)
print(meanAtt1)

mean(Cartotal$Education)

mean(Cartotal$ValuPercp1)
meanValuPercp1 <- mean(Cartotal$ValuPercp1, na.rm = TRUE)
print(meanValuPercp1)

mean(Cartotal$ValuPercp2)
meanValuPercp2 <- mean(Cartotal$ValuPercp2, na.rm = TRUE)
print(meanValuPercp2)
# Replace missing Att1 values with the Att1 mean (meanAtt1 is computed earlier
# in this script with na.rm = TRUE).
# NOTE(review): the column is assumed to be named "Att1"; confirm against
# names(Cartotal).
Cartotal[is.na(Cartotal$Att1), "Att1"] <- meanAtt1

# Row-wise average of the two attitude items.
Cartotal$AttMean <- (Cartotal$Att1 + Cartotal$Att2) / 2
View(Cartotal[c("Att1", "Att2", "AttMean")]) # Use this view to check calc.

# Rows whose AttMean is still NA after the Att1 imputation.
na_rows <- Cartotal[is.na(Cartotal$AttMean), ]
print(na_rows)

# Show the same rows again, looked up by row name in the full data frame.
Cartotal[rownames(na_rows), ]
# Bar chart of the number of rows per Region, with the count printed on each bar.
library(ggplot2)

ggplot(Cartotal, aes(x = Region, fill = Region)) +
  theme_bw() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(
    y = "Number of Cars",
    x = "Region",
    title = "Number of Cars by Region"
  )
# Share of rows in each Region (proportions that sum to 1).
car_region_percentage <- prop.table(table(Cartotal$Region))
print(car_region_percentage)
# Transform Model and Region into categorical variables (factors).
Cartotal$Model <- as.factor(Cartotal$Model)
Cartotal$Region <- as.factor(Cartotal$Region)
# Share of rows in each Residence category (proportions that sum to 1).
residence_percentage <- prop.table(table(Cartotal$Residence))
print(residence_percentage)
# Transform Model and Residence into categorical variables (factors).
Cartotal$Model <- as.factor(Cartotal$Model)
Cartotal$Residence <- as.factor(Cartotal$Residence)
# Pie chart of residence types: a single stacked bar of row counts, wrapped
# into polar coordinates. coord_polar() takes theta = "x" or "y".
# NOTE(review): drawn from row counts per Residence value - confirm this is
# the intended chart.
ggplot(Cartotal, aes(x = "Residence Type", fill = Residence)) +
  geom_bar() +
  coord_polar(theta = "y")
# Group car by make
library(stringr) # str_split_fixed()
library(dplyr) # count()

# Split Model into two pieces at the first separator: Make and Model_v1.
# NOTE(review): the separator is assumed to be a single space (e.g.
# "Ford Fusion"); adjust the pattern if Model uses a different delimiter.
Cartotal[c("Make", "Model_v1")] <- str_split_fixed(Cartotal$Model, " ", 2)

View(Cartotal)

# Frequency of each Make, then of each Make / Model_v1 combination.
table(Cartotal$Make)
count(Cartotal, Make, Model_v1, name = "Freq")
# Stacked bar chart: number of rows per Region, coloured by Make.
ggplot(Cartotal, aes(x = Region, fill = Make)) +
  theme_bw() +
  geom_bar() +
  labs(
    y = "Number of Cars",
    title = "Number of Cars per make by Region"
  )
# Install dplyr only when it is missing, then attach it for mutate() and
# case_when().
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Create a Parent column that maps each Make to its parent company.
# Any Make not listed below is labelled "Check" so it can be reviewed.
Cartotal <- Cartotal %>%
  mutate(
    Parent = case_when(
      Make %in% c("Buick", "Chevrolet") ~ "General Motors",
      Make %in% c("Chrysler", "Dodge", "Fiat") ~ "Chrysler",
      Make %in% c("Ford", "Lincoln") ~ "Ford",
      Make == "Honda" ~ "Honda",
      Make == "Kia" ~ "Kia",
      Make == "Toyota" ~ "Toyota",
      TRUE ~ "Check"
    )
  )
# Frequency of each Make / Parent combination (dplyr::count), then of each Make.
count(Cartotal, Make, Parent, name = "Freq")
table(Cartotal$Make)
# Stacked bar chart: number of rows per Region, coloured by Parent company.
# NOTE(review): "Presnt" in the title looks like a typo for "Parent" - confirm
# before changing the displayed text.
ggplot(Cartotal, aes(x = Region, fill = Parent)) +
  theme_bw() +
  geom_bar() +
  labs(
    y = "number of cars",
    title = "Number of Cars Presnt Company by Region"
  )
# One panel per Model: number of rows per Region, coloured by Parent company.
ggplot(Cartotal, aes(x = Region, fill = Parent)) +
  theme_bw() +
  facet_wrap(~Model) +
  geom_bar() +
  labs(
    y = "Number of Cars",
    title = "Number of Cars by Model and Region"
  )
# Mean Att1 for every Parent x Region combination.
brandregiontable <- aggregate(Att1 ~ Parent + Region, Cartotal, mean)
print(brandregiontable)
# Line chart of mean Att1 across Regions, one coloured line per Parent company.
ggplot(brandregiontable, aes(x = Region, y = Att1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(colour = Parent)) +
  labs(
    y = "Att1 Mean",
    title = "Attitude Mean by Parent and Region"
  )
# Keep only the Ford rows of the Parent x Region means.
FordAtt1Mean <- brandregiontable %>%
  filter(Parent == "Ford")

head(FordAtt1Mean, n = 10)
table(FordAtt1Mean$Parent) # Check to ensure new dataframe only includes Ford
# Line chart of mean Att1 across Regions for the rows in FordAtt1Mean.
# NOTE(review): the title mentions General Motors, but FordAtt1Mean is
# filtered to Parent == "Ford" - confirm which is intended.
ggplot(FordAtt1Mean, aes(x = Region, y = Att1, group = Parent)) +
  geom_line(aes(color = Parent)) +
  geom_point(aes(color = Parent)) +
  scale_y_continuous(limits = c(3, 6)) + # specify y-axis so it is consistent
  labs(
    y = "Att1 Mean",
    title = "Attitude Mean for Ford and General Motors by Region"
  )
# Rows where Att1 is missing.
na_rows <- Cartotal[is.na(Cartotal$Att1), ]
print(na_rows)
# Fill missing Residence values with the Residence mean.
# NOTE(review): mean() is only meaningful if Residence is numeric; the bar
# chart that follows treats it as a category (rent / own), in which case a
# mode-based fill would be more appropriate - confirm.
meanResidence <- mean(Cartotal$Residence, na.rm = TRUE)
print(meanResidence)

Cartotal[is.na(Cartotal$Residence), "Residence"] <- meanResidence
# Bar chart of the number of rows per Residence value, with counts on the bars.
ggplot(Cartotal, aes(x = Residence, fill = Residence)) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(
    y = "Number of Consumers",
    x = "Residence",
    title = "Number of Consumers who Rent and Own"
  )
# Bar chart of the number of rows per Post-Satis value, with counts on the bars.
# The column name contains a hyphen, so it must be wrapped in backticks;
# unquoted, R would parse it as the subtraction Post - Satis.
ggplot(Cartotal, aes(x = `Post-Satis`, fill = `Post-Satis`)) +
  theme_classic() +
  geom_bar() +
  geom_text(stat = "count", aes(label = ..count..), vjust = 0) +
  labs(y = "Level of Satisfaction")
# Export the final data frame to CSV in the working directory
# (row names are written, which is the write.csv default).
write.csv(
  x = Cartotal,
  file = "Project1FausatCartotal.csv"
)
# Install rmarkdown only when it is not already available, so re-running the
# script does not reinstall it every time.
if (!requireNamespace("rmarkdown", quietly = TRUE)) {
  install.packages("rmarkdown")
}