#knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(moments)
library(tidyverse) # data manipulation
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
#remove.packages("rlang")
#install.packages("rlang")
#getwd()
library(readxl)
# Load the survey responses. The path is relative, so the workbook must sit in
# the current working directory.
Starbucks_satisfactory_survey <- read_excel("Starbucks satisfactory survey.xlsx")
#data <- read_excel("C:/Users/junol/OneDrive/2 Harrisburg University/Semester 5_2020 Late Fall - fee, transcript/525 Andrea L'Huillier, Thu 730PM/Term Project/Starbucks satisfactory survey.xlsx",
# col_types = c("text", "text", "text",
# "text", "text", "text", "text", "text",
# "text", "text", "text", "text", "numeric",
# "numeric", "numeric", "numeric",
# "numeric", "numeric", "numeric",
# "text", "text"))
# Shorter working name used by the rest of the script.
data <- Starbucks_satisfactory_survey
# View() opens a spreadsheet-style viewer and only makes sense in an
# interactive session; skip it when the script runs non-interactively
# (e.g. via Rscript), where it can fail and abort the run.
if (interactive()) {
  View(data)
}
# Print the column names and types.
glimpse(data)
## Rows: 122
## Columns: 21
## $ Timestamp <chr> ...
## $ `1. Your Gender` <chr> ...
## $ `2. Your Age` <chr> ...
## $ `3. Are you currently....?` <chr> ...
## $ `4. What is your annual income?` <chr> ...
## $ `5. How often do you visit Starbucks?` <chr> ...
## $ `6. How do you usually enjoy Starbucks?` <chr> ...
## $ `7. How much time do you normally spend during your visit?` <chr> ...
## $ `8. The nearest Starbucks's outlet to you is...?` <chr> ...
## $ `9. Do you have Starbucks membership card?` <chr> ...
## $ `10. What do you most frequently purchase at Starbucks?` <chr> ...
## $ `11. On average, how much would you spend at Starbucks per visit?` <chr> ...
## $ `12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:` <dbl> ...
## $ `13. How would you rate the price range at Starbucks?` <dbl> ...
## $ `14. How important are sales and promotions in your purchase decision?` <dbl> ...
## $ `15. How would you rate the ambiance at Starbucks? (lighting, music, etc...)` <dbl> ...
## $ `16. You rate the WiFi quality at Starbucks as..` <dbl> ...
## $ `17. How would you rate the service at Starbucks? (Promptness, friendliness, etc..)` <dbl> ...
## $ `18. How likely you will choose Starbucks for doing business meetings or hangout with friends?` <dbl> ...
## $ `19. How do you come to hear of promotions at Starbucks? Check all that apply.` <chr> ...
## $ `20. Will you continue buying at Starbucks?` <chr> ...
## Step 1. Check for NAs in the dataset, then look at the data types and summary
# Total number of missing cells across all columns.
data %>%
  is.na() %>%
  sum()
## [1] 0
# Column names and types.
glimpse(data)
## Rows: 122
## Columns: 21
## $ Timestamp <chr> ...
## $ `1. Your Gender` <chr> ...
## $ `2. Your Age` <chr> ...
## $ `3. Are you currently....?` <chr> ...
## $ `4. What is your annual income?` <chr> ...
## $ `5. How often do you visit Starbucks?` <chr> ...
## $ `6. How do you usually enjoy Starbucks?` <chr> ...
## $ `7. How much time do you normally spend during your visit?` <chr> ...
## $ `8. The nearest Starbucks's outlet to you is...?` <chr> ...
## $ `9. Do you have Starbucks membership card?` <chr> ...
## $ `10. What do you most frequently purchase at Starbucks?` <chr> ...
## $ `11. On average, how much would you spend at Starbucks per visit?` <chr> ...
## $ `12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:` <dbl> ...
## $ `13. How would you rate the price range at Starbucks?` <dbl> ...
## $ `14. How important are sales and promotions in your purchase decision?` <dbl> ...
## $ `15. How would you rate the ambiance at Starbucks? (lighting, music, etc...)` <dbl> ...
## $ `16. You rate the WiFi quality at Starbucks as..` <dbl> ...
## $ `17. How would you rate the service at Starbucks? (Promptness, friendliness, etc..)` <dbl> ...
## $ `18. How likely you will choose Starbucks for doing business meetings or hangout with friends?` <dbl> ...
## $ `19. How do you come to hear of promotions at Starbucks? Check all that apply.` <chr> ...
## $ `20. Will you continue buying at Starbucks?` <chr> ...
# Per-column summary: length/class for text columns, quartiles and mean for numeric ones.
data %>% summary()
## Timestamp 1. Your Gender 2. Your Age
## Length:122 Length:122 Length:122
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## 3. Are you currently....? 4. What is your annual income?
## Length:122 Length:122
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## 5. How often do you visit Starbucks? 6. How do you usually enjoy Starbucks?
## Length:122 Length:122
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## 7. How much time do you normally spend during your visit?
## Length:122
## Class :character
## Mode :character
##
##
##
## 8. The nearest Starbucks's outlet to you is...?
## Length:122
## Class :character
## Mode :character
##
##
##
## 9. Do you have Starbucks membership card?
## Length:122
## Class :character
## Mode :character
##
##
##
## 10. What do you most frequently purchase at Starbucks?
## Length:122
## Class :character
## Mode :character
##
##
##
## 11. On average, how much would you spend at Starbucks per visit?
## Length:122
## Class :character
## Mode :character
##
##
##
## 12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.664
## 3rd Qu.:4.000
## Max. :5.000
## 13. How would you rate the price range at Starbucks?
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :2.893
## 3rd Qu.:4.000
## Max. :5.000
## 14. How important are sales and promotions in your purchase decision?
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.795
## 3rd Qu.:5.000
## Max. :5.000
## 15. How would you rate the ambiance at Starbucks? (lighting, music, etc...)
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.754
## 3rd Qu.:4.000
## Max. :5.000
## 16. You rate the WiFi quality at Starbucks as..
## Min. :1.000
## 1st Qu.:3.000
## Median :3.000
## Mean :3.254
## 3rd Qu.:4.000
## Max. :5.000
## 17. How would you rate the service at Starbucks? (Promptness, friendliness, etc..)
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.746
## 3rd Qu.:4.000
## Max. :5.000
## 18. How likely you will choose Starbucks for doing business meetings or hangout with friends?
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.516
## 3rd Qu.:4.000
## Max. :5.000
## 19. How do you come to hear of promotions at Starbucks? Check all that apply.
## Length:122
## Class :character
## Mode :character
##
##
##
## 20. Will you continue buying at Starbucks?
## Length:122
## Class :character
## Mode :character
##
##
##
## Step 2. Convert the demographic variables to categorical variables (factors)
# factor() silently turns any value that is not listed in `levels` into NA,
# so the level strings must match the survey answers exactly.
data$`1. Your Gender` <- factor(
  data$`1. Your Gender`,
  levels = c("Male", "Female"),
  labels = c("M", "F")
)
# Age brackets are listed from youngest to oldest so that the codes 1-4
# increase with age (previously ">40" was coded "2", ahead of "20-29").
data$`2. Your Age` <- factor(
  data$`2. Your Age`,
  levels = c("<20", "20-29", "30-39", ">40"),
  labels = c("1", "2", "3", "4")
)
data$`3. Are you currently....?` <- factor(
  data$`3. Are you currently....?`,
  levels = c("Employed", "Housewife", "Self-employed", "Student"),
  labels = c("E", "H", "SE", "S")
)
# Income brackets from lowest to highest, coded 1-5 (the last code was
# mistyped as "S", which also clashed with the "S" = Student code above).
data$`4. What is your annual income?` <- factor(
  data$`4. What is your annual income?`,
  levels = c("<25,000", "25,000-50,000", "50,000-100,000", "100,000-150,000", ">150,000"),
  labels = c("1", "2", "3", "4", "5")
)
# Sanity check: a non-zero count means some income answer did not match a level.
sum(is.na(data$`4. What is your annual income?`))
## [1] 0
glimpse(data)
## Rows: 122
## Columns: 21
## $ Timestamp <chr> ...
## $ `1. Your Gender` <fct> ...
## $ `2. Your Age` <fct> ...
## $ `3. Are you currently....?` <fct> ...
## $ `4. What is your annual income?` <fct> ...
## $ `5. How often do you visit Starbucks?` <chr> ...
## $ `6. How do you usually enjoy Starbucks?` <chr> ...
## $ `7. How much time do you normally spend during your visit?` <chr> ...
## $ `8. The nearest Starbucks's outlet to you is...?` <chr> ...
## $ `9. Do you have Starbucks membership card?` <chr> ...
## $ `10. What do you most frequently purchase at Starbucks?` <chr> ...
## $ `11. On average, how much would you spend at Starbucks per visit?` <chr> ...
## $ `12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:` <dbl> ...
## $ `13. How would you rate the price range at Starbucks?` <dbl> ...
## $ `14. How important are sales and promotions in your purchase decision?` <dbl> ...
## $ `15. How would you rate the ambiance at Starbucks? (lighting, music, etc...)` <dbl> ...
## $ `16. You rate the WiFi quality at Starbucks as..` <dbl> ...
## $ `17. How would you rate the service at Starbucks? (Promptness, friendliness, etc..)` <dbl> ...
## $ `18. How likely you will choose Starbucks for doing business meetings or hangout with friends?` <dbl> ...
## $ `19. How do you come to hear of promotions at Starbucks? Check all that apply.` <chr> ...
## $ `20. Will you continue buying at Starbucks?` <chr> ...
view(data)
# Shape of the distribution of column 13. select(13) keeps it as a one-column
# tibble, so the result is labelled with the column name.
# NOTE(review): column 13 is question 12 (quality rating), while the plots
# further down use question 13 (price range) - confirm which one is intended.
data %>%
  select(13) %>%
  skewness()
## 12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:
## -0.4802077
data %>%
  select(13) %>%
  kurtosis()
## 12. How would you rate the quality of Starbucks compared to other brands (Coffee Bean, Old Town White Coffee..) to be:
## 3.118867
# approximately symmetric distribution
## Step 2. Visualization, correlation plot? I'm having trouble here. The column in data[13] is numeric, but R keeps telling me "Error in density.default(data[13]) : argument 'x' must be numeric". It would be great if you could help me debug this.
## I also get an error message from boxplot(): "Error in stats::model.frame.default(formula = data[13] ~ data[20], data = data) : invalid type (list) for variable 'data[13]'".
## (Both errors occur because data[13] returns a one-column tibble, which is a list; data[[13]] or data$`column name` returns the numeric vector itself.)
#str(data[13])
# Plots for the price-range rating (question 13): its density, its histogram,
# and box plots of the rating split by the answer to question 20.
# local() keeps the two helper vectors out of the global workspace.
local({
  price_rating <- data[["13. How would you rate the price range at Starbucks?"]]
  keep_buying <- data[["20. Will you continue buying at Starbucks?"]]

  plot(
    density(price_rating),
    main = "Price Range rating density plot",
    xlab = "Price Range"
  )
  hist(
    price_rating,
    xlab = "Price range rating",
    breaks = 25,
    main = "How would you rate price range at Starbucks"
  )
  boxplot(
    price_rating ~ keep_buying,
    xlab = "Will you continue buying at Starbucks",
    ylab = "Price range rating",
    main = "Price Range - Continue buying at Starbucks"
  )
})
## Step 3. Logistic Regression
# Categorical copy of the price-range rating, stored as a new column.
data <- data %>%
  mutate(`22 Factor for 13` = factor(`13. How would you rate the price range at Starbucks?`))
typeof(data[["13. How would you rate the price range at Starbucks?"]])
## [1] "double"
typeof(data[["22 Factor for 13"]])
## [1] "integer"
#logit <- glm(`20. Will you continue buying at Starbucks?` ~ `13. How would you rate the price range at Starbucks?`, data = data)
# Binary outcome: 1 when the answer to question 20 is "Yes", otherwise 0.
data <- data %>%
  mutate(`23. Continue Buying` = if_else(`20. Will you continue buying at Starbucks?` == "Yes", 1, 0))
typeof(data[["23. Continue Buying"]])
## [1] "double"
# Store the outcome as integer rather than double.
data <- data %>%
  mutate(`23. Continue Buying` = as.integer(`23. Continue Buying`))
typeof(data[["23. Continue Buying"]])
## [1] "integer"
# Counts of 0s and 1s in the outcome.
table(data[["23. Continue Buying"]])
##
## 0 1
## 28 94
# Two-way contingency table: outcome (rows) by price-range rating (columns)
xtabs(formula = ~ `23. Continue Buying` + `22 Factor for 13`, data = data)
## 22 Factor for 13
## 23. Continue Buying 1 2 3 4 5
## 0 8 13 6 1 0
## 1 6 14 42 23 9
# Logistic regression of the binary outcome (`23. Continue Buying`, 0/1) on the
# price-range rating. The 0/1 column must be the response: with a multi-level
# factor on the left-hand side, a binomial glm treats the first level as
# "failure" and every other level as "success", which is not the intended model.
# Rating 5 has no 0 responses in the contingency table above, so expect an
# unstable estimate (and possibly a fitting warning) for that level.
# NOTE(review): using the numeric rating column as the predictor instead may be
# preferable - confirm with the analysis goals.
logit <- glm(
  `23. Continue Buying` ~ `22 Factor for 13`,
  data = data,
  family = "binomial"
)
##Step 4. Monte Carlo Simulation?