# Show the working directory; the CSV below is read relative to it.
getwd()
## [1] "/Users/nikibirkner/Documents/Others/Thesis/Others"
# Read the raw survey export and discard its first two data rows
# (presumably extra header rows written by the survey tool -- TODO confirm).
data <- read.csv("Raw_Language Attitudes_February23,2020.csv")
data <- data[-(1:2), ]
# Keep real submissions only: no survey previews, and Q9 must be answered.
new_data <- subset(data, Status != "Survey Preview" & Q9 != "")
dim(new_data)
## [1] 309 44
# Harmonise the free-text country answers in Q14_2_TEXT.
# Any answer mentioning "Spain", "Èspaña" or "Madrid" (case-insensitive)
# becomes "Spain"; any remaining answer mentioning "Mexico" becomes "Mexico".
# Spain patterns take precedence when an answer matches both.
#
# The column is processed as a character vector in one pass:
#   * a factor column would turn a label that is not yet a level into NA;
#   * whole-vector matching also works when `new_data` has zero rows, where a
#     `1:length(x)` loop would iterate over c(1, 0) and fail.
# NOTE(review): "Èspaña" carries a grave accent, so a plain "España" answer is
# not matched -- confirm this is intended.
q14_text <- as.character(new_data$Q14_2_TEXT)
is_spain <- grepl("Spain", q14_text, ignore.case = TRUE) |
  grepl("Èspaña", q14_text, ignore.case = TRUE) |
  grepl("Madrid", q14_text, ignore.case = TRUE)
is_mexico <- grepl("Mexico", q14_text, ignore.case = TRUE) & !is_spain
q14_text[is_spain] <- "Spain"
q14_text[is_mexico] <- "Mexico"
new_data$Q14_2_TEXT <- q14_text
# Assign every respondent to one country group, stored in `new_data$Merged`.
# Every row starts as "Other", so `v` always has exactly one entry per
# respondent (growing it from c() left it too short whenever the last rows
# matched no rule, which made the column assignment fail).
# Rules are applied in increasing priority, so later ones win:
#   Q14 == "USA"  <  Q14_2_TEXT == "Spain"  <  Q14_2_TEXT == "Mexico".
# `%in%` is used instead of `==` so missing answers simply stay "Other".
v <- rep("Other", nrow(new_data))
v[as.character(new_data$Q14) %in% "USA"] <- "USA"
v[as.character(new_data$Q14_2_TEXT) %in% "Spain"] <- "Spain"
v[as.character(new_data$Q14_2_TEXT) %in% "Mexico"] <- "Mexico"
new_data$Merged <- v
# Function to normalize data: converts the answer counts of each country group into within-country proportions.
#' Normalize answer counts within each country group.
#'
#' Cross-tabulates `data.frame$Merged` against `question`, keeps the four
#' offensiveness ratings listed in the body, and expresses every count as a
#' share of its country's total over those ratings.
#'
#' @param data.frame Data frame with a `Merged` column giving the country
#'   group of each respondent.
#' @param question Vector of answers, one per row of `data.frame`.
#' @param names Country groups to report. Groups not listed here are dropped
#'   from the result; an empty vector yields an empty result.
#' @return A data frame with columns `Country`, `Answer`, `Frequency`,
#'   `Total` and `Percentage`, sorted by country. `Percentage` is
#'   `Frequency / Total`, i.e. a proportion (`NaN` when a total is zero).
normalize_data <- function(data.frame, question, names) {
  # NOTE(review): the plots in this file order their x axis with
  # "Extremely offensive", while this filter keeps "Very offensive" --
  # confirm which label the survey actually uses.
  kept_answers <- c(
    "Not offensive", "Slightly offensive",
    "Moderately offensive", "Very offensive"
  )

  # Long-format contingency table: one row per (country, answer) pair.
  counts <- as.data.frame(table(data.frame$Merged, question))
  names(counts) <- c("Country", "Answer", "Frequency")
  counts <- counts[counts$Answer %in% kept_answers, , drop = FALSE]
  # Keep the pre-merge row order used so far: descending country level,
  # with ties left in table order (order() is stable).
  counts <- counts[order(-as.integer(counts$Country)), , drop = FALSE]

  # Per-country denominators, computed without an index loop so that an
  # empty `names` vector is handled instead of iterating over c(1, 0).
  totals <- vapply(
    names,
    function(country) sum(counts$Frequency[counts$Country %in% country]),
    numeric(1),
    USE.NAMES = FALSE
  )
  totals_df <- data.frame(
    Country = names,
    Total = totals,
    stringsAsFactors = FALSE
  )

  percentage_df <- merge(counts, totals_df, by = "Country")
  percentage_df$Percentage <- percentage_df$Frequency / percentage_df$Total
  percentage_df
}
# Question 2: keep respondents who answered Q2, normalize the answer counts
# within each country group, and drop the zero shares.
Q2_data <- filter(new_data, Q2 != "")
names <- unique(Q2_data$Merged)
Q2_normalized <- filter(
  normalize_data(Q2_data, Q2_data$Q2, names),
  Percentage != 0
)
Q2_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 9 14 0.6428571
## 2 Mexico Slightly offensive 5 14 0.3571429
## 3 Other Moderately offensive 7 10 0.7000000
## 4 Other Not offensive 2 10 0.2000000
## 5 Other Slightly offensive 1 10 0.1000000
## 6 Spain Moderately offensive 4 10 0.4000000
## 7 Spain Not offensive 1 10 0.1000000
## 8 Spain Slightly offensive 5 10 0.5000000
## 9 USA Moderately offensive 6 18 0.3333333
## 10 USA Not offensive 5 18 0.2777778
## 11 USA Slightly offensive 7 18 0.3888889
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q2_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 2",
    subtitle = "We could not be seated at the restaurant. It was flooded with Gringos.",
    x = "Rank",
    y = "Normalized Percentage"
  )
# Question 3: keep respondents who answered Q3, normalize the answer counts
# within each country group, and drop the zero shares.
Q3_data <- filter(new_data, Q3 != "")
names <- unique(Q3_data$Merged)
Q3_normalized <- filter(
  normalize_data(Q3_data, Q3_data$Q3, names),
  Percentage != 0
)
Q3_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 3 20 0.1500000
## 2 Mexico Not offensive 11 20 0.5500000
## 3 Mexico Slightly offensive 6 20 0.3000000
## 4 Other Moderately offensive 3 14 0.2142857
## 5 Other Not offensive 5 14 0.3571429
## 6 Other Slightly offensive 6 14 0.4285714
## 7 Spain Moderately offensive 4 15 0.2666667
## 8 Spain Not offensive 7 15 0.4666667
## 9 Spain Slightly offensive 4 15 0.2666667
## 10 USA Moderately offensive 4 27 0.1481481
## 11 USA Not offensive 16 27 0.5925926
## 12 USA Slightly offensive 7 27 0.2592593
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q3_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 3",
    subtitle = "The Americans interrupted with their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )
# Question 4: keep respondents who answered Q4, normalize the answer counts
# within each country group, and drop the zero shares.
Q4_data <- filter(new_data, Q4 != "")
names <- unique(Q4_data$Merged)
Q4_normalized <- filter(
  normalize_data(Q4_data, Q4_data$Q4, names),
  Percentage != 0
)
Q4_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 6 17 0.3529412
## 2 Mexico Not offensive 4 17 0.2352941
## 3 Mexico Slightly offensive 7 17 0.4117647
## 4 Other Moderately offensive 4 11 0.3636364
## 5 Other Slightly offensive 7 11 0.6363636
## 6 Spain Moderately offensive 4 8 0.5000000
## 7 Spain Slightly offensive 4 8 0.5000000
## 8 USA Moderately offensive 13 20 0.6500000
## 9 USA Not offensive 1 20 0.0500000
## 10 USA Slightly offensive 6 20 0.3000000
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q4_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 4",
    subtitle = "We could not be seated at the restaurant. It was filled with Gringos.",
    x = "Rank",
    y = "Normalized Percentage"
  )
# Question 6: keep respondents who answered Q6, normalize the answer counts
# within each country group, and drop the zero shares.
Q6_data <- filter(new_data, Q6 != "")
names <- unique(Q6_data$Merged)
Q6_normalized <- filter(
  normalize_data(Q6_data, Q6_data$Q6, names),
  Percentage != 0
)
Q6_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 3 18 0.1666667
## 2 Mexico Not offensive 6 18 0.3333333
## 3 Mexico Slightly offensive 9 18 0.5000000
## 4 Other Moderately offensive 5 13 0.3846154
## 5 Other Not offensive 4 13 0.3076923
## 6 Other Slightly offensive 4 13 0.3076923
## 7 Spain Moderately offensive 2 11 0.1818182
## 8 Spain Not offensive 2 11 0.1818182
## 9 Spain Slightly offensive 7 11 0.6363636
## 10 USA Moderately offensive 7 25 0.2800000
## 11 USA Not offensive 9 25 0.3600000
## 12 USA Slightly offensive 9 25 0.3600000
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q6_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 6",
    subtitle = "We could not be seated at the restaurant. It was flooded with Americans.",
    x = "Rank",
    y = "Normalized Percentage"
  )
# Question 7: keep respondents who answered Q7, normalize the answer counts
# within each country group, and drop the zero shares.
Q7_data <- filter(new_data, Q7 != "")
names <- unique(Q7_data$Merged)
Q7_normalized <- filter(
  normalize_data(Q7_data, Q7_data$Q7, names),
  Percentage != 0
)
Q7_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 5 14 0.35714286
## 2 Mexico Not offensive 1 14 0.07142857
## 3 Mexico Slightly offensive 8 14 0.57142857
## 4 Other Moderately offensive 5 8 0.62500000
## 5 Other Not offensive 1 8 0.12500000
## 6 Other Slightly offensive 2 8 0.25000000
## 7 Spain Moderately offensive 9 13 0.69230769
## 8 Spain Not offensive 1 13 0.07692308
## 9 Spain Slightly offensive 3 13 0.23076923
## 10 USA Moderately offensive 4 18 0.22222222
## 11 USA Not offensive 4 18 0.22222222
## 12 USA Slightly offensive 10 18 0.55555556
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q7_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 7",
    subtitle = "The Gringos interrupted with their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )
# Question 8: keep respondents who answered Q8, normalize the answer counts
# within each country group, and drop the zero shares.
Q8_data <- filter(new_data, Q8 != "")
names <- unique(Q8_data$Merged)
Q8_normalized <- filter(
  normalize_data(Q8_data, Q8_data$Q8, names),
  Percentage != 0
)
Q8_normalized
## Country Answer Frequency Total Percentage
## 1 Mexico Moderately offensive 5 16 0.3125000
## 2 Mexico Not offensive 6 16 0.3750000
## 3 Mexico Slightly offensive 5 16 0.3125000
## 4 Other Not offensive 2 6 0.3333333
## 5 Other Slightly offensive 4 6 0.6666667
## 6 Spain Moderately offensive 3 12 0.2500000
## 7 Spain Not offensive 2 12 0.1666667
## 8 Spain Slightly offensive 7 12 0.5833333
## 9 USA Moderately offensive 13 27 0.4814815
## 10 USA Not offensive 7 27 0.2592593
## 11 USA Slightly offensive 7 27 0.2592593
# Dodged bar chart of the shares: one bar per country for every rating.
p <- ggplot(
  Q8_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_bar(stat = "identity", position = position_dodge())
p +
  labs(
    title = "Question 8",
    subtitle = "We could not be seated at the restaurant. It was filled with Americans.",
    x = "Rank",
    y = "Normalized Percentage"
  )