Importing the data

# Print the working directory; the CSV below is read relative to it.
getwd()
## [1] "/Users/nikibirkner/Documents/Others/Thesis/Others"
# Load the raw survey export, then discard its first two rows in one step.
# NOTE(review): presumably those rows are extra header/metadata lines of the
# export rather than responses - confirm against the CSV.
data <- read.csv("Raw_Language Attitudes_February23,2020.csv")
data <- data[-(1:2), ]

Cleaning up the data

# Keep only rows that are not survey previews and whose Q9 is not an empty
# string, then report the dimensions of what is left.
new_data <- subset(data, Status != "Survey Preview" & Q9 != "")
dim(new_data)
## [1] 309  44
# Standardise the free-text country answers in Q14_2_TEXT: any entry that
# mentions Spain (including "Madrid") becomes "Spain", and any remaining entry
# that mentions Mexico becomes "Mexico"; every other entry is left untouched.
# Matching is case-insensitive, and Spain wins when both are mentioned, exactly
# as in the earlier row-by-row loop.
# The work is vectorised, so zero rows and NA entries no longer raise an error.
# The column is stored as character afterwards.
new_data$Q14_2_TEXT <- local({
  country_text <- as.character(new_data$Q14_2_TEXT)

  # "Èspaña" is kept from the original pattern list; the regular spelling
  # "España" is added because an accented "È" never matches a plain "E".
  mentions_spain <- grepl("Spain|España|Èspaña|Madrid", country_text,
                          ignore.case = TRUE)
  mentions_mexico <- grepl("Mexico", country_text, ignore.case = TRUE)

  country_text[mentions_spain] <- "Spain"
  country_text[mentions_mexico & !mentions_spain] <- "Mexico"
  country_text
})

# Collapse each respondent's country into a single label stored in
# new_data$Merged: "USA", "Spain", "Mexico" or "Other".
# Later assignments override earlier ones, so the precedence is
# Mexico > Spain > USA > Other.
#
# `v` is created at full length up front. Growing it by index left it shorter
# than the data whenever the last rows matched nothing, which made the final
# column assignment fail. `%in%` is used instead of `==` so NA entries simply
# fall through to "Other" instead of breaking an `if`.
v <- rep("Other", nrow(new_data))

v[as.character(new_data$Q14) %in% "USA"] <- "USA"
v[as.character(new_data$Q14_2_TEXT) %in% "Spain"] <- "Spain"
v[as.character(new_data$Q14_2_TEXT) %in% "Mexico"] <- "Mexico"

new_data$Merged <- v

Function to normalize data:

# Cross-tabulate country (data.frame$Merged) against the answers in `question`
# and express every count as a share of its country's total.
#
# Arguments:
#   data.frame  data frame with a `Merged` column holding the country label.
#   question    vector of answers, one per row of `data.frame`.
#   names       countries to compute totals for; countries not listed here are
#               dropped from the result by the inner join below.
#
# Returns a data frame with columns Country, Answer, Frequency, Total and
# Percentage (Frequency / Total, i.e. a proportion between 0 and 1), sorted by
# Country. Only the four answers listed in `kept_answers` are counted, so
# blank or other answers do not contribute to the totals. A country whose
# total is 0 gets NaN as Percentage.
#
# Implemented with base R only, so it does not depend on dplyr being attached
# and no local variable shadows the function it calls. An empty `names` now
# yields an empty result instead of an error.
#
# NOTE(review): "Very offensive" is kept here, while the plotting code in this
# file uses the level "Extremely offensive" - confirm which label the survey
# actually used.
normalize_data <- function(data.frame, question, names) {
  kept_answers <- c("Not offensive", "Slightly offensive",
                    "Moderately offensive", "Very offensive")

  # Long-format contingency table: one row per (country, answer) pair.
  counts <- as.data.frame(table(data.frame$Merged, question))
  names(counts) <- c("Country", "Answer", "Frequency")

  counts <- counts[counts$Answer %in% kept_answers, , drop = FALSE]
  # Stable descending sort by country. merge() re-sorts by Country anyway, but
  # feeding it rows in this order keeps the within-country row order unchanged.
  counts <- counts[order(-xtfrm(counts$Country)), , drop = FALSE]

  # Per-country totals over the kept answers (0 for a country with no rows).
  totals <- vapply(
    seq_along(names),
    function(i) sum(counts$Frequency[counts$Country %in% names[i]]),
    numeric(1)
  )
  totals_df <- base::data.frame(Country = names, Total = totals,
                                stringsAsFactors = FALSE)

  percentage_df <- merge(counts, totals_df, by = "Country")
  percentage_df$Percentage <- percentage_df$Frequency / percentage_df$Total
  percentage_df
}

Initial plots

Question 1: The Americans shared their opinions in the conference.

library(ggplot2)

# Question 1: keep respondents with a non-empty Q1 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q1_data <- filter(new_data, Q1 != "")
names <- unique(Q1_data$Merged)

Q1_normalized <- filter(
  normalize_data(Q1_data, Q1_data$Q1, names),
  Percentage != 0
)
Q1_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         1    18 0.05555556
## 2   Mexico        Not offensive        16    18 0.88888889
## 3   Mexico   Slightly offensive         1    18 0.05555556
## 4    Other Moderately offensive         1    12 0.08333333
## 5    Other        Not offensive        10    12 0.83333333
## 6    Other   Slightly offensive         1    12 0.08333333
## 7    Spain        Not offensive        14    17 0.82352941
## 8    Spain   Slightly offensive         3    17 0.17647059
## 9      USA        Not offensive        30    31 0.96774194
## 10     USA   Slightly offensive         1    31 0.03225806
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q1_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 1",
    subtitle = "The Americans shared their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 2: We could not be seated at the restaurant. It was flooded with Gringos.

# Question 2: keep respondents with a non-empty Q2 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q2_data <- filter(new_data, Q2 != "")
names <- unique(Q2_data$Merged)

Q2_normalized <- filter(
  normalize_data(Q2_data, Q2_data$Q2, names),
  Percentage != 0
)
Q2_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         9    14  0.6428571
## 2   Mexico   Slightly offensive         5    14  0.3571429
## 3    Other Moderately offensive         7    10  0.7000000
## 4    Other        Not offensive         2    10  0.2000000
## 5    Other   Slightly offensive         1    10  0.1000000
## 6    Spain Moderately offensive         4    10  0.4000000
## 7    Spain        Not offensive         1    10  0.1000000
## 8    Spain   Slightly offensive         5    10  0.5000000
## 9      USA Moderately offensive         6    18  0.3333333
## 10     USA        Not offensive         5    18  0.2777778
## 11     USA   Slightly offensive         7    18  0.3888889
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q2_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 2",
    subtitle = "We could not be seated at the restaurant. It was flooded with Gringos.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 3: The Americans interrupted with their opinions in the conference.

# Question 3: keep respondents with a non-empty Q3 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q3_data <- filter(new_data, Q3 != "")
names <- unique(Q3_data$Merged)

Q3_normalized <- filter(
  normalize_data(Q3_data, Q3_data$Q3, names),
  Percentage != 0
)
Q3_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         3    20  0.1500000
## 2   Mexico        Not offensive        11    20  0.5500000
## 3   Mexico   Slightly offensive         6    20  0.3000000
## 4    Other Moderately offensive         3    14  0.2142857
## 5    Other        Not offensive         5    14  0.3571429
## 6    Other   Slightly offensive         6    14  0.4285714
## 7    Spain Moderately offensive         4    15  0.2666667
## 8    Spain        Not offensive         7    15  0.4666667
## 9    Spain   Slightly offensive         4    15  0.2666667
## 10     USA Moderately offensive         4    27  0.1481481
## 11     USA        Not offensive        16    27  0.5925926
## 12     USA   Slightly offensive         7    27  0.2592593
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q3_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 3",
    subtitle = "The Americans interrupted with their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 4: We could not be seated at the restaurant. It was filled with Gringos.

# Question 4: keep respondents with a non-empty Q4 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q4_data <- filter(new_data, Q4 != "")
names <- unique(Q4_data$Merged)

Q4_normalized <- filter(
  normalize_data(Q4_data, Q4_data$Q4, names),
  Percentage != 0
)
Q4_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         6    17  0.3529412
## 2   Mexico        Not offensive         4    17  0.2352941
## 3   Mexico   Slightly offensive         7    17  0.4117647
## 4    Other Moderately offensive         4    11  0.3636364
## 5    Other   Slightly offensive         7    11  0.6363636
## 6    Spain Moderately offensive         4     8  0.5000000
## 7    Spain   Slightly offensive         4     8  0.5000000
## 8      USA Moderately offensive        13    20  0.6500000
## 9      USA        Not offensive         1    20  0.0500000
## 10     USA   Slightly offensive         6    20  0.3000000
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q4_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 4",
    subtitle = "We could not be seated at the restaurant. It was filled with Gringos.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 5: The Gringos shared their opinions in the conference.

# Question 5: keep respondents with a non-empty Q5 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q5_data <- filter(new_data, Q5 != "")
names <- unique(Q5_data$Merged)

Q5_normalized <- filter(
  normalize_data(Q5_data, Q5_data$Q5, names),
  Percentage != 0
)
Q5_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         9    20 0.45000000
## 2   Mexico        Not offensive         4    20 0.20000000
## 3   Mexico   Slightly offensive         7    20 0.35000000
## 4    Other Moderately offensive         4    15 0.26666667
## 5    Other        Not offensive         4    15 0.26666667
## 6    Other   Slightly offensive         7    15 0.46666667
## 7    Spain Moderately offensive         9    13 0.69230769
## 8    Spain        Not offensive         1    13 0.07692308
## 9    Spain   Slightly offensive         3    13 0.23076923
## 10     USA Moderately offensive         6    17 0.35294118
## 11     USA        Not offensive         3    17 0.17647059
## 12     USA   Slightly offensive         8    17 0.47058824
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q5_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())
p +
  labs(
    title = "Question 5",
    subtitle = "The Gringos shared their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 6: We could not be seated at the restaurant. It was flooded with Americans.

# Question 6: keep respondents with a non-empty Q6 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q6_data <- filter(new_data, Q6 != "")
names <- unique(Q6_data$Merged)

Q6_normalized <- filter(
  normalize_data(Q6_data, Q6_data$Q6, names),
  Percentage != 0
)
Q6_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         3    18  0.1666667
## 2   Mexico        Not offensive         6    18  0.3333333
## 3   Mexico   Slightly offensive         9    18  0.5000000
## 4    Other Moderately offensive         5    13  0.3846154
## 5    Other        Not offensive         4    13  0.3076923
## 6    Other   Slightly offensive         4    13  0.3076923
## 7    Spain Moderately offensive         2    11  0.1818182
## 8    Spain        Not offensive         2    11  0.1818182
## 9    Spain   Slightly offensive         7    11  0.6363636
## 10     USA Moderately offensive         7    25  0.2800000
## 11     USA        Not offensive         9    25  0.3600000
## 12     USA   Slightly offensive         9    25  0.3600000
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q6_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 6",
    subtitle = "We could not be seated at the restaurant. It was flooded with Americans.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 7: The Gringos interrupted with their opinions in the conference.

# Question 7: keep respondents with a non-empty Q7 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q7_data <- filter(new_data, Q7 != "")
names <- unique(Q7_data$Merged)

Q7_normalized <- filter(
  normalize_data(Q7_data, Q7_data$Q7, names),
  Percentage != 0
)
Q7_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         5    14 0.35714286
## 2   Mexico        Not offensive         1    14 0.07142857
## 3   Mexico   Slightly offensive         8    14 0.57142857
## 4    Other Moderately offensive         5     8 0.62500000
## 5    Other        Not offensive         1     8 0.12500000
## 6    Other   Slightly offensive         2     8 0.25000000
## 7    Spain Moderately offensive         9    13 0.69230769
## 8    Spain        Not offensive         1    13 0.07692308
## 9    Spain   Slightly offensive         3    13 0.23076923
## 10     USA Moderately offensive         4    18 0.22222222
## 11     USA        Not offensive         4    18 0.22222222
## 12     USA   Slightly offensive        10    18 0.55555556
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q7_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 7",
    subtitle = "The Gringos interrupted with their opinions in the conference.",
    x = "Rank",
    y = "Normalized Percentage"
  )

Question 8: We could not be seated at the restaurant. It was filled with Americans.

# Question 8: keep respondents with a non-empty Q8 answer, compute each
# country's share per answer, and drop the zero-share rows before printing.
Q8_data <- filter(new_data, Q8 != "")
names <- unique(Q8_data$Merged)

Q8_normalized <- filter(
  normalize_data(Q8_data, Q8_data$Q8, names),
  Percentage != 0
)
Q8_normalized
##    Country               Answer Frequency Total Percentage
## 1   Mexico Moderately offensive         5    16  0.3125000
## 2   Mexico        Not offensive         6    16  0.3750000
## 3   Mexico   Slightly offensive         5    16  0.3125000
## 4    Other        Not offensive         2     6  0.3333333
## 5    Other   Slightly offensive         4     6  0.6666667
## 6    Spain Moderately offensive         3    12  0.2500000
## 7    Spain        Not offensive         2    12  0.1666667
## 8    Spain   Slightly offensive         7    12  0.5833333
## 9      USA Moderately offensive        13    27  0.4814815
## 10     USA        Not offensive         7    27  0.2592593
## 11     USA   Slightly offensive         7    27  0.2592593
# Dodged bars: one bar per country within each answer category, with the
# answers ordered from most to least offensive on the x axis.
# NOTE(review): the level "Extremely offensive" is not among the answers kept
# by normalize_data() ("Very offensive" is) - confirm the survey's wording.
p <- ggplot(
  Q8_normalized,
  aes(
    x = factor(Answer, levels = c("Extremely offensive", "Moderately offensive",
                                  "Slightly offensive", "Not offensive")),
    y = Percentage,
    fill = Country
  )
) +
  geom_col(position = position_dodge())

p +
  labs(
    title = "Question 8",
    subtitle = "We could not be seated at the restaurant. It was filled with Americans.",
    x = "Rank",
    y = "Normalized Percentage"
  )