ProblemSet12

Make a new column based on subsetting or grouping the original data. Use string searches to help with this.

# Load the raw tweet exports, one CSV file per hashtag.
# The directories are defined once so the script can be pointed at another copy
# of the data by editing a single place; the resulting paths are unchanged.
course_dir <- "/Users/ellievanhattem/Desktop/Anthro630"
hashtag_dir <- file.path(course_dir, "Twitter_Hashtags_ANTH630")

# NOTE(review): the Fosen export is read from the course directory itself, not
# from the hashtag sub-directory like the other files -- kept as in the
# original; confirm this is intended.
Fosen <- read.csv(file.path(course_dir, "Fosen-tweets.csv"))
SamiRights <- read.csv(file.path(hashtag_dir, "SamiRights-tweets.csv"))
StandwithSápmi <- read.csv(file.path(hashtag_dir, "StandWithSápmi-tweets.csv"))
Sami <- read.csv(file.path(hashtag_dir, "Sami-tweets.csv"))

Sápmi <- read.csv(file.path(hashtag_dir, "sápmi-tweets.csv"))

WindMills <- read.csv(file.path(hashtag_dir, "WindFarms-tweets.csv"))

IndigenousPeoples <- read.csv(
  file.path(hashtag_dir, "IndigenousPeoples-tweets.csv")
)

Samilandrights <- read.csv(file.path(hashtag_dir, "saamilandrights-tweets.csv"))


# Working copies, listed in the order the files were read. The later steps in
# this script modify the "*1" objects rather than the data frames read from
# disk.
Fosen1 <- Fosen
SamiRights1 <- SamiRights
StandwithSápmi1 <- StandwithSápmi
Sami1 <- Sami
Sápmi1 <- Sápmi
WindMills1 <- WindMills
IndigenousPeoples1 <- IndigenousPeoples
Samilandrights1 <- Samilandrights

#str(Samilandrights1)
#str(IndigenousPeoples1)
#str(WindMills1)
#str(Sápmi1)
#str(StandwithSápmi1)
#str(SamiRights1)
#str(Fosen1)

# Split "date" into two columns that will separate the time and the date values.
#
# split_datetime() calls tidyr::separate() directly instead of going through
# `%>%`: no library() call that would provide the pipe is visible before this
# point in the script, so the piped form depends on something loaded elsewhere.
#
# Arguments:
#   tweets  A data frame with a character column named "date".
# Returns:
#   `tweets` with "date" replaced by "date1" (its first 11 characters) and
#   "time" (the 8 characters that follow).
split_datetime <- function(tweets) {
  # separate() removes the original "date" column (remove = TRUE is its
  # default), so no extra clean-up step is needed.
  # NOTE(review): splitting at position 11 leaves the 11th character
  # (presumably the separator between date and time) at the end of date1 --
  # confirm against the CSV whether it should be trimmed.
  tweets <- tidyr::separate(tweets, date, into = c("date1", "time"), sep = 11)
  # Keep only the first 8 characters of the remainder (presumably "HH:MM:SS",
  # dropping a trailing offset -- TODO confirm).
  tweets$time <- substr(tweets$time, 1, 8)
  tweets
}

# Tag 1
Fosen1 <- split_datetime(Fosen1)

# Tag 2
SamiRights1 <- split_datetime(SamiRights1)

# Tag 3
StandwithSápmi1 <- split_datetime(StandwithSápmi1)

# Tag 4
Sami1 <- split_datetime(Sami1)

# Tag 5
Sápmi1 <- split_datetime(Sápmi1)

# Tag 6
WindMills1 <- split_datetime(WindMills1)

# Tag 7
IndigenousPeoples1 <- split_datetime(IndigenousPeoples1)

# Tag 8
Samilandrights1 <- split_datetime(Samilandrights1)

Convert the ‘content’ text column to lowercase

# Lower-case the tweet text of every hashtag table in place.
Fosen1[["content"]] <- tolower(Fosen1[["content"]])
SamiRights1[["content"]] <- tolower(SamiRights1[["content"]])
StandwithSápmi1[["content"]] <- tolower(StandwithSápmi1[["content"]])
Sami1[["content"]] <- tolower(Sami1[["content"]])
Sápmi1[["content"]] <- tolower(Sápmi1[["content"]])
WindMills1[["content"]] <- tolower(WindMills1[["content"]])
IndigenousPeoples1[["content"]] <- tolower(IndigenousPeoples1[["content"]])
Samilandrights1[["content"]] <- tolower(Samilandrights1[["content"]])

# Defined the keywords to search for
keywords <- c(
  "statsministeren", "politiet", "protestors", "protesting", "windmillparks",
  "vindmølleparker", "distriktspolitiet", "solidarity", "vindmøllene",
  "regjeringen", "government"
)

# Defined countries to search for in the tweets
# ("Finish" corrected to "Finnish"; the vector is not used by the code below.)
country <- c(
  "Finland", "America", "Norway", "Sweden", "Fosen", "Norwegian", "American",
  "Swedish", "Finnish"
)

# Build a logical match matrix: one row per element of `text`, one column per
# element of `terms`, TRUE where the term occurs in the text (case-insensitive
# regular-expression match via grepl()).
#
# vapply() plus an explicit matrix() call guarantees a matrix even when `text`
# has one or zero elements, where sapply() would return a plain vector or list.
#
# Arguments:
#   text   Character vector to search in.
#   terms  Character vector of patterns; they become the column names.
# Returns:
#   A length(text) x length(terms) logical matrix.
match_keywords <- function(text, terms) {
  hits <- vapply(
    terms, grepl, logical(length(text)),
    x = text, ignore.case = TRUE
  )
  matrix(
    hits,
    nrow = length(text), ncol = length(terms),
    dimnames = list(NULL, terms)
  )
}

# Turn a match matrix into one topic label per row.
#
# ties.method = "first" makes the result deterministic: when a row matches
# several keywords, the keyword that comes first in the column order wins.
# (The max.col() default, "random", picks one of the matches at random.)
#
# Arguments:
#   hits  Logical matrix as returned by match_keywords().
# Returns:
#   Character vector with the matched keyword, or "Other" for rows without any
#   match.
assign_topic <- function(hits) {
  first_hit <- max.col(hits, ties.method = "first")
  ifelse(rowSums(hits) > 0, colnames(hits)[first_hit], "Other")
}

# Create a new topic column based on the matches for the different tags dataset

matches_fosen <- match_keywords(Fosen1$content, keywords)
Fosen1$topic <- assign_topic(matches_fosen)

matches_sami <- match_keywords(Sami1$content, keywords)
Sami1$topic <- assign_topic(matches_sami)

matches_samilandrights <- match_keywords(Samilandrights1$content, keywords)
Samilandrights1$topic <- assign_topic(matches_samilandrights)

matches_samirights <- match_keywords(SamiRights1$content, keywords)
SamiRights1$topic <- assign_topic(matches_samirights)

matches_indigenous <- match_keywords(IndigenousPeoples1$content, keywords)
IndigenousPeoples1$topic <- assign_topic(matches_indigenous)

matches_windmills <- match_keywords(WindMills1$content, keywords)
WindMills1$topic <- assign_topic(matches_windmills)

matches_sapmi <- match_keywords(Sápmi1$content, keywords)
Sápmi1$topic <- assign_topic(matches_sapmi)

matches_standwithsapmi <- match_keywords(StandwithSápmi1$content, keywords)
StandwithSápmi1$topic <- assign_topic(matches_standwithsapmi)

Pivot all or part of the dataframe into either wide or long format.

# Use pivot_wider to convert each hashtag table to wide format: one column per
# value of "topic", filled with the "username" values.
#
# Every *_wide object is now built from its own table; previously all eight
# were pivoted from Fosen1 (copy-paste slip).
#
# Arguments:
#   tweets  A data frame with "topic" and "username" columns.
# Returns:
#   The wide-format table produced by tidyr::pivot_wider().
widen_by_topic <- function(tweets) {
  tidyr::pivot_wider(tweets, names_from = "topic", values_from = "username")
}

Fosen1_wide <- widen_by_topic(Fosen1)
Sami1_wide <- widen_by_topic(Sami1)
Samilandrights1_wide <- widen_by_topic(Samilandrights1)
SamiRights1_wide <- widen_by_topic(SamiRights1)
IndigenousPeoples1_wide <- widen_by_topic(IndigenousPeoples1)
WindMills1_wide <- widen_by_topic(WindMills1)
Sápmi1_wide <- widen_by_topic(Sápmi1)
StandwithSápmi1_wide <- widen_by_topic(StandwithSápmi1)

Create a custom stopword list and augment with existing Norwegian stopword list

# knitr::opts_chunk$set(echo = TRUE)
# Using this argument did not help present the data in nice columns, the way it appears in the markdown before knitting.


library(tidytext)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Custom English stop words, plus the URL fragments "https", "http" and "t.co"
my_stopwords_en <- c("the", "and", "or", "but", "a", "an", "to", "of", "in", "is", "this", "that", "it", "with", "for", "https", "http", "t.co")

# Norwegian stop words taken from the stopwords package
no_stopwords <- stopwords::stopwords(language = "norwegian")

# One table per source, each tagged with its language and lexicon name.
# The two tables list their columns in a different order; rbind() on data
# frames aligns them by column name.
my_stopwords_en_df <- data.frame(
  word = my_stopwords_en,
  language = "en",
  lexicon = "my_stopwords_en"
)
no_stopwords_df <- data.frame(
  word = no_stopwords,
  lexicon = "no_stopwords",
  language = "no"
)

# Stack the custom English list on top of the Norwegian list
combined_stopwords <- rbind(my_stopwords_en_df, no_stopwords_df)

# Preview the first rows of the combined table
head(combined_stopwords)

##   word language         lexicon
## 1  the       en my_stopwords_en
## 2  and       en my_stopwords_en
## 3   or       en my_stopwords_en
## 4  but       en my_stopwords_en
## 5    a       en my_stopwords_en
## 6   an       en my_stopwords_en

library(dplyr)
library(tidytext)


# Convert the matrix to a dataframe
fosen_df <- as.data.frame(matches_fosen)
sami_df <- as.data.frame(matches_sami)
samilandrights_df <- as.data.frame(matches_samilandrights)
samirights_df <- as.data.frame(matches_samirights)
indigenous_df <- as.data.frame(matches_indigenous)
windmills_df <- as.data.frame(matches_windmills)
sapmi_df <- as.data.frame(matches_sapmi)
standwithsapmi_df <- as.data.frame(matches_standwithsapmi)

# Reshape a keyword-match table into tidy (long) format.
#
# Arguments:
#   match_df        Data frame with one column per keyword.
#   dataset_label   Value written to the "dataset" column of every row.
#   stopword_terms  Character vector of stop words. The default is the "word"
#                   column of combined_stopwords; comparing against the data
#                   frame itself (as before) tests the keyword against whole
#                   columns and therefore never flags anything.
# Returns:
#   A table with columns "word" (the keyword), "count" (the cell value; these
#   are the TRUE/FALSE match flags, the column name is kept for compatibility),
#   "lexicon" ("combined_stopwords" if the keyword is a stop word, otherwise
#   "none") and "dataset".
tidy_matches <- function(match_df, dataset_label,
                         stopword_terms = combined_stopwords$word) {
  match_df %>%
    # Qualified with tidyr:: because no library(tidyr) call is visible in the
    # script.
    tidyr::pivot_longer(
      cols = dplyr::everything(),
      names_to = "word",
      values_to = "count"
    ) %>%
    dplyr::mutate(
      lexicon = ifelse(word %in% stopword_terms, "combined_stopwords", "none"),
      dataset = dataset_label
    )
}

# Reshape the data into a tidy format
fosen_tidy <- tidy_matches(fosen_df, "matches_fosen")
sami_tidy <- tidy_matches(sami_df, "matches_sami")
samilandrights_tidy <- tidy_matches(samilandrights_df, "matches_samilandrights")
# Label corrected from "samirights_fosen" to follow the pattern of the others.
samirights_tidy <- tidy_matches(samirights_df, "matches_samirights")
indigenous_tidy <- tidy_matches(indigenous_df, "matches_indigenous")
windmills_tidy <- tidy_matches(windmills_df, "matches_windmills")
sapmi_tidy <- tidy_matches(sapmi_df, "matches_sapmi")
standwithsapmi_tidy <- tidy_matches(standwithsapmi_df, "matches_standwithsapmi")

Remove stop words from column “content”

library(dplyr)

# Tokenise the "content" column into one word per row and drop every row whose
# word appears in the stop-word table.
#
# The join key is spelled out (by = "word") so the result does not depend on
# which other column names the two tables happen to share; this also silences
# the "Joining with `by = join_by(word)`" message.
#
# Arguments:
#   tweets          A data frame with a text column named "content".
#   stopword_table  A data frame with a "word" column; defaults to
#                   combined_stopwords.
# Returns:
#   The tokenised table (column "word" in place of "content") without the
#   stop-word rows.
remove_stopwords <- function(tweets, stopword_table = combined_stopwords) {
  tweets %>%
    unnest_tokens(word, content) %>%
    anti_join(stopword_table, by = "word")
}

# Remove stop words from the content column of every hashtag table
fosen1_tidy <- remove_stopwords(Fosen1)
sami1_tidy <- remove_stopwords(Sami1)
samilandrights1_tidy <- remove_stopwords(Samilandrights1)
samirights1_tidy <- remove_stopwords(SamiRights1)
indigenouspeoples1_tidy <- remove_stopwords(IndigenousPeoples1)
windmills1_tidy <- remove_stopwords(WindMills1)
sápmi1_tidy <- remove_stopwords(Sápmi1)
StandwithSápmi1_tidy <- remove_stopwords(StandwithSápmi1)

library(ggplot2)
library(dplyr)

# Tag every tokenised table with the name of the hashtag it came from
fosen1_tidy <- mutate(fosen1_tidy, dataset = "Fosen")
sami1_tidy <- mutate(sami1_tidy, dataset = "Sami")
samirights1_tidy <- mutate(samirights1_tidy, dataset = "SamiRights")
samilandrights1_tidy <- mutate(samilandrights1_tidy, dataset = "Samilandrights")
sápmi1_tidy <- mutate(sápmi1_tidy, dataset = "Sápmi")
StandwithSápmi1_tidy <- mutate(StandwithSápmi1_tidy, dataset = "StandwithSápmi")
windmills1_tidy <- mutate(windmills1_tidy, dataset = "WindMills")
indigenouspeoples1_tidy <- mutate(
  indigenouspeoples1_tidy,
  dataset = "IndigenousPeoples"
)

# Stack all tagged tables into a single dataframe
all_tweets <- bind_rows(
  samilandrights1_tidy,
  indigenouspeoples1_tidy,
  windmills1_tidy,
  sápmi1_tidy,
  StandwithSápmi1_tidy,
  samirights1_tidy,
  fosen1_tidy,
  sami1_tidy
)

# Fix the random seed so the sample below is reproducible
set.seed(123)

# Draw up to 100 rows per dataset (all rows when a dataset has fewer).
# Each row is one word token produced by unnest_tokens(), not a whole tweet.
# The result stays grouped by dataset.
tweets_by_dataset <- group_by(all_tweets, dataset)
sample_tweets <- sample_n(tweets_by_dataset, min(100, n()))

Drop-down word clouds per Twitter hashtag

library(shiny)
library(wordcloud)
library(dplyr)

# UI
# Drop-down listing every dataset label present in sample_tweets
dataset_picker <- selectInput(
  "dataset", "Select Dataset:",
  choices = unique(sample_tweets$dataset)
)

# Page layout: title on top, picker in the sidebar, word cloud in the main panel
ui <- fluidPage(
  titlePanel("Word Cloud That shows the most frequently used Words for each Twitter Hashtag Showing the key words being used in association with Sámi enviromental justice"),
  sidebarLayout(
    sidebarPanel(dataset_picker),
    mainPanel(plotOutput("wordcloud"))
  )
)

# Server
server <- function(input, output) {

  # Rows of sample_tweets that belong to the dataset chosen in the drop-down
  selected_tweets <- reactive({
    filter(sample_tweets, dataset == input$dataset)
  })

  # Draw the word cloud from the word frequencies of the selected rows
  output$wordcloud <- renderPlot({
    freq_table <- count(selected_tweets(), word, sort = TRUE)

    wordcloud(
      words = freq_table$word,
      freq = freq_table$n,
      min.freq = 1,
      max.words = 100,
      random.order = FALSE,
      rot.per = 0.35,
      scale = c(4, 0.3),
      colors = brewer.pal(8, "Dark2")
    )
  })
}

# Launch the app
shinyApp(ui = ui, server = server)

Shiny applications not supported in static R Markdown documents

library(circlize)

## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================

library(dplyr)

# Count the distinct usernames per dataset, largest count first.
# NOTE(review): the counts are taken from sample_tweets (the sampled rows), not
# from the full data -- confirm this is the intended population.
user_dataset_pairs <- distinct(sample_tweets, username, dataset)
dataset_counts <- arrange(count(user_dataset_pairs, dataset), desc(n))

# One-row matrix of the counts, with the dataset names as column names
matrix_counts <- matrix(
  dataset_counts$n,
  nrow = 1,
  dimnames = list(NULL, dataset_counts$dataset)
)

# Draw the chord diagram
chordDiagram(matrix_counts, transparency = 0.5)

ProblemSet12

Ellie Van Hattem

2023-05-12

Make a new column based on subsetting or grouping the original data. Use string searches to help with this.

Convert the ‘content’ text column to lowercase

Pivot all or part of the dataframe into either wide or long format.

Create a custom stopword list and augment with existing Norwegian stopword list

Remove stop words from column “content”

Drop-down word clouds per Twitter hashtag