Make a new column based on subsetting or grouping the original data. Use string searches to help with this.

# Load the scraped tweets, one CSV file per hashtag. Each data frame is named
# after the hashtag it holds.
# NOTE(review): the Fosen file is read from the Anthro630 folder itself, while
# every other file is read from its Twitter_Hashtags_ANTH630 subfolder.
# Confirm this is intentional.
Fosen <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Fosen-tweets.csv"
)
SamiRights <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/SamiRights-tweets.csv"
)
StandwithSápmi <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/StandWithSápmi-tweets.csv"
)
Sami <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/Sami-tweets.csv"
)
Sápmi <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/sápmi-tweets.csv"
)
# NOTE(review): the WindMills object is filled from the WindFarms file.
WindMills <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/WindFarms-tweets.csv"
)
IndigenousPeoples <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/IndigenousPeoples-tweets.csv"
)
Samilandrights <- read.csv(
  file = "/Users/ellievanhattem/Desktop/Anthro630/Twitter_Hashtags_ANTH630/saamilandrights-tweets.csv"
)


# Work on copies (suffix "1") so the data frames read from disk stay untouched.
Fosen1 <- Fosen
SamiRights1 <- SamiRights
StandwithSápmi1 <- StandwithSápmi
Sami1 <- Sami
Sápmi1 <- Sápmi
WindMills1 <- WindMills
IndigenousPeoples1 <- IndigenousPeoples
Samilandrights1 <- Samilandrights

# Structure checks, left disabled; uncomment to inspect a data set.
# str(Fosen1)
# str(SamiRights1)
# str(StandwithSápmi1)
# str(Sápmi1)
# str(WindMills1)
# str(IndigenousPeoples1)
# str(Samilandrights1)
# Split "date" into two columns that separate the date and the time values.

# Split the combined timestamp column of one tweet data frame.
#
# tweets: a data frame with a character column `date`.
# Returns `tweets` with `date` replaced by `date1` (the date part) and `time`
# (the first 8 characters of the remainder).
#
# NOTE(review): assumes timestamps look like "YYYY-MM-DD HH:MM:SS..." (date in
# characters 1-10, a separator in position 11, the time from position 12).
# Confirm against the CSV files.
split_datetime <- function(tweets) {
  # separate() drops the original `date` column (remove = TRUE is its default).
  tweets <- tidyr::separate(
    tweets, date,
    into = c("date1", "time"), sep = 11
  )
  # Cutting at position 11 leaves the separator character at the end of
  # `date1`; strip it when it is whitespace.
  tweets$date1 <- trimws(tweets$date1)
  # Keep only the first 8 characters of the time part.
  tweets$time <- substr(tweets$time, 1, 8)
  tweets
}

Fosen1 <- split_datetime(Fosen1)                         # Tag 1
SamiRights1 <- split_datetime(SamiRights1)               # Tag 2
StandwithSápmi1 <- split_datetime(StandwithSápmi1)       # Tag 3
Sami1 <- split_datetime(Sami1)                           # Tag 4
Sápmi1 <- split_datetime(Sápmi1)                         # Tag 5
WindMills1 <- split_datetime(WindMills1)                 # Tag 6
IndigenousPeoples1 <- split_datetime(IndigenousPeoples1) # Tag 7
Samilandrights1 <- split_datetime(Samilandrights1)       # Tag 8

Convert the ‘content’ text column to lowercase

# Lowercase the tweet text of every hashtag data set in place.
Fosen1[["content"]] <- tolower(Fosen1[["content"]])
Sami1[["content"]] <- tolower(Sami1[["content"]])
Samilandrights1[["content"]] <- tolower(Samilandrights1[["content"]])
SamiRights1[["content"]] <- tolower(SamiRights1[["content"]])
IndigenousPeoples1[["content"]] <- tolower(IndigenousPeoples1[["content"]])
WindMills1[["content"]] <- tolower(WindMills1[["content"]])
Sápmi1[["content"]] <- tolower(Sápmi1[["content"]])
StandwithSápmi1[["content"]] <- tolower(StandwithSápmi1[["content"]])
# Keywords to search for in the tweet text.
keywords <- c("statsministeren", "politiet", "protestors", "protesting", "windmillparks", "vindmølleparker", "distriktspolitiet", "solidarity", "vindmøllene", "regjeringen", "government")

# Countries / nationalities to search for in the tweets. This vector is not
# used by the topic tagging below. "Finish" was a misspelling of "Finnish".
country <- c("Finland", "America", "Norway", "Sweden", "Fosen", "Norwegian", "American", "Swedish", "Finnish")

# Build a logical tweets-by-keywords matrix.
#
# content:  character vector of tweet texts.
# patterns: character vector of patterns, matched case-insensitively.
# Returns a logical matrix with one row per tweet and one column per pattern
# (column names are the patterns). Unlike sapply(), the result is a matrix
# even when `content` has zero or one element.
match_keywords <- function(content, patterns = keywords) {
  hits <- vapply(
    patterns,
    function(pattern) grepl(pattern, content, ignore.case = TRUE),
    logical(length(content))
  )
  matrix(
    hits,
    nrow = length(content), ncol = length(patterns),
    dimnames = list(NULL, patterns)
  )
}

# Pick one topic label per tweet from a match matrix.
#
# matches: logical matrix as returned by match_keywords().
# Returns a character vector: the matched keyword, or "Other" when no keyword
# matched.
pick_topic <- function(matches) {
  # max.col() breaks ties at random by default, so a tweet matching several
  # keywords could get a different topic on every run. "first" always picks
  # the keyword that comes first in the pattern list.
  first_hit <- max.col(matches, ties.method = "first")
  ifelse(rowSums(matches) > 0, colnames(matches)[first_hit], "Other")
}

# Create a new topic column for each hashtag data set. The match matrices are
# kept as separate objects.
matches_fosen <- match_keywords(Fosen1$content)
Fosen1$topic <- pick_topic(matches_fosen)

matches_sami <- match_keywords(Sami1$content)
Sami1$topic <- pick_topic(matches_sami)

matches_samilandrights <- match_keywords(Samilandrights1$content)
Samilandrights1$topic <- pick_topic(matches_samilandrights)

matches_samirights <- match_keywords(SamiRights1$content)
SamiRights1$topic <- pick_topic(matches_samirights)

matches_indigenous <- match_keywords(IndigenousPeoples1$content)
IndigenousPeoples1$topic <- pick_topic(matches_indigenous)

matches_windmills <- match_keywords(WindMills1$content)
WindMills1$topic <- pick_topic(matches_windmills)

matches_sapmi <- match_keywords(Sápmi1$content)
Sápmi1$topic <- pick_topic(matches_sapmi)

matches_standwithsapmi <- match_keywords(StandwithSápmi1$content)
StandwithSápmi1$topic <- pick_topic(matches_standwithsapmi)

Pivot all or part of the dataframe into either wide or long format.

# Use pivot_wider to convert each data frame to wide format: one column per
# topic, filled with the username of the tweet.
# Each *_wide object is built from its own data set; previously all eight were
# built from Fosen1, so seven of them were just copies of Fosen1_wide.
Fosen1_wide <- tidyr::pivot_wider(
  Fosen1,
  names_from = "topic", values_from = "username"
)
Sami1_wide <- tidyr::pivot_wider(
  Sami1,
  names_from = "topic", values_from = "username"
)
Samilandrights1_wide <- tidyr::pivot_wider(
  Samilandrights1,
  names_from = "topic", values_from = "username"
)
SamiRights1_wide <- tidyr::pivot_wider(
  SamiRights1,
  names_from = "topic", values_from = "username"
)
IndigenousPeoples1_wide <- tidyr::pivot_wider(
  IndigenousPeoples1,
  names_from = "topic", values_from = "username"
)
WindMills1_wide <- tidyr::pivot_wider(
  WindMills1,
  names_from = "topic", values_from = "username"
)
Sápmi1_wide <- tidyr::pivot_wider(
  Sápmi1,
  names_from = "topic", values_from = "username"
)
StandwithSápmi1_wide <- tidyr::pivot_wider(
  StandwithSápmi1,
  names_from = "topic", values_from = "username"
)

Create a custom stopword list and augment with existing Norwegian stopword list

# knitr::opts_chunk$set(echo = TRUE)  # Using this argument did not help present the data in neat columns like those shown in the R Markdown preview before knitting.


library(tidytext)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Custom English stop words, including URL fragments that show up in tweets.
my_stopwords_en <- c("the", "and", "or", "but", "a", "an", "to", "of", "in", "is", "this", "that", "it", "with", "for", "https", "http", "t.co")

# Norwegian stop words shipped with the stopwords package.
no_stopwords <- stopwords::stopwords(language = "norwegian")

# One data frame per list, each tagged with its language and its source list.
my_stopwords_en_df <- data.frame(
  word = my_stopwords_en,
  language = "en",
  lexicon = "my_stopwords_en"
)
no_stopwords_df <- data.frame(
  word = no_stopwords,
  lexicon = "no_stopwords",
  language = "no"
)

# Stack the two lists; rbind() lines the columns up by name.
combined_stopwords <- rbind(my_stopwords_en_df, no_stopwords_df)

# Preview the combined stop word list.
head(combined_stopwords)
##   word language         lexicon
## 1  the       en my_stopwords_en
## 2  and       en my_stopwords_en
## 3   or       en my_stopwords_en
## 4  but       en my_stopwords_en
## 5    a       en my_stopwords_en
## 6   an       en my_stopwords_en
library(dplyr)
library(tidytext)


# Convert each keyword-match matrix to a data frame.
fosen_df <- as.data.frame(matches_fosen)
sami_df <- as.data.frame(matches_sami)
samilandrights_df <- as.data.frame(matches_samilandrights)
samirights_df <- as.data.frame(matches_samirights)
indigenous_df <- as.data.frame(matches_indigenous)
windmills_df <- as.data.frame(matches_windmills)
sapmi_df <- as.data.frame(matches_sapmi)
standwithsapmi_df <- as.data.frame(matches_standwithsapmi)

# Reshape one keyword-match data frame into a tidy (long) format.
#
# matches_df:   data frame with one column per keyword.
# dataset_name: label stored in the `dataset` column of the result.
# Returns one row per tweet/keyword pair with columns `word` (the keyword),
# `count` (the matching cell value, TRUE/FALSE for the match matrices built
# above), `lexicon` and `dataset`.
tidy_keyword_matches <- function(matches_df, dataset_name) {
  long <- tidyr::pivot_longer(
    matches_df,
    cols = dplyr::everything(),
    names_to = "word", values_to = "count"
  )
  dplyr::mutate(
    long,
    # Compare against the `word` column. `%in% combined_stopwords` tested
    # each keyword against whole columns of the data frame, which can never
    # equal a single word.
    lexicon = ifelse(
      word %in% combined_stopwords$word, "combined_stopwords", "none"
    ),
    dataset = dataset_name
  )
}

fosen_tidy <- tidy_keyword_matches(fosen_df, "matches_fosen")
sami_tidy <- tidy_keyword_matches(sami_df, "matches_sami")
samilandrights_tidy <- tidy_keyword_matches(
  samilandrights_df, "matches_samilandrights"
)
# The label used to be "samirights_fosen", which did not follow the
# "matches_<dataset>" pattern of the other seven.
samirights_tidy <- tidy_keyword_matches(samirights_df, "matches_samirights")
indigenous_tidy <- tidy_keyword_matches(indigenous_df, "matches_indigenous")
windmills_tidy <- tidy_keyword_matches(windmills_df, "matches_windmills")
sapmi_tidy <- tidy_keyword_matches(sapmi_df, "matches_sapmi")
standwithsapmi_tidy <- tidy_keyword_matches(
  standwithsapmi_df, "matches_standwithsapmi"
)

Remove stop words from column “content”

library(dplyr)

# Tokenise the tweet text of one data set and drop stop words.
#
# tweets:       data frame with a text column `content`.
# stopwords_df: data frame with a `word` column listing the stop words.
# Returns one row per remaining token (column `word`); the other columns of
# `tweets` are carried along.
remove_stopwords <- function(tweets, stopwords_df = combined_stopwords) {
  tokens <- tidytext::unnest_tokens(tweets, word, content)
  # Join on `word` explicitly. This silences the "Joining with ..." message
  # and keeps the filter on `word` even if the two tables ever share another
  # column name.
  dplyr::anti_join(tokens, stopwords_df, by = "word")
}

# Remove stop words from the content column of every hashtag data set.
fosen1_tidy <- remove_stopwords(Fosen1)
sami1_tidy <- remove_stopwords(Sami1)
samilandrights1_tidy <- remove_stopwords(Samilandrights1)
samirights1_tidy <- remove_stopwords(SamiRights1)
indigenouspeoples1_tidy <- remove_stopwords(IndigenousPeoples1)
windmills1_tidy <- remove_stopwords(WindMills1)
sápmi1_tidy <- remove_stopwords(Sápmi1)
StandwithSápmi1_tidy <- remove_stopwords(StandwithSápmi1)
library(ggplot2)
library(dplyr)

# Tag every tokenised data set with the name of the hashtag it came from.
samirights1_tidy <- mutate(samirights1_tidy, dataset = "SamiRights")
StandwithSápmi1_tidy <- mutate(StandwithSápmi1_tidy, dataset = "StandwithSápmi")
sami1_tidy <- mutate(sami1_tidy, dataset = "Sami")
sápmi1_tidy <- mutate(sápmi1_tidy, dataset = "Sápmi")
windmills1_tidy <- mutate(windmills1_tidy, dataset = "WindMills")
indigenouspeoples1_tidy <- mutate(
  indigenouspeoples1_tidy,
  dataset = "IndigenousPeoples"
)
samilandrights1_tidy <- mutate(samilandrights1_tidy, dataset = "Samilandrights")
fosen1_tidy <- mutate(fosen1_tidy, dataset = "Fosen")

# Stack the eight data sets into one data frame.
all_tweets <- bind_rows(
  samilandrights1_tidy,
  indigenouspeoples1_tidy,
  windmills1_tidy,
  sápmi1_tidy,
  StandwithSápmi1_tidy,
  samirights1_tidy,
  fosen1_tidy,
  sami1_tidy
)

# Fix the random seed so the sample below is reproducible.
set.seed(123)

# Draw up to 100 rows per data set (fewer when a data set has under 100 rows).
# Each row is one token, not one tweet. The result stays grouped by `dataset`.
sample_tweets <- sample_n(group_by(all_tweets, dataset), min(100, n()))

Drop-down word clouds per Twitter hashtag

library(shiny)
library(wordcloud)
library(dplyr)

# UI: a drop-down to choose the hashtag data set and a plot area for its
# word cloud. The title previously misspelled "environmental".
ui <- fluidPage(
  titlePanel("Word Cloud That shows the most frequently used Words for each Twitter Hashtag Showing the key words being used in association with Sámi environmental justice"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        "dataset", "Select Dataset:",
        choices = unique(sample_tweets$dataset)
      )
    ),
    mainPanel(
      plotOutput("wordcloud")
    )
  )
)

# Server: redraws the word cloud whenever another data set is selected.
server <- function(input, output) {

  # Rows of sample_tweets that belong to the selected data set.
  filtered_tweets <- reactive({
    # Wait until a data set is actually selected.
    req(input$dataset)
    sample_tweets %>%
      filter(dataset == input$dataset)
  })

  # Word cloud of the word frequencies in the selected data set.
  output$wordcloud <- renderPlot({
    word_freq <- filtered_tweets() %>%
      count(word, sort = TRUE)

    wordcloud(
      words = word_freq$word, freq = word_freq$n,
      min.freq = 1, max.words = 100, random.order = FALSE, rot.per = 0.35,
      scale = c(4, 0.3), colors = brewer.pal(8, "Dark2")
    )
  })
}

# Run the app
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(dplyr)

# Number of distinct usernames in the sample for each data set, largest first.
dataset_counts <- arrange(
  count(distinct(sample_tweets, username, dataset), dataset),
  desc(n)
)

# One-row matrix of those counts, with the data set names as column names.
matrix_counts <- matrix(
  dataset_counts$n,
  nrow = 1,
  dimnames = list(NULL, dataset_counts$dataset)
)

# Draw the chord diagram from the count matrix.
chordDiagram(matrix_counts, transparency = 0.5)