# Search for tweets carrying the hashtag #Lyrik: at most 10000 results,
# restricted to lang = "de", with retweets included. Then preview the
# first rows of the result.
lyrik_tweets <- search_tweets(
  "#Lyrik",
  n = 10000,
  include_rts = TRUE,
  lang = "de"
)
head(lyrik_tweets)
## # A tibble: 6 × 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 173733987 1538056013718462465 2022-06-18 07:08:24 mf_media "Leono… Twitt…
## 2 173733987 1538055949960851457 2022-06-18 07:08:08 mf_media "Klaus… Twitt…
## 3 173733987 1538055935880482816 2022-06-18 07:08:05 mf_media "Rolan… Twitt…
## 4 361919278 1538054736519233536 2022-06-18 07:03:19 tagescollagen "tages… Twitt…
## 5 361919278 1535149739347419136 2022-06-10 06:39:54 tagescollagen "tages… Twitt…
## 6 361919278 1535901235089375233 2022-06-12 08:26:04 tagescollagen "tages… Twitt…
## # … with 84 more variables: display_text_width <dbl>, reply_to_status_id <chr>,
## # reply_to_user_id <chr>, reply_to_screen_name <chr>, is_quote <lgl>,
## # is_retweet <lgl>, favorite_count <int>, retweet_count <int>,
## # quote_count <int>, reply_count <int>, hashtags <list>, symbols <list>,
## # urls_url <list>, urls_t.co <list>, urls_expanded_url <list>,
## # media_url <list>, media_t.co <list>, media_expanded_url <list>,
## # media_type <list>, ext_media_url <list>, ext_media_t.co <list>, …
# Keep only the author handle and the tweet body, then preview the raw texts.
lyrik_text <- select(lyrik_tweets, screen_name, text)
head(lyrik_text[["text"]])
## [1] "Leonore Dubach für #kkl17 „Begegnung“\nZügig\nGedanken verloren\naus dem Fenster schauen\nBlicke austauschen\nZug um... https://t.co/CBTbikJLtY #lyrik #literatur #kkl https://t.co/QQlfriG4ZK"
## [2] "Klaus Pelster für #kkl17 „Begegnung“\nSpiegelMeinerSeele\nLeidenSchaftliche\nIneinswerdung\nEinAtem EinHerz\nÜberm... https://t.co/rvXvVvf02P #literatur #lyrik #seele"
## [3] "Roland Schmidlin für #kkl17 „Begegnung“\nMORGENGRUSS\nDie aufgehende Sonne ohne Frag\nLichtet die letzten Fetzen Traum Tag\nZeige mir den Zeitenspiegel Schein\nUmarmungen ... https://t.co/Mscf4s8THt #literatur #kkl_kunstkulturliteratur #kkl #kunst #lyrik #poet #gedicht https://t.co/e8fE7EMrqJ"
## [4] "tagescollage 18.06.2022 • samstag \nhttps://t.co/jJsExxegnO \n#haiku #collagen #collageart #kunst #collageartwork #zeitung #headlines #artistoninstagram #sprache #stopWarInUkraine #ukrainekrieg #poetry #lyrik #sachslehner #millionaire #wahnsinn #dummheit #ard #zdf #orf #zib2 #geld https://t.co/0tf0gZs5XZ"
## [5] "tagescollage 10.06.2022 • freitag \nhttps://t.co/jJsExxegnO \n#haiku #collagen #collageart #kunst #collageartwork #zeitung #headlines #artistoninstagram #sprache #stopWarInUkraine #ukrainekrieg #poetry #lyrik #fische #alles #ArtistOnTwitter #artist #Klimakrise https://t.co/Eo7P90M2Vh"
## [6] "tagescollage 12.06.2022 • sonntag \nhttps://t.co/jJsExxegnO\n#haiku #collagen #collageart #kunst #collageartwork #zeitung #headlines #artistoninstagram #sprache #stopWarInUkraine #ukrainekrieg #poetry #lyrik #viennapride #oevpkrise #artist #artistspoken #ArtistOnTwitter https://t.co/bUGUls0COS"
# URL clean-up.
# Remove every http:// or https:// link from the tweet text in one pass.
# The pattern stops at the next whitespace, so only the URL itself is
# dropped and any words or hashtags following it are kept for the analysis.
lyrik_text$stripped_text <- gsub("https?://\\S+", "", lyrik_text$text)

# Split the cleaned text into one token per row. Case is preserved
# (to_lower may be FALSE or TRUE); the lexicon lookup further down joins
# on the exact word form.
lyrik_text_stem <- lyrik_text %>%
  select(stripped_text) %>%
  unnest_tokens(word, stripped_text, to_lower = FALSE)

# Load the German stop word vector from the lsa package and wrap it in a
# one-column table so it can be used as the right-hand side of an anti join.
data(stopwords_de, package = "lsa")
stopwords_de <- tibble(word = stopwords_de)

# Drop stop words and the search term itself (both spellings), which would
# otherwise dominate every frequency count.
clean_lyrik_text <- lyrik_text_stem %>%
  anti_join(stopwords_de, by = "word") %>%
  filter(!word %in% c("lyrik", "Lyrik"))
head(clean_lyrik_text)
## # A tibble: 6 × 1
## word
## <chr>
## 1 Leonore
## 2 Dubach
## 3 kkl17
## 4 Begegnung
## 5 Zügig
## 6 Gedanken
# Horizontal bar chart of the 15 most frequent words (ties included).
# `word` is mapped to x and the count `n` to y; coord_flip() only swaps
# where the axes are drawn, so the labels below are still given per
# aesthetic: x describes the words, y describes the counts.
clean_lyrik_text %>%
  count(word, sort = TRUE) %>%
  top_n(15, n) %>%
  # Order the factor levels by frequency so the bars are sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col(fill = "steelblue4") +
  coord_flip() +
  labs(
    x = "Wörter",
    y = "Anzahl",
    title = "Anzahl der häufigsten Wörter",
    subtitle = "Tweets mit #Lyrik, 13. Juni 2022", # adjust to your own data
    caption = "\nSource: Data collected from TwitterAPI"
  )
library(data.table) # Definition einer Funktion namens "get_sentiws()"
# Download the SentiWS v2.0 archive into a session-temporary directory,
# parse the positive and negative word lists and return them as a single
# data.table.
#
# Takes no arguments. Requires network access and the data.table package.
#
# Returns a data.table with one row per word form and the columns
#   id     - running number of the lexicon entry within its source file
#   word   - the word form (the base form or one of its listed inflections)
#   base   - TRUE for the base form of the entry, FALSE for inflections
#   lemma  - the base form the row belongs to
#   pos    - the upper-case tag that follows the "|" separator
#   weight - the numeric sentiment weight of the entry
get_sentiws <- function() {
  sentiws_tmp_dir <- file.path(tempdir(), "sentiws")
  if (!dir.exists(sentiws_tmp_dir)) {
    dir.create(sentiws_tmp_dir, recursive = TRUE)
  }
  sentiws_zipfile <- file.path(sentiws_tmp_dir, "SentiWS_v2.0c.zip")
  sentiws_url <- "http://pcai056.informatik.uni-leipzig.de/downloads/etc/SentiWS/SentiWS_v2.0.zip"
  # The archive is binary; request binary mode explicitly instead of
  # relying on platform-specific defaults.
  download.file(url = sentiws_url, destfile = sentiws_zipfile, mode = "wb")
  unzip(zipfile = sentiws_zipfile, exdir = sentiws_tmp_dir)

  # Expand one lexicon entry (a one-row subset with columns `word` and
  # `data`) into one row per word form. `data` is parsed as
  # "<POS><whitespace><weight><whitespace><comma-separated inflections>".
  .unfold <- function(.SD) {
    entry <- .SD[["data"]][1]
    lemma <- .SD[["word"]][1]
    pos <- gsub("^([A-Z]+)\\s+.*$", "\\1", entry)
    weight <- as.numeric(gsub("^[A-Z]+\\s+(-?\\d\\.\\d+).*$", "\\1", entry))
    words <- gsub("^[A-Z]+\\s+-?\\d\\.\\d+\\s*(.*?)\\s*$", "\\1", entry)
    # Entries without inflections leave an empty remainder.
    words <- if (!grepl("^\\s*$", words)) {
      strsplit(x = words, split = ",")[[1]]
    } else {
      NULL
    }
    list(
      word = c(lemma, words),
      base = c(TRUE, rep(FALSE, times = length(words))),
      lemma = lemma,
      pos = pos,
      weight = weight
    )
  }

  dts <- lapply(
    c(positive = "SentiWS_v2.0_Positive.txt", negative = "SentiWS_v2.0_Negative.txt"),
    function(filename) {
      # Both fields are read as text. header = FALSE is stated explicitly so
      # that header auto-detection can never promote the first lexicon entry
      # to column names (the word lists are expected to have no header row).
      dt <- fread(file.path(sentiws_tmp_dir, filename), sep = "|", header = FALSE)
      setnames(dt, c("word", "data"))
      # seq_len() stays correct for an empty table, unlike 1L:nrow(dt).
      dt[, "id" := seq_len(nrow(dt))]
      dt[, .unfold(.SD), by = c("id")]
    }
  )
  rbindlist(dts)
}
# Fetch the lexicon and attach a categorical label derived from the sign of
# the weight: weights <= 0 are labelled "negativ", weights > 0 "positiv".
all_sentiments <- get_sentiws()

all_sentiments_lexikon_neg <- mutate(
  filter(all_sentiments, weight <= 0),
  sentiment = "negativ"
)
all_sentiments_lexikon_pos <- mutate(
  filter(all_sentiments, weight > 0),
  sentiment = "positiv"
)

# Full outer merge of the two labelled halves into one lexicon table.
all_sentiments_lexikon <- merge(
  x = all_sentiments_lexikon_neg,
  y = all_sentiments_lexikon_pos,
  all = TRUE
)
head(all_sentiments_lexikon)
## id word base lemma pos weight sentiment
## 1: 1 Abbruch TRUE Abbruch NN -0.0048 negativ
## 2: 1 Abbruche FALSE Abbruch NN -0.0048 negativ
## 3: 1 Abbruches FALSE Abbruch NN -0.0048 negativ
## 4: 1 Abbruchs FALSE Abbruch NN -0.0048 negativ
## 5: 1 Abbrüche FALSE Abbruch NN -0.0048 negativ
## 6: 1 Abbrüchen FALSE Abbruch NN -0.0048 negativ
# Show the lexicon rows labelled "positiv".
filter(all_sentiments_lexikon, sentiment == "positiv")
## id word base lemma pos weight sentiment
## 1: 1 Abschluss FALSE Abschluß NN 0.004 positiv
## 2: 1 Abschlusse FALSE Abschluß NN 0.004 positiv
## 3: 1 Abschlusses FALSE Abschluß NN 0.004 positiv
## 4: 1 Abschluß TRUE Abschluß NN 0.004 positiv
## 5: 1 Abschlüsse FALSE Abschluß NN 0.004 positiv
## ---
## 16561: 1643 üppigste FALSE üppig ADJX 0.201 positiv
## 16562: 1643 üppigstem FALSE üppig ADJX 0.201 positiv
## 16563: 1643 üppigsten FALSE üppig ADJX 0.201 positiv
## 16564: 1643 üppigster FALSE üppig ADJX 0.201 positiv
## 16565: 1643 üppigstes FALSE üppig ADJX 0.201 positiv
# Show the lexicon rows labelled "negativ".
filter(all_sentiments_lexikon, sentiment == "negativ")
## id word base lemma pos weight sentiment
## 1: 1 Abbruch TRUE Abbruch NN -0.0048 negativ
## 2: 1 Abbruche FALSE Abbruch NN -0.0048 negativ
## 3: 1 Abbruches FALSE Abbruch NN -0.0048 negativ
## 4: 1 Abbruchs FALSE Abbruch NN -0.0048 negativ
## 5: 1 Abbrüche FALSE Abbruch NN -0.0048 negativ
## ---
## 18026: 1826 überwältigt FALSE überwältigen VVINF -0.0048 negativ
## 18027: 1826 überwältigte FALSE überwältigen VVINF -0.0048 negativ
## 18028: 1826 überwältigten FALSE überwältigen VVINF -0.0048 negativ
## 18029: 1826 überwältigtest FALSE überwältigen VVINF -0.0048 negativ
## 18030: 1826 überwältigtet FALSE überwältigen VVINF -0.0048 negativ
# Match every remaining tweet token against the sentiment lexicon and count
# how often each (word, sentiment) pair occurs, most frequent first.
# The join key is named explicitly so the result cannot change silently if
# either table later gains another column with a shared name.
sentiment_analyse_lyrik <- clean_lyrik_text %>%
  inner_join(all_sentiments_lexikon, by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
head(sentiment_analyse_lyrik)
## # A tibble: 6 × 3
## word sentiment n
## <chr> <chr> <int>
## 1 Geduld positiv 22
## 2 Gelassenheit positiv 22
## 3 Hoffnung positiv 10
## 4 verzweifelten negativ 10
## 5 führen positiv 9
## 6 unabhängiges positiv 8
# sentiment_analyse_lyrik$sentiment <- as.factor(sentiment_analyse_lyrik$sentiment)
# Faceted bar chart: for each sentiment label, the 15 highest-ranked words
# (top_n() falls back to the last column, `n`, as ranking variable), drawn
# as horizontal bars with one panel per sentiment.
g <- sentiment_analyse_lyrik %>%
  group_by(sentiment) %>%
  top_n(15) %>%
  ungroup() %>%
  # Order the factor levels by count so the bars are sorted.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  labs(
    x = NULL,
    y = "Anteil am Sentiment",
    title = "Tweets #Lyrik"
  ) +
  theme_bw() +
  coord_flip()
g
# Count each matched (word, sentiment, weight) combination and derive a
# per-word score as lexicon weight times number of occurrences.
# The join key is named explicitly instead of relying on the implicit
# natural join over shared column names.
sentiment_score_lyrik <- clean_lyrik_text %>%
  inner_join(all_sentiments_lexikon, by = "word") %>%
  count(word, sentiment, weight, sort = TRUE) %>%
  ungroup() %>%
  mutate(score = weight * n)

# Histograms at two bin resolutions (9 and 18 bins).
# NOTE(review): both plots map `weight`, i.e. one observation per distinct
# row above, not the computed `score` column, although the axis is labelled
# "Sentiment Score" - confirm which variable is intended.
g_score_1 <- ggplot(sentiment_score_lyrik, aes(x = weight)) +
  geom_histogram(bins = 9, alpha = 0.6) +
  theme_bw() +
  labs(
    title = "Histogram Twitter - Sentiment Score",
    y = "Anzahl",
    x = "Sentiment Score"
  )
g_score_1

g_score_2 <- ggplot(sentiment_score_lyrik, aes(x = weight)) +
  geom_histogram(bins = 18, alpha = 0.6) +
  theme_bw() +
  labs(
    title = "Histogram Twitter - Sentiment Score",
    y = "Anzahl",
    x = "Sentiment Score"
  )
g_score_2
# Persist the whole workspace. save.image() writes a binary workspace file,
# so it gets the .RData extension; a .R name would collide with (and could
# overwrite) an R script of the same name.
save.image("Sentiment_Analyse_SentiWS.RData")
# Dafür wird das Paket "wordcloud" benötigt
# install.packages("wordcloud")
library (wordcloud)
# Word frequencies for the cloud, sorted in descending order.
lyrik_tweets_wordcloud <- count(clean_lyrik_text, word, sort = TRUE)

# Drop the first row, i.e. the single most frequent word after the sort.
lyrik_tweets_wordcloud <- lyrik_tweets_wordcloud[-1, ]

# Draw at most 100 words, scaled between sizes 2 and 0.5, using six colours
# from the "Dark2" palette.
wordcloud(
  words = lyrik_tweets_wordcloud[["word"]],
  freq = lyrik_tweets_wordcloud[["n"]],
  scale = c(2, .5),
  max.words = 100,
  colors = brewer.pal(6, "Dark2")
)