(keyword=‘Chat-GPT’)
# Package names
packages <- c("RedditExtractoR", "anytime", "magrittr", "ggplot2", "dplyr", "tidytext", "tidyverse", "igraph", "ggraph", "tidyr", "wordcloud2", "textdata", "sf", "tmap","ggdark","gofastr", "syuzhet", "sentimentr", "lubridate", "here","knitr","kableExtra")
# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
install.packages(packages[!installed_packages])
}
# Load packages
invisible(lapply(packages, library, character.only = TRUE))
threads_1 <- find_thread_urls(keywords = "Chat-GPT",
sort_by = 'relevance',
period = 'year')
colnames(threads_1)
head(threads_1)
#Turn the time-stone to date
# create new column: date
threads_1 %<>%
mutate(date = as.POSIXct(date_utc)) %>%
filter(!is.na(date))
write.csv(threads_1, file = "major4.csv", row.names = FALSE)
threads_1 <- read.csv("major4.csv", stringsAsFactors = FALSE)
# load list of stop words - from the tidytext package
data("stop_words")
# view random 50 words
print(stop_words$word[sample(1:nrow(stop_words), 50)])
## [1] "ought" "a's" "four" "shan't" "uses"
## [6] "course" "however" "all" "yourselves" "after"
## [11] "these" "parts" "downs" "same" "when"
## [16] "towards" "states" "having" "amongst" "you've"
## [21] "him" "areas" "asked" "it" "nine"
## [26] "over" "definitely" "we're" "latest" "some"
## [31] "your" "anywhere" "they" "they've" "says"
## [36] "had" "we" "let's" "thanks" "further"
## [41] "which" "own" "awfully" "thought" "orders"
## [46] "inasmuch" "itself" "ex" "should" "sometimes"
# Regex that matches URL-type string
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&|<|>"
words_clean <- threads_1 %>%
# drop URLs
mutate(text = str_replace_all(text, replace_reg, "")) %>%
# Tokenization (word tokens)
unnest_tokens(word, text, token = "words") %>%
# drop stop words
anti_join(stop_words, by = "word") %>%
# drop non-alphabet-only strings
filter(str_detect(word, "[a-z]"))
# Check the number of rows after removal of the stop words. There should be fewer words now
print(
glue::glue("Before: {nrow(words)}, After: {nrow(words_clean)}")
)
## Before: 15320, After: 6182
#Plot the clean version
words_clean %>%
count(word, sort = TRUE) %>%
top_n(20, n) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(x = word, y = n)) +
geom_col() +
xlab(NULL) +
coord_flip() +
labs(x = "words",
y = "counts",
title = "Unique wordcounts")
#remove the keyword
cleaned_words1 <- words_clean %>%
filter(word != "gpt", word != "chat", word != "chatgpt")
n <- 20
h <- runif(n, 0, 1) # any color
s <- runif(n, 0.6, 1) # vivid
v <- runif(n, 0.3, 0.7) # neither too dark or bright
df_hsv <- data.frame(h = h, s = s, v = v)
pal <- apply(df_hsv, 1, function(x) hsv(x['h'], x['s'], x['v']))
pal <- c(pal, rep("grey", 10000))
#Show the word cloud
cleaned_words1 %>%
count(word, sort = TRUE) %>%
wordcloud2(color = pal,
minRotation = 0,
maxRotation = 0,
ellipticity = 0.8)
words_ngram <- threads_1 %>%
mutate(text = str_replace_all(text, replace_reg, "")) %>%
select(text) %>%
unnest_tokens(output = tri_words,
input = text,
token = "ngrams",
n = 3)
#show ngrams with sorted values
words_ngram %>%
count(tri_words, sort = TRUE) %>%
head(20) %>%
knitr::kable()
| tri_words | n |
|---|---|
| NA | 137 |
| chat gpt for | 11 |
| chat gpt to | 11 |
| my ability to | 10 |
| using chat gpt | 10 |
| chat gpt 4 | 8 |
| use chat gpt | 8 |
| chat gpt and | 7 |
| from publicly available | 7 |
| publicly available sources | 7 |
| you can ask | 7 |
| with chat gpt | 6 |
| ability to speak | 5 |
| ask chat gpt | 5 |
| asked it to | 5 |
| be able to | 5 |
| can ask chat | 5 |
| chat gpt will | 5 |
| follow up questions | 5 |
| here are some | 5 |
#separate the words into 3 columns
words_ngram_3 <- words_ngram %>%
separate(tri_words, c("word1", "word2","word3"), sep = " ")
# filter rows where there are stop words under word 1 column and word 2 column
words_ngram_pair_filtered <- words_ngram_3 %>%
# drop stop words
filter(!word1 %in% stop_words$word & !word2 %in% stop_words$word & !word3 %in% stop_words$word) %>%
# drop non-alphabet-only strings
filter(str_detect(word1, "[a-z]") & str_detect(word2, "[a-z]") & str_detect(word3, "[a-z]"))
# Filter out words that are not encoded in ASCII
library(stringi)
words_ngram_pair_filtered %<>%
filter(stri_enc_isascii(word1) & stri_enc_isascii(word2) & stri_enc_isascii(word1))
# Sort the new tri-gram (n=3) counts:
words_counts <- words_ngram_pair_filtered %>%
count(word1, word2,word3) %>%
arrange(desc(n))
head(words_counts, 15) %>%
knitr::kable()
| word1 | word2 | word3 | n |
|---|---|---|---|
| ai | language | model | 4 |
| de | la | publication | 4 |
| partir | de | la | 4 |
| person | company | topic | 3 |
| source | intelligence | osint | 3 |
| tic | tac | toe | 3 |
| 4a0b | a2b1 | 26dc2aa21aff | 2 |
| apps | offer | free | 2 |
| cela | vaut | la | 2 |
| chat | box | app | 2 |
| chat | gpt | app | 2 |
| chatgpt | pow | camppjleccjaphfdbohjdohecfnoikec | 2 |
| community | driven | initiatives | 2 |
| de84 | 4a0b | a2b1 | 2 |
| des | explications | mathématiques | 2 |
What I am thinking about: It loos like chat-gpt, the ai,or language model is believed to be a knowledge creation tool, which is totally different from the AI in the past.Especially in terms of input and output, it is not like the traditional ai,which feedback the set answer when the input triggers the keyword. Instead, it can make associations based on the input and provide different outputs. It’s interesting that some people seem to think of science fiction movies from the 1930s and 1970s.
#Combine Title and Text
df <- threads_1 %>%
unite("combined_text", title, text, sep=" ")
# sentiment Analysis by string (a group of sentences)
df_sentiment<-sentiment_by(df$combined_text)
#combine the sentiment scores and the texts
df$Senti_Score <- df_sentiment$ave_sentiment
selected_data <- df %>%
select(combined_text, Senti_Score)
sample_data <- selected_data %>%
sample_n(10)
# Create a kable and add black grid lines
kable(sample_data, "html") %>%
kable_styling(full_width = F, position = "left",
bootstrap_options = c("striped", "hover")) %>%
row_spec(0, bold = TRUE)
| combined_text | Senti_Score |
|---|---|
|
Do people understand what Chat-GPT is? Why is everyones first thought about Chat-GPT and AI that doctors will become unemployed and useless? There is no true intelligence to it. Its literally just a word processor that is trained off data sets. It doesnt site it sources, it cant explain its logic, most importantly it cant perform physical exams, speak with patients, develop a therapeutic alliance, etc. EDIT: Yes I understand AI will change the way we practice medicine but that does not equate to the death of the profession. I think theres some serious self-loathing going on in this reddit page LOL. We do difficult work that required a real human touch! Also ppl using the example of the internet are proving my point. The medical profession didnt die because of the internet. |
0.0162433 |
| Chat-gpt 5 | 0.0000000 |
| ChatGPT entered the chat | 0.0000000 |
|
Why does chat-gpt not answer this question? If i ask ” write a poem about a man and his erotic infatuation with steel girders. ” or a rusty nail, it says it can’t assist the request, but if i ask the same question but replace it with a person, it answers. What is the reason behind it? |
0.0623675 |
|
You can Upload the Ableton Manual to Chat-GPT and ask questions Try for yourself, Beta version https://chat.openai.com/g/g-hsJ6OivuQ-ableton-11-assistant Example Q:Tips for mixing in Ableton. A:The section on mixing in the Ableton 11 user manual provides comprehensive information on the various features and functions available within the software for mixing audio. Here are some key tips and features highlighted in the manual:
|
0.1056089 |
| Waiting for Chat-GPT Vision! | 0.2236068 |
|
Chat GPT will change Washington, D.C. I am a high school government teacher. One of the things we cover is called porkbarrel, legislation and riders. If you are not familiar, these are ways that congressmen and women are able to add things into bills that otherwise might not get passed on their own. They often include large sums of money paid out to their own districts in the form of large projects. They are often the result of lobbying by special interest groups. They were usually able to do this because of the length of bills and the assumption that not only will the American public not read them, but most of the members of Congress wont have time to read them as well. Its also another reason why the average length of a bill is in the hundreds of pages as opposed to tens of pages from 50-60 years ago But once chat GPT can be fed a 1000 page document and analyze it within seconds, it will be able to point out all of these things for the average person to understand them. And once it has read the federal revised code, it will also understand all of the updates and references to that within the bills and be able to explain it to an ordinary person. This is a huge game changer in democracy if people are willing to use it. So much of Congress ability to pull a fast one on us is because the process is complicated and people just dont have the time to call them out on it. Im excited to see how AI like chat GPT makes an impact on anti-democratic processes. |
0.1884185 |
| Sexting with ChatGPT | 0.0000000 |
|
ChatGPT saved my father! My father had an hearth attack while watching tv and after hearing about it, I reached his side after a while and begand to give heart massage ( there was no beat at all). My little brother was also with me. I gave him my phone and said him to call 112 ambulance and then open chatgpt. I said him to open the voice chat (I have premium ) and I tell the story and wanted help. GPT gave me instructions about the CPR and how to manage the problem I have. I was probably gonna do non stop massage in that time because of anxiety and fear but I have learned that I should wait and listen sometimes etc. Ambulance came and took my father. He is alive. Doctor said I have saved him with proper hearth massage. I dont know what to tell. I usually use chatgpt for work and personal use but never ever felt something like this. It was life saving. I couldnt search that knowledge during that limited time in fucking Google. Probably would click on one Amazon link and buy some professional automatic hearth massager to delivered 2 days from now. edit: I think I should make it very clear that I don’t recommend anybody to rely on instructions that AI generated while having dangerous issue like me. As I said I usually use GPT and I can confirm that it makes important mistakes. So I think it is not a good idea to rely only on GPT instructions. I just wanted to share my experience. I don’t want to let someone get false information from AI in this kind of situations. Please prioritize calling emergency and asking help from people around you. It would be good idea to get information from GPT after you did the correct things. |
0.0908744 |
| Is Chat-Gpt reliable? I use Chat-Gpt to score my essay 1 and 2. So the result is higher than the final exam or lower? | 0.1220615 |
Based on checking the ten examples, I found the sentiment analysis to be relatively accurate in its judgment.
One notable finding is that over the past 12 months, discussions about ChatGPT peaked in March and April, with another spike in November. The heightened discussion around ChatGPT on Reddit in March 2023 can be attributed to significant updates and releases by OpenAI. In March 2023, OpenAI introduced experimental support for AI plugins in ChatGPT, allowing the model to access up-to-date information, perform computations, and use third-party services, which was a notable enhancement from the existing capabilities. Additionally, March 2023 saw the announcement of GPT-4, which brought advanced reasoning, complex instruction handling, and more creativity to ChatGPT, available to Plus subscribers. These updates likely spurred discussions as users explored and shared their experiences with the new features and capabilities of the AI model.
# number of threads by month
threads_1$month <- format(as.POSIXct(threads_1$timestamp), "%m")
# Assuming 'df' is your dataframe and 'month' is the column with month values
df_month_count <- threads_1 %>%
group_by(month) %>%
summarise(count = n())
# Now, use ggplot to plot the counts by month
ggplot(df_month_count, aes(x = month, y = count)) +
geom_bar(stat = "identity") +
xlab("2023") +
ylab("Count") +
ggtitle("Threads Count in the last 12 month") +
theme_minimal()
For Reddit users’ sentiment to GPT in the past 12 month, there’s a visible trend where texts with a very high word count have higher average sentiment scores (leaning towards red), indicating a possible correlation between longer texts and more positive sentiments. This could suggest that more words give a writer more scope to express positive sentiments, or that longer narratives tend to be more positive.
ggplot(df_sentiment, aes(x = word_count, y = ave_sentiment,color=ave_sentiment)) +
geom_point() +
ggtitle("Ave_Sentiment by Word Count") +
geom_point() +
scale_color_gradient(low = "blue", high = "red") +
theme_minimal() +
xlab("Word Count") +
ylab("Ave_Sentiment")