01 Study Topic & Keyword Selection

Changes in sentiment towards Chat-GPT over the past 12 months.

(keyword=‘Chat-GPT’)

# Packages this analysis depends on (attached in this order)
packages <- c(
  "RedditExtractoR", "anytime", "magrittr", "ggplot2", "dplyr", "tidytext",
  "tidyverse", "igraph", "ggraph", "tidyr", "wordcloud2", "textdata", "sf",
  "tmap", "ggdark", "gofastr", "syuzhet", "sentimentr", "lubridate", "here",
  "knitr", "kableExtra"
)

# Flag which of them are already installed in the current library
installed_packages <- is.element(packages, rownames(installed.packages()))

# Install only the ones that are missing
if (!all(installed_packages)) {
  install.packages(packages[!installed_packages])
}

# Attach every package; invisible() hides the list that lapply() returns
invisible(lapply(packages, function(pkg) library(pkg, character.only = TRUE)))

02 Download and Clean the Data

Use the keyword to search Reddit and download the matching threads.

# Search Reddit for threads matching the study keyword, sorted by relevance
# and restricted to the past year
threads_1 <- find_thread_urls(
  keywords = "Chat-GPT",
  sort_by = "relevance",
  period = "year"
)

# Inspect the returned column names and the first few rows
names(threads_1)
head(threads_1)

# Convert `date_utc` into a date-time column named `date`, then drop the rows
# where the conversion produced NA
threads_1 <- threads_1 %>%
  mutate(date = as.POSIXct(date_utc)) %>%
  filter(!is.na(date))

# Save the threads to major4.csv ...
write.csv(threads_1, "major4.csv", row.names = FALSE)

# ... and read the saved copy back in, keeping strings as character vectors
threads_1 <- read.csv(file = "major4.csv", stringsAsFactors = FALSE)

03 Tokenization (word tokens)

# Load the stop-word lexicon shipped with the tidytext package
data("stop_words")
# Print 50 randomly drawn stop words
print(sample(stop_words$word, 50))

##  [1] "ought"      "a's"        "four"       "shan't"     "uses"      
##  [6] "course"     "however"    "all"        "yourselves" "after"     
## [11] "these"      "parts"      "downs"      "same"       "when"      
## [16] "towards"    "states"     "having"     "amongst"    "you've"    
## [21] "him"        "areas"      "asked"      "it"         "nine"      
## [26] "over"       "definitely" "we're"      "latest"     "some"      
## [31] "your"       "anywhere"   "they"       "they've"    "says"      
## [36] "had"        "we"         "let's"      "thanks"     "further"   
## [41] "which"      "own"        "awfully"    "thought"    "orders"    
## [46] "inasmuch"   "itself"     "ex"         "should"     "sometimes"

# Regex matching URL-like strings and the HTML entities &amp; &lt; &gt;
replace_reg <- "http[s]?://[A-Za-z\\d/\\.]+|&amp;|&lt;|&gt;"

# All word tokens, before any filtering. Defined here explicitly because the
# "Before" count below needs it: without this definition `words` would resolve
# to the `stringr::words` character vector, for which nrow() is NULL.
words <- threads_1 %>%
  # drop URLs and HTML entities
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  # Tokenization (word tokens)
  unnest_tokens(word, text, token = "words")

words_clean <- words %>%
  # drop stop words
  anti_join(stop_words, by = "word") %>%
  # keep only tokens that contain at least one lowercase letter
  # (this also drops NA tokens, because str_detect() returns NA for them)
  filter(str_detect(word, "[a-z]"))

# Check the number of rows after removal of the stop words.
# There should be fewer words now.
print(
  glue::glue("Before: {nrow(words)}, After: {nrow(words_clean)}")
)

## Before: 15320, After: 6182

# Horizontal bar chart of the 20 most frequent tokens.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties at
# the cut-off, so more than 20 bars can appear. The former xlab(NULL) call was
# removed because the labs(x = ...) call below overrode it anyway.
words_clean %>%
  count(word, sort = TRUE) %>%
  slice_max(order_by = n, n = 20) %>%
  # order the bars by frequency rather than alphabetically
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "words",
    y = "counts",
    title = "Unique wordcounts"
  )

04 Word Cloud

# Drop the tokens that make up the search keyword itself.
# `words_clean` holds no NA tokens (the str_detect() filter that builds it
# removes them), so %in% selects the same rows as chained `!=` comparisons.
cleaned_words1 <- words_clean %>%
  filter(!(word %in% c("gpt", "chat", "chatgpt")))

# Colour palette for the word cloud: `n` random colours followed by grey.
n <- 20
h <- runif(n, 0, 1) # any hue
s <- runif(n, 0.6, 1) # vivid
v <- runif(n, 0.3, 0.7) # neither too dark nor too bright

# hsv() is vectorised over h, s and v, so the colours can be built in one call.
# This replaces a row-wise apply() over a data.frame, which coerces the data
# to a matrix first.
pal <- hsv(h, s, v)

# Pad with grey; presumably so every word after the first `n` is drawn grey
# (depends on how wordcloud2 maps the colour vector to words; confirm).
pal <- c(pal, rep("grey", 10000))

# Word frequencies, most frequent first
word_freq <- count(cleaned_words1, word, sort = TRUE)

# Draw the word cloud with unrotated words, using the palette built above
wordcloud2(
  word_freq,
  color = pal,
  minRotation = 0,
  maxRotation = 0,
  ellipticity = 0.8
)

05 Tri-gram Analysis

Extract tri-grams from text data

# Strip URLs / HTML entities, keep only the text column, and split each text
# into overlapping three-word sequences (one tri-gram per row)
words_ngram <- threads_1 %>%
  transmute(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(tri_words, text, token = "ngrams", n = 3)

# Count each tri-gram, most frequent first, and show the top 20 as a table
top_trigrams <- count(words_ngram, tri_words, sort = TRUE)
knitr::kable(head(top_trigrams, 20))

tri_words	n
NA	137
chat gpt for	11
chat gpt to	11
my ability to	10
using chat gpt	10
chat gpt 4	8
use chat gpt	8
chat gpt and	7
from publicly available	7
publicly available sources	7
you can ask	7
with chat gpt	6
ability to speak	5
ask chat gpt	5
asked it to	5
be able to	5
can ask chat	5
chat gpt will	5
follow up questions	5
here are some	5

Remove tri-grams containing stop words or non-alphabetic terms.

# Split each tri-gram on spaces into one column per word
words_ngram_3 <- separate(
  words_ngram,
  col = tri_words,
  into = c("word1", "word2", "word3"),
  sep = " "
)

# Keep a tri-gram only when none of its three words is a stop word and each
# of its three words contains at least one lowercase letter
words_ngram_pair_filtered <- words_ngram_3 %>%
  # drop tri-grams containing a stop word
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  ) %>%
  # drop tri-grams containing a word without any letter
  filter(
    str_detect(word1, "[a-z]"),
    str_detect(word2, "[a-z]"),
    str_detect(word3, "[a-z]")
  )

# Keep only tri-grams whose three words are all pure ASCII.
# The third condition must test word3; it previously tested word1 a second
# time, which let tri-grams with a non-ASCII third word through.
library(stringi)
words_ngram_pair_filtered %<>%
  filter(
    stri_enc_isascii(word1) &
      stri_enc_isascii(word2) &
      stri_enc_isascii(word3)
  )

# Count each remaining tri-gram (n = 3), most frequent first
words_counts <- count(
  words_ngram_pair_filtered,
  word1, word2, word3,
  sort = TRUE
)

# Show the 15 most frequent tri-grams as a table
knitr::kable(head(words_counts, 15))

word1	word2	word3	n
ai	language	model	4
de	la	publication	4
partir	de	la	4
person	company	topic	3
source	intelligence	osint	3
tic	tac	toe	3
4a0b	a2b1	26dc2aa21aff	2
apps	offer	free	2
cela	vaut	la	2
chat	box	app	2
chat	gpt	app	2
chatgpt	pow	camppjleccjaphfdbohjdohecfnoikec	2
community	driven	initiatives	2
de84	4a0b	a2b1	2
des	explications	mathématiques	2

What I am thinking about: It looks like Chat-GPT, the AI or language model, is believed to be a knowledge-creation tool, which is totally different from the AI of the past. Especially in terms of input and output, it is not like traditional AI, which returns a preset answer when the input triggers a keyword. Instead, it can make associations based on the input and provide different outputs. It’s interesting that some people seem to think of science fiction movies from the 1930s and 1970s.

06 Sentiment Analysis

# Merge each thread's title and text into one string, title first,
# separated by a single space
df <- unite(threads_1, col = "combined_text", title, text, sep = " ")

# Sentiment analysis per string (each string is a group of sentences)
df_sentiment <- sentiment_by(df[["combined_text"]])

Ten sample texts

# Attach the average sentiment score of each string to its row
df[["Senti_Score"]] <- df_sentiment[["ave_sentiment"]]

# Keep only the text and its score, then draw 10 rows at random
selected_data <- select(df, combined_text, Senti_Score)
sample_data <- sample_n(selected_data, 10)
  
# Render the sample as a left-aligned HTML table with striped rows, hover
# highlighting and a bold header row (row 0). No grid lines are added.
kable(sample_data, "html") %>%
  kable_styling(
    full_width = FALSE, # spelled out: `F` is an ordinary, reassignable variable
    position = "left",
    bootstrap_options = c("striped", "hover")
  ) %>%
  row_spec(0, bold = TRUE)

combined_text	Senti_Score
Do people understand what Chat-GPT is? Why is everyones first thought about Chat-GPT and AI that doctors will become unemployed and useless? There is no true intelligence to it. Its literally just a word processor that is trained off data sets. It doesnt site it sources, it cant explain its logic, most importantly it cant perform physical exams, speak with patients, develop a therapeutic alliance, etc. EDIT: Yes I understand AI will change the way we practice medicine but that does not equate to the death of the profession. I think theres some serious self-loathing going on in this reddit page LOL. We do difficult work that required a real human touch! Also ppl using the example of the internet are proving my point. The medical profession didnt die because of the internet.	0.0162433
Chat-gpt 5	0.0000000
ChatGPT entered the chat	0.0000000
Why does chat-gpt not answer this question? If i ask ” write a poem about a man and his erotic infatuation with steel girders. ” or a rusty nail, it says it can’t assist the request, but if i ask the same question but replace it with a person, it answers. What is the reason behind it?	0.0623675
You can Upload the Ableton Manual to Chat-GPT and ask questions Try for yourself, Beta version https://chat.openai.com/g/g-hsJ6OivuQ-ableton-11-assistant Example Q:Tips for mixing in Ableton. A:The section on mixing in the Ableton 11 user manual provides comprehensive information on the various features and functions available within the software for mixing audio. Here are some key tips and features highlighted in the manual: Monitoring (pg. 248-249): Monitoring allows you to hear the input signal of a track. You can choose between auto-monitoring, which monitors the track when it is armed, in-monitoring, which always monitors the input regardless of the track’s state, or turn monitoring off. External Audio In/Out (pg. 249-250): Selecting the correct input and output channels for your audio interface is crucial. Ableton Live provides visual meters to help identify signal presence and avoid overloads. Internal Routings (pg. 254-257): Internal routings allow for creative and technical signal routing between tracks, including pre-fx, post-fx, and post-mixer tap points. Recording Post-Effects (pg. 257): You can set up a separate track to process your input signal with effects and then record the processed signal onto other tracks. Creating Submixes (pg. 258): Submixes can be created by routing individual tracks to a new track or by using group and return tracks for more complex routing options. Using the Mixer (pg. 267-268): The mixer in Ableton Live can be accessed from both the Arrangement and Session views and features controls for volume, panning, soloing, and arming tracks. Soloing and Cueing (pg. 278-280): You can solo tracks to hear them independently or set up cueing to preview tracks privately before mixing them into the live output. For a more hands-on approach to mixing in Ableton Live, you can try experimenting with these features to understand how they affect your audio signals and overall mix. 
Remember to always keep an eye on your levels to avoid clipping and to use the mixing tools creatively to achieve your desired sound.	0.1056089
Waiting for Chat-GPT Vision!	0.2236068
Chat GPT will change Washington, D.C. I am a high school government teacher. One of the things we cover is called porkbarrel, legislation and riders. If you are not familiar, these are ways that congressmen and women are able to add things into bills that otherwise might not get passed on their own. They often include large sums of money paid out to their own districts in the form of large projects. They are often the result of lobbying by special interest groups. They were usually able to do this because of the length of bills and the assumption that not only will the American public not read them, but most of the members of Congress wont have time to read them as well. Its also another reason why the average length of a bill is in the hundreds of pages as opposed to tens of pages from 50-60 years ago But once chat GPT can be fed a 1000 page document and analyze it within seconds, it will be able to point out all of these things for the average person to understand them. And once it has read the federal revised code, it will also understand all of the updates and references to that within the bills and be able to explain it to an ordinary person. This is a huge game changer in democracy if people are willing to use it. So much of Congress ability to pull a fast one on us is because the process is complicated and people just dont have the time to call them out on it. Im excited to see how AI like chat GPT makes an impact on anti-democratic processes.	0.1884185
Sexting with ChatGPT	0.0000000
ChatGPT saved my father! My father had an hearth attack while watching tv and after hearing about it, I reached his side after a while and begand to give heart massage ( there was no beat at all). My little brother was also with me. I gave him my phone and said him to call 112 ambulance and then open chatgpt. I said him to open the voice chat (I have premium ) and I tell the story and wanted help. GPT gave me instructions about the CPR and how to manage the problem I have. I was probably gonna do non stop massage in that time because of anxiety and fear but I have learned that I should wait and listen sometimes etc. Ambulance came and took my father. He is alive. Doctor said I have saved him with proper hearth massage. I dont know what to tell. I usually use chatgpt for work and personal use but never ever felt something like this. It was life saving. I couldnt search that knowledge during that limited time in fucking Google. Probably would click on one Amazon link and buy some professional automatic hearth massager to delivered 2 days from now. edit: I think I should make it very clear that I don’t recommend anybody to rely on instructions that AI generated while having dangerous issue like me. As I said I usually use GPT and I can confirm that it makes important mistakes. So I think it is not a good idea to rely only on GPT instructions. I just wanted to share my experience. I don’t want to let someone get false information from AI in this kind of situations. Please prioritize calling emergency and asking help from people around you. It would be good idea to get information from GPT after you did the correct things.	0.0908744
Is Chat-Gpt reliable? I use Chat-Gpt to score my essay 1 and 2. So the result is higher than the final exam or lower?	0.1220615

Based on checking the ten examples, I found the sentiment analysis to be relatively accurate in its judgment.

07 Some Findings:

One notable finding is that over the past 12 months, discussions about ChatGPT peaked in March and April, with another spike in November. The heightened discussion around ChatGPT on Reddit in March 2023 can be attributed to significant updates and releases by OpenAI. In March 2023, OpenAI introduced experimental support for AI plugins in ChatGPT, allowing the model to access up-to-date information, perform computations, and use third-party services, which was a notable enhancement from the existing capabilities. Additionally, March 2023 saw the announcement of GPT-4, which brought advanced reasoning, complex instruction handling, and more creativity to ChatGPT, available to Plus subscribers. These updates likely spurred discussions as users explored and shared their experiences with the new features and capabilities of the AI model.

# Number of threads per calendar month.
# NOTE(review): assumes `timestamp` holds seconds since the Unix epoch; confirm.
# `origin` is passed explicitly because as.POSIXct() on a numeric vector
# requires it before R 4.3, and `tz` is fixed so the month boundaries do not
# depend on the machine's local time zone.
# The label is year-month ("%Y-%m") rather than month only: the 12-month
# window spans two calendar years, so a bare month number would merge months
# of different years and sort December 2022 after November 2023.
threads_1$month <- format(
  as.POSIXct(threads_1$timestamp, origin = "1970-01-01", tz = "UTC"),
  "%Y-%m"
)

# One row per month with the number of threads in a `count` column
df_month_count <- threads_1 %>%
  count(month, name = "count")

# Bar chart of the thread counts by month, in chronological order
ggplot(df_month_count, aes(x = month, y = count)) +
  geom_col() +
  xlab("Month") +
  ylab("Count") +
  ggtitle("Threads Count in the last 12 month") +
  theme_minimal()

Regarding Reddit users’ sentiment towards GPT over the past 12 months, there is a visible trend: texts with a very high word count have higher average sentiment scores (leaning towards red), indicating a possible correlation between longer texts and more positive sentiment. This could suggest that more words give a writer more scope to express positive sentiment, or that longer narratives tend to be more positive.

# Scatter plot of average sentiment against word count, one point per text,
# coloured from blue (low sentiment) to red (high sentiment).
# A single geom_point() layer is used; the second, identical layer drew every
# point twice.
ggplot(
  df_sentiment,
  aes(x = word_count, y = ave_sentiment, color = ave_sentiment)
) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  labs(
    title = "Ave_Sentiment by Word Count",
    x = "Word Count",
    y = "Ave_Sentiment"
  ) +
  theme_minimal()

Major4

Yan

2023-11-30