Objective: Create a data frame containing as many articles as reasonably possible within the limits permitted by the API.

library(httr)
library(jsonlite)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Make API calls ----
#
# Collects articles from two NewsAPI endpoints and stacks them into a single
# data frame, `combined_df`, with the columns source, author, title,
# description and publishedAt:
#   * /v2/everything    - one request per calendar day (from == to)
#   * /v2/top-headlines - one request per US category
#
# Each response is parsed exactly once, so every request contributes its own
# articles and no response is stacked more than once.

# The API key is read from the environment so the secret is not stored in the
# script. Set it before running, e.g. Sys.setenv(NEWSAPI_KEY = "<your key>"),
# or add a NEWSAPI_KEY entry to ~/.Renviron.
api_key <- Sys.getenv("NEWSAPI_KEY")
if (!nzchar(api_key)) {
  stop(
    "Set the NEWSAPI_KEY environment variable to your NewsAPI key.",
    call. = FALSE
  )
}

base_url <- "https://newsapi.org/v2"
article_columns <- c("source", "author", "title", "description", "publishedAt")

# Days queried on the 'everything' endpoint, in request order: 2024-05-06
# first, then every day from 2024-04-05 through 2024-05-04.
# NOTE(review): 2024-05-05 is not requested -- confirm whether that gap is
# intentional.
everything_days <- format(
  c(
    as.Date("2024-05-06"),
    seq(as.Date("2024-04-05"), as.Date("2024-05-04"), by = "day")
  ),
  "%Y-%m-%d"
)

# Categories queried on the 'top-headlines' endpoint (country = us).
headline_categories <- c(
  "business", "entertainment", "general", "health",
  "science", "sports", "technology"
)

# Send one GET request to a NewsAPI endpoint.
#
# endpoint: path below base_url, e.g. "everything" or "top-headlines".
# query:    named list of query parameters; the API key is appended last.
# Returns the httr response object.
fetch_newsapi <- function(endpoint, query) {
  # Passing parameters through `query` lets httr build and escape the query
  # string, so a separator such as '&' cannot be dropped by hand.
  GET(
    paste0(base_url, "/", endpoint),
    query = c(query, list(apiKey = api_key))
  )
}

# Convert JSON into usable data frame
#
# response: httr response returned by fetch_newsapi().
# label:    short description of the request, used in the warning message.
# Returns a data frame with the columns in `article_columns`, where `source`
# holds the source name as a plain column, or NULL when the request failed or
# returned no articles.
extract_articles <- function(response, label) {
  if (http_error(response)) {
    warning(
      "NewsAPI request failed for ", label,
      " (HTTP ", status_code(response), "); skipping.",
      call. = FALSE
    )
    return(NULL)
  }

  payload <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
  articles <- payload$articles
  if (!is.data.frame(articles) || nrow(articles) == 0) {
    return(NULL)
  }

  # Subset articles data frame
  articles <- select(articles, all_of(article_columns))
  # Change source data type to char by taking only 'name' column
  articles$source <- articles$source$name
  articles
}

# One data frame per requested day.
everything_frames <- lapply(everything_days, function(day) {
  response <- fetch_newsapi(
    "everything",
    list(q = '"a"OR"the"', from = day, to = day)
  )
  extract_articles(response, paste("everything", day))
})

# One data frame per headline category.
headline_frames <- lapply(headline_categories, function(category) {
  response <- fetch_newsapi(
    "top-headlines",
    list(country = "us", category = category)
  )
  extract_articles(response, paste("top-headlines", category))
})

# Combine all data frames into a list, dropping requests that yielded nothing
dataframes <- Filter(Negate(is.null), c(everything_frames, headline_frames))
if (length(dataframes) == 0) {
  stop("No articles were returned by any NewsAPI request.", call. = FALSE)
}

# Combine all data frames into one
combined_df <- do.call(rbind, dataframes)
head(combined_df, 20)

##                     source                            author
## 1      The Washington Post                    Nicolás Rivero
## 2  The Wall Street Journal                Sarah E. Needleman
## 3              WCVB Boston              Jamy Pombo Sesselman
## 4                     CNBC            Pia Singh, Brian Evans
## 5              Google News                      Fox Business
## 6                TheStreet                         TheStreet
## 7      Yahoo Entertainment                           Reuters
## 8              Google News                              CNBC
## 9      Yahoo Entertainment                           Reuters
## 10                 Reuters                           Reuters
## 11                    CNBC                      Amelia Lucas
## 12   Efinancialcareers.com                     Sarah Butcher
## 13     Yahoo Entertainment                      Josh Schafer
## 14               [Removed]                              <NA>
## 15        Business Insider                     George Glover
## 16               The Verge                       Umar Shakir
## 17             Google News                The New York Times
## 18                    CNBC                    CNBC US Source
## 19      Detroit Free Press             Kristen Jordan Shamus
## 20                    CNBC Gabrielle Fonrouge, Brandon Gomez
##                                                                                                                                title
## 1                                            The world's biggest plant to pull carbon from the sky just opened - The Washington Post
## 2                                                           Roblox Stock Tumbles on Disappointing Guidance - The Wall Street Journal
## 3                                          Paychecks delayed for Steward Health Care employees after bankruptcy filing - WCVB Boston
## 4                                    Dow jumps more than 150 points, on track for seventh straight positive day: Live updates - CNBC
## 5                                                                            GM ending production of Chevrolet Malibu - Fox Business
## 6                                                         Will stocks set record gains during a Fed interest rate pause? - TheStreet
## 7                                                               Airbnb slumps as gloomy forecast fans slowdown fears - Yahoo Finance
## 8                               CFPB rule to save Americans $10 billion a year in late fees faces possible last-minute freeze - CNBC
## 9                                                      Arm shares fall as tepid forecast takes shine off AI optimism - Yahoo Finance
## 10                                               Wall St muted as weak earnings overshadow signs of softening labor market - Reuters
## 11                                    McDonald's is betting on its mobile business with new franchisee digital marketing fund - CNBC
## 12                                                   Bank of America says Leo Lukenas didn't work 120-hour weeks - eFinancialCareers
## 13                                                 Recession-proof stocks are leading the market's latest leg higher - Yahoo Finance
## 14                                                                                                                         [Removed]
## 15 Donald Trump spends day off from his hush-money trial attacking Biden's crypto knowledge and pushing mugshot NFTs - Yahoo! Voices
## 16                                           TikTok is adding an “AI-generated” label to watermarked third-party content - The Verge
## 17                                Activist Ancora Wins Three Norfolk Southern Board Seats but Will Not Oust CEO - The New York Times
## 18                                                            Weekly jobless claims jump to 231,000, the highest since August - CNBC
## 19                                            Ascension hospitals hit by cyberattack; investigating data breach - Detroit Free Press
## 20                                  Fitness startup that Peloton once tried to buy is growing as the pandemic darling shrinks - CNBC
##                                                                                                                                                                       description
## 1                                                     Experts say we’ll need carbon capture to offset stubborn emissions that can’t easily be cut with existing green technology.
## 2  Investors watch as more earnings come in. Follow along for live updates on stocks and other markets, including the Dow Jones Industrial Average, S&P 500 and Nasdaq Composite.
## 3                                                               The president of the company explained the reason for the delay and has assured employees that they will be paid.
## 4                          Stocks were little changed as traders weighed disappointing earnings against new data that gave hope of easier Federal Reserve policy later this year.
## 5                                                                                                                                                                            <NA>
## 6                                                                                                                                                                            <NA>
## 7                    The Easter holiday occurring in the first quarter rather than the second and currency-exchange impacts were partly to blame for Airbnb projecting current...
## 8                                                                                                                                                                            <NA>
## 9                Bets that Arm will benefit from a surge in AI computing have nearly doubled the chipmaker's share price since its initial public offer last September, giving...
## 10                                                                                                                                                                           <NA>
## 11                                                                                  McDonald's has previously said it wants to reach 100 million loyalty program members by 2027.
## 12                                                                                             Bank of America's records do not suggest that Leo Lukenas worked 120 hours a week.
## 13                                                                          The utilities and consumer staples sectors have popped since mid-April as investors search for value.
## 14                                                                                                                                                                      [Removed]
## 15                                                Donald Trump hosted supporters who bought NFTs from his "Mugshot Edition" collection at a dinner in Mar-a-Lago, Axios reported.
## 16                                    TikTok is the first social media platform to automatically label AI-generated content that contains Adobe’s “Content Credentials” metadata.
## 17                                                                                                                                                                           <NA>
## 18                                                                                                                                                                           <NA>
## 19                           "It's affecting everything," a doctor said when computer networks shut down at 15 Ascension Michigan hospitals amid a national cybersecurity breach.
## 20                                                Connected rowing company Hydrow is growing and Peloton is shrinking as fitness buffs move away from cardio in favor of weights.
##             publishedAt
## 1  2024-05-09T16:42:00Z
## 2  2024-05-09T16:26:00Z
## 3  2024-05-09T16:12:00Z
## 4  2024-05-09T16:11:00Z
## 5  2024-05-09T15:14:00Z
## 6  2024-05-09T14:33:00Z
## 7  2024-05-09T14:13:04Z
## 8  2024-05-09T14:08:55Z
## 9  2024-05-09T14:08:26Z
## 10 2024-05-09T14:07:00Z
## 11 2024-05-09T14:00:01Z
## 12 2024-05-09T13:53:54Z
## 13 2024-05-09T13:51:38Z
## 14 1970-01-01T00:00:00Z
## 15 2024-05-09T13:38:47Z
## 16 2024-05-09T13:31:05Z
## 17 2024-05-09T13:21:23Z
## 18 2024-05-09T12:32:47Z
## 19 2024-05-09T12:30:41Z
## 20 2024-05-09T12:30:01Z

# Save the combined articles as a CSV file, omitting the row-name column
csv_path <- "combined_df_final_project.csv"
write.csv(combined_df, csv_path, row.names = FALSE)

final_project_607_API_call

2024-05-10

Objective: Create a data frame containing as many articles as reasonably possible within the limits permitted by the API.

The finished data frame, albeit small, will allow for sentiment analysis.