library(robotstxt)
## Warning: package 'robotstxt' was built under R version 4.4.2
library(rvest)
## Warning: package 'rvest' was built under R version 4.4.2
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.4.2
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.4.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.4.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Download daily MSFT prices from Yahoo Finance for the study window.
# getSymbols() creates the `MSFT` object in the workspace as a side effect,
# which is why the result is not assigned here.
getSymbols(
  Symbols = "MSFT",
  src = "yahoo",
  from = "2024-10-01",
  to = "2025-01-31"
)
## [1] "MSFT"

# Preview the first six rows of the downloaded series.
head(MSFT, n = 6L)
##            MSFT.Open MSFT.High MSFT.Low MSFT.Close MSFT.Volume MSFT.Adjusted
## 2024-10-01    428.45    428.48   418.81     420.69    19092900      419.8496
## 2024-10-02    422.58    422.82   416.71     417.13    16582300      416.2967
## 2024-10-03    417.63    419.55   414.29     416.54    13686400      415.7079
## 2024-10-04    418.24    419.75   414.97     416.06    19169700      415.2289
## 2024-10-07    416.00    417.11   409.00     409.54    20919800      408.7219
## 2024-10-08    410.90    415.66   408.17     414.71    19229300      413.8816

# One row per trading day returned, so the row count is the number of prices.
# NOTE(review): the recorded run returned 83 rows -- confirm whether the `to`
# date itself (2025-01-31) is included in what Yahoo returns.
num_days <- dim(MSFT)[1L]
print(sprintf("Number of stock prices pulled: %d", num_days))
## [1] "Number of stock prices pulled: 83"
library(pageviews)
## Warning: package 'pageviews' was built under R version 4.4.2
# Fetch daily pageview counts for the English-Wikipedia "Microsoft" article.
# start/end are timestamp strings in YYYYMMDDHH form.
microsoft_views <- article_pageviews(
  article = "Microsoft",
  project = "en.wikipedia",
  granularity = "daily",
  start = "2024100100",
  end = "2025013100"
)

# Keep only the days with more than 30,000 views. which() discards NA
# comparisons, so rows with a missing count are dropped rather than kept as
# all-NA rows.
high_view_days <- microsoft_views[
  which(microsoft_views[["views"]] > 30000), ,
  drop = FALSE
]
num_high_view_days <- dim(high_view_days)[1L]
print(
  sprintf(
    "Number of days with Wikipedia views over 30,000: %d",
    num_high_view_days
  )
)
## [1] "Number of days with Wikipedia views over 30,000: 1"
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Scrape a player statistics table from ESPN's NBA stats page (season 2025,
# seasontype 2, as encoded in the URL).
url <- "https://www.espn.com/nba/stats/player/_/season/2025/seasontype/2"

# Respect the site's robots.txt: refuse to scrape a path that is not allowed.
# isTRUE() also treats an NA / undetermined answer as "not allowed".
if (!isTRUE(robotstxt::paths_allowed(url))) {
  stop("robots.txt does not permit scraping: ", url, call. = FALSE)
}

page <- read_html(url)

# html_element() returns only the FIRST <table> on the page, and html_table()
# converts it to a data frame (missing cells are filled with NA automatically,
# so the deprecated `fill` argument is not needed).
# NOTE(review): confirm the first table holds the stat columns wanted here; if
# the page splits player names and stats into separate tables, switch to
# html_elements("table") and combine the pieces.
stats_table <- page %>%
  html_element("table") %>%
  html_table()