library(robotstxt)
## Warning: package 'robotstxt' was built under R version 4.4.2
library(rvest)
## Warning: package 'rvest' was built under R version 4.4.2
library(quantmod)
## Warning: package 'quantmod' was built under R version 4.4.2
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.4.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.4.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Download daily MSFT price history from Yahoo Finance for the study window.
# auto.assign = FALSE makes getSymbols() return the xts object instead of
# silently creating `MSFT` in the calling environment, so the assignment is
# explicit and does not depend on quantmod's default for that argument.
# NOTE(review): the 83 rows reported below suggest the `to` date itself is not
# part of the returned data -- confirm whether 2025-01-31 should be covered.
MSFT <- getSymbols(
  "MSFT",
  src = "yahoo",
  from = "2024-10-01",
  to = "2025-01-31",
  auto.assign = FALSE
)

# Preview the first rows (open/high/low/close, volume, adjusted close).
head(MSFT)
## MSFT.Open MSFT.High MSFT.Low MSFT.Close MSFT.Volume MSFT.Adjusted
## 2024-10-01 428.45 428.48 418.81 420.69 19092900 419.8496
## 2024-10-02 422.58 422.82 416.71 417.13 16582300 416.2967
## 2024-10-03 417.63 419.55 414.29 416.54 13686400 415.7079
## 2024-10-04 418.24 419.75 414.97 416.06 19169700 415.2289
## 2024-10-07 416.00 417.11 409.00 409.54 20919800 408.7219
## 2024-10-08 410.90 415.66 408.17 414.71 19229300 413.8816

# Each row is one dated observation, so the row count is the number of
# price records that were pulled.
num_days <- nrow(MSFT)
print(paste("Number of stock prices pulled:", num_days))
## [1] "Number of stock prices pulled: 83"
library(pageviews)
## Warning: package 'pageviews' was built under R version 4.4.2
# Daily page-view counts for the English Wikipedia article "Microsoft".
# The start/end bounds are given as ten-digit timestamp strings.
microsoft_views <- article_pageviews(
  project = "en.wikipedia",
  article = "Microsoft",
  start = "2024100100",
  end = "2025013100",
  granularity = "daily"
)

# Keep only the rows whose view count exceeds 30,000. which() discards any
# NA comparison results, so rows with a missing count are left out.
high_view_days <- microsoft_views[
  which(microsoft_views[["views"]] > 30000), ,
  drop = FALSE
]

# Report how many days cleared the threshold.
num_high_view_days <- nrow(high_view_days)
print(paste("Number of days with Wikipedia views over 30,000:", num_high_view_days))
## [1] "Number of days with Wikipedia views over 30,000: 1"
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the ESPN NBA player statistics page and parse its HTML.
url <- "https://www.espn.com/nba/stats/player/_/season/2025/seasontype/2"
page <- read_html(url)

# Take the first <table> element on the page and convert it to a table.
# html_element() is the current name for the superseded html_node(), and the
# deprecated `fill` argument is dropped: since rvest 1.0.0 html_table() fills
# ragged rows by default.
stats_table <- page %>%
  html_element("table") %>%
  html_table()