packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(robotstxt)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
library(nycflights13)
library(rvest)
library(ggplot2)
library(jsonlite)

check that robots.txt allows scraping the page

# Page to scrape: Wikipedia's list of current United States governors.
webpage <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"

# Check the site's robots.txt permissions for this path before fetching it.
robotstxt::paths_allowed(paths = webpage)
##  en.wikipedia.org
## [1] TRUE

table from webpage

# Download and parse the page, then collect every <table> node it contains.
page <- webpage %>% read_html()
tbls <- page %>% html_elements("table")

# The second table on the page is converted to a tibble and displayed.
current_gov <- tbls[[2]] %>% html_table()
print(current_gov)
## # A tibble: 50 × 10
##    State              Image `Governor[14]`         `Party[14]` `Party[14]` Born 
##    <chr>              <lgl> <chr>                  <lgl>       <chr>       <chr>
##  1 Alabama (list)     NA    Kay Ivey               NA          Republican  (194…
##  2 Alaska (list)      NA    Mike Dunleavy          NA          Republican  (196…
##  3 Arizona (list)     NA    Katie Hobbs            NA          Democratic  (196…
##  4 Arkansas (list)    NA    Sarah Huckabee Sanders NA          Republican  (198…
##  5 California (list)  NA    Gavin Newsom           NA          Democratic  (196…
##  6 Colorado (list)    NA    Jared Polis            NA          Democratic  (197…
##  7 Connecticut (list) NA    Ned Lamont             NA          Democratic  (195…
##  8 Delaware (list)    NA    Matt Meyer             NA          Democratic  (197…
##  9 Florida (list)     NA    Ron DeSantis           NA          Republican  (197…
## 10 Georgia (list)     NA    Brian Kemp             NA          Republican  (196…
## # ℹ 40 more rows
## # ℹ 4 more variables: `Prior public experience[15]` <chr>,
## #   `Inauguration[14]` <chr>, `End of term[14]` <chr>, Ref. <chr>

empty columns

# Flag the columns that are entirely NA, i.e. truly empty. Comparing each
# column's NA count with the row count avoids also flagging columns that only
# contain a few missing values, which a `> 0` test would do.
colSums(is.na(current_gov)) == nrow(current_gov)
##                       State                       Image 
##                       FALSE                        TRUE 
##                Governor[14]                   Party[14] 
##                       FALSE                        TRUE 
##                   Party[14]                        Born 
##                       FALSE                       FALSE 
## Prior public experience[15]            Inauguration[14] 
##                       FALSE                       FALSE 
##             End of term[14]                        Ref. 
##                       FALSE                       FALSE

select and rename columns

# Drop only the columns that are entirely NA. The scraped table has two
# columns that are both named `Party[14]`: one is all NA, the other holds the
# party name. Removing by name, as in select(-`Party[14]`), discards both, so
# the columns are picked by position with a logical mask instead.
is_empty_col <- vapply(
  current_gov,
  function(col) all(is.na(col)),
  logical(1),
  USE.NAMES = FALSE
)
new_current_gov <- current_gov[!is_empty_col]
print(new_current_gov)
## # A tibble: 50 × 7
##    State          `Governor[14]` Born  Prior public experie…¹ `Inauguration[14]`
##    <chr>          <chr>          <chr> <chr>                  <chr>             
##  1 Alabama (list) Kay Ivey       (194… Lieutenant GovernorSt… April 10, 2017    
##  2 Alaska (list)  Mike Dunleavy  (196… Alaska Senate          December 3, 2018  
##  3 Arizona (list) Katie Hobbs    (196… Secretary of StateMin… January 2, 2023   
##  4 Arkansas (lis… Sarah Huckabe… (198… White House Press Sec… January 10, 2023  
##  5 California (l… Gavin Newsom   (196… Lieutenant GovernorMa… January 7, 2019   
##  6 Colorado (lis… Jared Polis    (197… U.S. HouseColorado St… January 8, 2019   
##  7 Connecticut (… Ned Lamont     (195… Chair of the State In… January 9, 2019   
##  8 Delaware (lis… Matt Meyer     (197… Executive of New Cast… January 21, 2025  
##  9 Florida (list) Ron DeSantis   (197… U.S. House             January 8, 2019   
## 10 Georgia (list) Brian Kemp     (196… Secretary of StateGeo… January 14, 2019  
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹​`Prior public experience[15]`
## # ℹ 2 more variables: `End of term[14]` <chr>, Ref. <chr>
# Strip the trailing Wikipedia footnote marker (e.g. "[14]") from every column
# name. Matching the marker with a pattern instead of spelling out
# `Governor[14]`, `Inauguration[14]`, ... keeps this step working when the
# footnote numbers on the page change, and yields the correctly spelled
# `Inauguration` column. Names without a marker are left untouched.
new_current_gov <- new_current_gov %>%
  rename_with(function(nm) sub("\\[[0-9]+\\]$", "", nm))

print(new_current_gov)
## # A tibble: 50 × 7
##    State   Governor Born  Prior public experie…¹ Inaguration `End of term` Ref. 
##    <chr>   <chr>    <chr> <chr>                  <chr>       <chr>         <chr>
##  1 Alabam… Kay Ivey (194… Lieutenant GovernorSt… April 10, … 2027 (term l… [17] 
##  2 Alaska… Mike Du… (196… Alaska Senate          December 3… 2026 (term l… [18] 
##  3 Arizon… Katie H… (196… Secretary of StateMin… January 2,… 2027          [19] 
##  4 Arkans… Sarah H… (198… White House Press Sec… January 10… 2027          [20] 
##  5 Califo… Gavin N… (196… Lieutenant GovernorMa… January 7,… 2027 (term l… [21] 
##  6 Colora… Jared P… (197… U.S. HouseColorado St… January 8,… 2027 (term l… [22] 
##  7 Connec… Ned Lam… (195… Chair of the State In… January 9,… 2027          [23] 
##  8 Delawa… Matt Me… (197… Executive of New Cast… January 21… 2029          [24] 
##  9 Florid… Ron DeS… (197… U.S. House             January 8,… 2027 (term l… [25] 
## 10 Georgi… Brian K… (196… Secretary of StateGeo… January 14… 2027 (term l… [26] 
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹​`Prior public experience`

gather, clean, read crypto data

# Fetch daily XMR/USD price history from the CryptoCompare API and pull out
# the table of price rows nested under Data$Data in the JSON response.
data <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=150"
raw_data <- fromJSON(data)
xmr_data <- raw_data$Data$Data

# Fail early with a readable message when the response carries no price rows
# (for example an API error payload), instead of an obscure mutate() failure.
# NOTE(review): `Message` is assumed to hold the API's error text when one is
# returned; it is simply omitted from the message if the field is absent.
if (!is.data.frame(xmr_data) || nrow(xmr_data) == 0L) {
  stop(
    "CryptoCompare returned no price rows. ", raw_data$Message,
    call. = FALSE
  )
}

# `time` is converted from seconds since 1970-01-01. The time zone is pinned
# to UTC so the resulting dates do not depend on the local time zone of the
# machine running the script.
xmr_data <- xmr_data %>%
  mutate(time = as.POSIXct(time, origin = "1970-01-01", tz = "UTC"))

# Lowest opening price in the window and the timestamp at which it occurred.
min_open <- min(xmr_data$open, na.rm = TRUE)
min_time <- xmr_data$time[which.min(xmr_data$open)]

read data

# Load the built-in airquality dataset into the workspace, then show the
# per-column summary statistics (these include the NA counts).
data("airquality")
print(summary(airquality))
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 

how many hot, low-wind days (Temp > 90 and Wind < 8)

# Keep the days that were both low-wind (Wind < 8) and hot (Temp > 90).
# which() drops rows where the condition is NA, matching subset().
hot_days <- airquality[which(airquality$Wind < 8 & airquality$Temp > 90), ]
print(nrow(hot_days))
## [1] 9

create plot

# Scatter plot of temperature against month for the airquality data.
with(
  airquality,
  plot(
    Month, Temp,
    main = "Temp by Month",
    xlab = "Month",
    ylab = "Temperature"
  )
)

total departure delays

# Sum the departure delay per origin airport, ignoring missing delays, and
# list the airports from the largest total to the smallest.
flights %>%
  group_by(origin) %>%
  summarise(
    total_dep_delay = sum(dep_delay, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_dep_delay))
## # A tibble: 3 × 2
##   origin total_dep_delay
##   <chr>            <dbl>
## 1 EWR            1776635
## 2 JFK            1325264
## 3 LGA            1050301

total distance and number of flights

# Sum the distance flown per origin airport and list the airports from the
# largest total to the smallest.
flights %>%
  group_by(origin) %>%
  summarise(
    total_distance = sum(distance, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_distance))
## # A tibble: 3 × 2
##   origin total_distance
##   <chr>           <dbl>
## 1 JFK         140906931
## 2 EWR         127691515
## 3 LGA          81619161
# Count the flights per origin airport, largest count first.
flights %>%
  count(origin, name = "num_flights", sort = TRUE)
## # A tibble: 3 × 2
##   origin num_flights
##   <chr>        <int>
## 1 EWR         120835
## 2 JFK         111279
## 3 LGA         104662