- Data activity (10 min)
- Coding! Coding! Coding! (50 min)
- Break (5 min)
- Discuss readings (10 min)
- More coding! (40 min)
- Introduce Problem Set (Remainder)
2023-07-06
# Print the group assignments by calling the data.frame print method directly
# (this bypasses S3 dispatch on whatever class `groups` actually has).
# NOTE(review): `groups` is created outside this excerpt -- confirm where.
print.data.frame(groups)
## group 1 group 2 group 3 ## 1 Su, Barry Gnanam, Akash Y Gupta, Umang ## 2 Ng, Michelle Premkrishna, Shrish Saccone, Alexander Connor ## 3 Crawford, John Alexander Tian, Zerui Jun, Ernest Ng Wei ## 4 Knutson, Blue C Albertini, Federico ## group 4 group 5 group 6 ## 1 Andrew Yu Ming Xin, Dotson, Bianca Ciara ## 2 Widodo, Ignazio Marco Wan Rosli, Nadia Spindler, Laine Addison ## 3 Alsayegh, Aisha E H M I Cai, Qingyuan ## 4 Ning, Zhi Yan Tan, Zheng Yang Leong, Wen Hou Lester ## group 7 ## 1 Lim, Fang Jan ## 2 Huynh Le Hue Tam, Vivian ## 3 Shah, Jainam ## 4 Cortez, Hugo Alexander
- `message = FALSE` removes messages from display
- `warning = FALSE` removes warnings from display

```{r example1}
library(dplyr)
```
## ## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats': ## ## filter, lag
## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union
- `message = FALSE` removes messages from display
- `warning = FALSE` removes warnings from display

```{r example2, message = FALSE}
library(dplyr)
```
- `message = FALSE` removes messages from display
- `warning = FALSE` removes warnings from display

#####################################################
## title: a new R script!
## author: you!
## purpose: to try out R
## date: today's date
#####################################################

# you can start coding below
# Install dplyr only if it is missing; an unconditional install.packages()
# would re-download and reinstall the package every time this code is run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}

# Load dplyr for the current session (this is needed once per session).
library(dplyr)
# create 3x3 matrix
# (matrix() fills column by column, so each column holds 1, 2, 3)
mat <- matrix(c(1, 2, 3,
                1, 2, 3,
                1, 2, 3),
              nrow = 3)

# look at matrix
mat
## [,1] [,2] [,3] ## [1,] 1 1 1 ## [2,] 2 2 2 ## [3,] 3 3 3
# create dataframe
# (the matrix has no column names, so the columns are named V1, V2, V3)
df <- as.data.frame(mat)

# look at dataframe
df
## V1 V2 V3 ## 1 1 1 1 ## 2 2 2 2 ## 3 3 3 3
# add character variable: `$` creates the column V4 because it does not exist
# yet; the vector supplies one value per row (3 values for 3 rows)
df$V4 <- c("one", "two", "three")
# look at our dataframe (V1-V3 are unchanged, V4 holds the new strings)
df
## V1 V2 V3 V4 ## 1 1 1 1 one ## 2 2 2 2 two ## 3 3 3 3 three
- We’ll now create our own dataframe
# create a dataframe (note that : returns a sequence between the numbers, by 1)
df <- data.frame(year = 2000:2020, temp = 40:60)

# look at the first 6 rows
head(df)
## year temp ## 1 2000 40 ## 2 2001 41 ## 3 2002 42 ## 4 2003 43 ## 5 2004 44 ## 6 2005 45
# create a dataframe (note that : returns a sequence between the numbers, by 1)
df <- data.frame(year = 2000:2020, temp = 40:60)
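- With dplyr, we create and modify variables with functions such as `mutate()` (instead of `$`)
- (i.e., instead of `df$month <- "july"`, we would write `df %>% mutate(month = "july")`)
- What `%>%` does: without it, chained function calls have to be nested and read from the inside out:

fourth_function(third_function(second_function(first_function(x))))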
So instead, we opt for the dplyr method, which is written as follows:
x %>% first_function() %>% second_function() %>% third_function() %>% fourth_function()
- (i.e., instead of `df$month <- "july"`, we would write `df %>% mutate(month = "july")`)
- Knowing what `%>%` does, what would be another way to write `df %>% mutate(month = "july")`?
- `%>%` puts whatever precedes it into the first argument of the function that follows it
- `df %>% mutate(month = "july")` is equivalent to `mutate(df, month = "july")`

# first, let's write a simple function
# add_2: takes a number (or numeric vector) x and gives back x plus 2
add_2 <- function(x) {
  x + 2
}

# confirm the function behaves as expected
add_2(5)
## [1] 7
# now try the pipe method (equivalent to add_2(5))
5 %>% add_2()
## [1] 7
# try adding 2, twice (equivalent to add_2(add_2(5)))
5 %>%
  add_2() %>%
  add_2()
## [1] 9
- `mutate` is a function we will use a lot, which allows us to create and modify variables in a dataframe
- And now, returning to the dataframe from earlier:
# look at the first 6 rows
head(df)
## year temp ## 1 2000 40 ## 2 2001 41 ## 3 2002 42 ## 4 2003 43 ## 5 2004 44 ## 6 2005 45
library(dplyr)
# convert a temperature from degrees Celsius to degrees Fahrenheit
# (F = 9/5 * C + 32); works elementwise on numeric vectors
c_to_f <- function(c) {
  9 / 5 * c + 32
}
# apply c_to_f() to the temp column to build a new temp_f column,
# then display the first 6 rows of the result
df %>%
  mutate(temp_f = c_to_f(temp)) %>%
  head()
## year temp temp_f ## 1 2000 40 104.0 ## 2 2001 41 105.8 ## 3 2002 42 107.6 ## 4 2003 43 109.4 ## 5 2004 44 111.2 ## 6 2005 45 113.0
- But `df` still has no `temp_f`! What happened?

# take a look at the first 6 rows, again
head(df)
## year temp ## 1 2000 40 ## 2 2001 41 ## 3 2002 42 ## 4 2003 43 ## 5 2004 44 ## 6 2005 45
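- To keep the new variable, assign the result back: `df <- df %>% mutate(temp_f = c_to_f(temp))`
- `df$temp_f <- c_to_f(df$temp)` would also work
- The assignment pipe `%<>%` (from the magrittr package): `df %<>% mutate(temp_f = c_to_f(temp))` is equivalent to those above
- The `gutenbergr` R package

#install.packages("gutenbergr")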
library(gutenbergr)
# download the book; 13249 is its E-book number, taken from the gutenberg
# website, and meta_fields = "title" asks for the title to be included
vanishing_wl <- gutenberg_download(13249, meta_fields = "title")
- Use `gutenberg_download()` with the E-book number
- The `gtrendsR` package

library(gtrendsR)
# query Google Trends for both search terms, restricted to the US
hur_wf <- gtrends(
  keyword = c("wildfire", "hurricane"),
  geo = "US"
)
- The results can be plotted (using `plot()`)
- See `?gtrends` for more information on the function.
- The `guardianapi` package

library(guardianapi)
- Run `gu_api_key()` and enter your API Key

ca_wf <- gu_content('"Canada" AND "wildfire" AND "smoke" AND "air quality" AND "New York City"',
                    from_date = "2023-06-01")
- The `select()` function
- We will use the `rvest` package to help us scrape Wikipedia data

library(rvest)
# read in html: fetch the Wikipedia page and parse it into an HTML document
us_disasters <- read_html("https://en.wikipedia.org/wiki/List_of_natural_disasters_in_the_United_States")
# take a look (printing shows the document's top-level <head> and <body> nodes)
us_disasters
## {html_document}
## <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled vector-feature-zebra-design-disabled" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="skin-vector skin-vector-search-vue mediawiki ltr sitedir-ltr ...
## Year Disaster Death.toll Damage.costUS. ## 1 2023 Tornado outbreak 33 $4.3 billion ## 2 2023 Tornado outbreak 25 $1.9 billion ## 3 2023 Flooding and Tornado outbreak 13 $4.5 billion ## 4 2023 Derecho, Tornado outbreak and Winter storm 14 ## 5 2022 Winter storm 106 $5.4 billion ## 6 2022 Earthquake 2 ## Main.article ## 1 Tornado outbreak of March 31 – April 1, 2023 ## 2 Tornado outbreak of March 24–27, 2023 ## 3 Early-March 2023 North American storm complex ## 4 February 2023 North American storm complex ## 5 December 2022 North American winter storm ## 6 2022 Ferndale earthquake ## Location ## 1 Southern United States, Midwestern United States ## 2 Southern United States ## 3 Southwestern United States, Southeastern United States ## 4 Western United States, Southern United States and Midwestern United States ## 5 Western United States, Midwestern United States, Great Lakes region (especially the Buffalo-Niagara Falls metropolitan area), Canada ## 6 North Coast, California, United States ## Notes ## 1 ## 2 ## 3 ## 4 ## 5 ## 6