# Show mpg and cyl for the rows of mtcars where cyl equals 4.
select(
  filter(mtcars, cyl == 4),
  mpg, cyl
)
## mpg cyl
## Datsun 710 22.8 4
## Merc 240D 24.4 4
## Merc 230 22.8 4
## Fiat 128 32.4 4
## Honda Civic 30.4 4
## Toyota Corolla 33.9 4
## Toyota Corona 21.5 4
## Fiat X1-9 27.3 4
## Porsche 914-2 26.0 4
## Lotus Europa 30.4 4
## Volvo 142E 21.4 4
We can nest the remaining columns so that each distinct combination of mpg and cyl is shown on one line with this code:
# Keep the cyl == 4 rows, group them by (mpg, cyl), and fold every other
# column into a per-group list-column.
nest(
  group_by(
    filter(mtcars, cyl == 4),
    mpg, cyl
  )
)
## # A tibble: 9 × 3
## # Groups: mpg, cyl [9]
## mpg cyl data
## <dbl> <dbl> <list>
## 1 22.8 4 <tibble [2 × 9]>
## 2 24.4 4 <tibble [1 × 9]>
## 3 32.4 4 <tibble [1 × 9]>
## 4 30.4 4 <tibble [2 × 9]>
## 5 33.9 4 <tibble [1 × 9]>
## 6 21.5 4 <tibble [1 × 9]>
## 7 27.3 4 <tibble [1 × 9]>
## 8 26 4 <tibble [1 × 9]>
## 9 21.4 4 <tibble [1 × 9]>
# Page that is scraped for the climate table.
url <- "https://en.wikipedia.org/wiki/Asheville,_North_Carolina"
# Row labels of the climate table that are kept further down.
kept_columns <- c("Record high °F (°C)", "Record low °F (°C)")
# Download the page and collect every <table> node on it.
webpage <- read_html(url)
tbls <- html_nodes(webpage, "table")
# Preview the table node(s) whose markup mentions "Climate data".
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
tbls[grep("Climate data", tbls, ignore.case = TRUE)]
## {xml_nodeset (1)}
## [1] <table class="wikitable mw-collapsible" style="width:auto; text-align:cen ...
# Parse the first table whose markup mentions "Climate data".
df <- html_table(
  tbls[grep("Climate data", tbls, ignore.case = TRUE)],
  fill = TRUE
)[[1]]
# The first data row carries the real column headers. It is flattened to a
# plain character vector before assignment, because handing `names<-` a
# one-row tibble raises the tibble 3.0.0 "must be a character vector" warning.
# The first column is labelled "Measurement".
names(df) <- c("Measurement", as.character(unlist(df[1, -1])))
# Reshape to long format (one row per Measurement/variable pair) and keep
# only the measurements listed in kept_columns.
df <- df %>%
  melt(id.vars = c("Measurement")) %>%
  filter(Measurement %in% kept_columns)
# Strip the parenthesised part from each value and each measurement label.
df$value <- gsub("(\\(.*\\))", "", df$value)
df$Measurement <- trimws(gsub("(\\(.*\\))", "", df$Measurement))
# Swap the Unicode minus sign for an ASCII hyphen so that as.numeric() can
# parse negative values; gsub() already returns character.
df$value <- gsub("−", "-", df$value)
df$value <- as.numeric(df$value)
# Plot value against variable: a line layer plus points coloured by
# Measurement, styled with the fivethirtyeight theme and y-axis breaks
# every 10 units from -60 to 130.
nc_climate <- ggplot(df, aes(x = variable, y = value)) +
  geom_line(aes(x = variable, y = value)) +
  geom_point(aes(colour = Measurement), size = 5) +
  theme_fivethirtyeight() +
  scale_y_continuous(breaks = seq(from = -60, to = 130, by = 10)) +
  labs(title = "Asheville Temperatures", subtitle = url)
nc_climate
This is a cool graph to look at, and it is a much better visual than the climate table shown on the wiki. It is interesting to note that Asheville can reach negative temperatures, even though many people don’t think of it as that cold of a place. Based in part on this graph, it is a mild area with four full seasons.
# Show the time and risk columns for the rows of HELPfull with ID <= 3.
select(
  filter(HELPfull, ID <= 3),
  ID, TIME, DRUGRISK, SEXRISK
)
## ID TIME DRUGRISK SEXRISK
## 1 1 0 0 4
## 2 1 6 0 1
## 3 1 18 0 1
## 4 1 24 0 3
## 5 2 0 0 7
## 6 2 6 0 0
## 7 3 0 20 2
## 8 3 6 13 4
## 9 3 24 19 4
For subject 3, measurements were taken only at time periods 0, 6, and 24.
# Widen HELPfull to one row per ID, with one DRUGRISK_<TIME> and one
# SEXRISK_<TIME> column per TIME value, then keep IDs 1-3 and the TIME 0 / 6
# columns. id_cols is passed by name: a bare `ID` placed after `names_from`
# only reached id_cols through positional matching, which depends on the
# argument order of the installed tidyr version.
# The result stays grouped by ID, as before.
HELPfull_wide <- HELPfull %>%
  pivot_wider(
    id_cols = ID,
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  filter(ID <= 3) %>%
  group_by(ID) %>%
  select(ID, DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)
HELPfull_wide
## # A tibble: 3 × 5
## # Groups: ID [3]
## ID DRUGRISK_0 DRUGRISK_6 SEXRISK_0 SEXRISK_6
## <int> <int> <int> <int> <int>
## 1 1 0 0 4 1
## 2 2 0 0 7 0
## 3 3 20 13 2 4
# Widen HELPfull to one row per ID for all subjects and keep the TIME 0 / 6
# risk columns. id_cols is passed by name: a bare `ID` placed after
# `names_from` only reached id_cols through positional matching, which
# depends on the argument order of the installed tidyr version.
# The result stays grouped by ID, as before.
HELPfull_partd <- HELPfull %>%
  pivot_wider(
    id_cols = ID,
    names_from = TIME,
    values_from = c(DRUGRISK, SEXRISK)
  ) %>%
  group_by(ID) %>%
  select(ID, DRUGRISK_0, DRUGRISK_6, SEXRISK_0, SEXRISK_6)
# Pearson correlation between the TIME 0 and TIME 6 DRUGRISK columns,
# using only rows where both values are present.
with(
  HELPfull_partd,
  cor(x = DRUGRISK_0, y = DRUGRISK_6, use = "complete.obs", method = "pearson")
)
## [1] 0.5991146
# Same correlation for the SEXRISK columns.
with(
  HELPfull_partd,
  cor(x = SEXRISK_0, y = SEXRISK_6, use = "complete.obs", method = "pearson")
)
## [1] 0.5076615
# Small example data set: ids 1-3 appear once in group "T" and once in
# group "C", each with a value.
ds1 <- data.frame(
  id = rep(c(1, 2, 3), times = 2),
  group = rep(c("T", "C"), each = 3),
  vals = c(4, 6, 8, 5, 6, 10)
)
ds1
## id group vals
## 1 1 T 4
## 2 2 T 6
## 3 3 T 8
## 4 1 C 5
## 5 2 C 6
## 6 3 C 10
# Split ds1 by group, then subtract the control values from the treatment
# values position by position (row i of Treat minus row i of Control).
Treat <- ds1 %>% filter(group == "T")
Control <- ds1 %>% filter(group == "C")
all <- Treat %>% mutate(diff = vals - Control$vals)
all
## id group vals diff
## 1 1 T 4 -1
## 2 2 T 6 0
## 3 3 T 8 -2
This code works and produces the differences -1, 0, and -2. It depends on the treatment and control rows lining up in the same order, however, and if there are NAs in the dataset you cannot calculate the differences without providing a function to remove those values.
# Spread the two groups into columns T and C (one row per id), then add the
# treatment-minus-control difference.
ds1_wider <- pivot_wider(ds1, names_from = group, values_from = vals)
ds1_wider <- mutate(ds1_wider, diff = T - C)
ds1_wider
## # A tibble: 3 × 4
## id T C diff
## <dbl> <dbl> <dbl> <dbl>
## 1 1 4 5 -1
## 2 2 6 6 0
## 3 3 8 10 -2
# Return the number of rows in Teams whose teamID equals `team`.
count_seasons <- function(team) {
  team_rows <- filter(Teams, teamID == team)
  nrow(team_rows)
}
# Line chart of the HR and HRA columns by yearID for teamID "CHN", one
# coloured line per column, relabelled "hit" (HR) and "allowed" (HRA).
# The rows are filtered first so only the needed data is reshaped, and
# pivot_longer() replaces the superseded gather().
Teams %>%
  filter(teamID == "CHN") %>%
  select(yearID, HR, HRA) %>%
  pivot_longer(
    cols = c(HR, HRA),
    names_to = "type",
    values_to = "home_runs"
  ) %>%
  mutate(type = recode(type, "HR" = "hit", "HRA" = "allowed")) %>%
  ggplot(aes(x = yearID, y = home_runs)) +
  geom_line(aes(color = type)) +
  theme(legend.position = "top")