library(Sleuth3)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
data(case1302)
head(case1302)
Units : platoons
Variables: Company, Treat and Score
library(Sleuth3)
library(tidyverse)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Mean Score and number of rows for every Company x Treat combination.
case1302 %>%
  group_by(Company, Treat) %>%
  summarise(
    average_Score = mean(Score, na.rm = TRUE),
    n = n()
  )
## `summarise()` has grouped output by 'Company'. You can override using the `.groups` argument.
library(ggthemes)
# Per Company x Treat summary (mean score and row count), stored for plotting.
sumdf <- case1302 %>%
  group_by(Company, Treat) %>%
  summarise(
    mean_Score = mean(Score, na.rm = TRUE),
    n = n()
  )
## `summarise()` has grouped output by 'Company'. You can override using the `.groups` argument.
head(sumdf)
# Mean score per company: point colour shows the treatment, point size shows
# how many observations went into each mean.
sumdf %>%
  ggplot(aes(x = Company, y = mean_Score)) +
  theme_bw() +
  geom_point(aes(size = n, color = Treat)) +
  scale_color_colorblind(name = "Treatment") +
  scale_size(
    name = "Number of \nObservations",
    range = c(2, 4),
    breaks = c(1, 2)
  ) +
  ylab("Mean Score") +
  # guides() takes arguments named after aesthetics only. Legend titles are
  # set through the scale `name` arguments above, so only the legend order
  # is configured here.
  guides(
    color = guide_legend(order = 1),
    size = guide_legend(order = 2)
  )
library(ggthemes)
# Same plot built in a single pipeline: summarise per Company x Treat, then
# draw the means with colour for treatment and size for the observation count.
case1302 %>%
  group_by(Company, Treat) %>%
  summarize(mean_Score = mean(Score, na.rm = TRUE), n = n()) %>%
  ggplot(aes(x = Company, y = mean_Score, size = n, color = Treat)) +
  theme_bw() +
  geom_point() +
  scale_color_colorblind(name = "Treatment") +
  scale_size(
    name = "Number of \nObservations",
    range = c(2, 4),
    breaks = c(1, 2)
  ) +
  ylab("Mean Score") +
  # guides() takes arguments named after aesthetics only. Legend titles are
  # set through the scale `name` arguments above, so only the legend order
  # is configured here.
  guides(
    color = guide_legend(order = 1),
    size = guide_legend(order = 2)
  )
## `summarise()` has grouped output by 'Company'. You can override using the `.groups` argument.
I suggest we should not add a geom_smooth line. Although we could add the function, the points in this chart are too far apart. In this case, it does not make sense to add a loess smoother to the above plot.
Real answer: a loess smoother would only make sense if the explanatory variable were also quantitative, but here it is categorical.
• For this exercise, we’ll use the midwest data from ggplot2.
data(midwest)
head(midwest)
Ans: counties
# Total population per state as a bar chart.
# summarise() collapses the data to one row per state. With mutate() every
# county row would carry the state total and geom_col() would stack them,
# inflating each bar by the number of counties in the state.
midwest %>%
  group_by(state) %>%
  summarise(sum_poptotal = sum(poptotal, na.rm = TRUE)) %>%
  ggplot(aes(x = state, y = sum_poptotal)) +
  geom_col()
# Total population per state, drawn with the Economist (white) theme.
# summarise() yields one row per state so that geom_col() draws the state
# total once instead of stacking it once per county.
midwest %>%
  group_by(state) %>%
  summarise(sum_poptotal = sum(poptotal, na.rm = TRUE)) %>%
  ggplot(aes(x = state, y = sum_poptotal)) +
  theme_economist_white() +
  geom_col()
# percollege against popdensity on log-log axes, keeping only the rows whose
# poptotal exceeds the 10th percentile of poptotal.
# The colour mapping is inherited by geom_smooth(), so a separate linear fit
# is drawn per state even though every line is forced to black.
midwest %>%
  filter(poptotal > quantile(poptotal, probs = 0.1)) %>%
  ggplot(aes(x = popdensity, y = percollege, color = state)) +
  geom_point(size = 1) +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "black",
    linetype = "dotted"
  ) +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Distribution of the adult share of the population, one box per state.
# The ratio is scaled by 100 so the plotted values really are percentages,
# matching the y-axis label. No sorting step is needed: row order does not
# influence a boxplot.
midwest %>%
  mutate(percadults = 100 * popadults / poptotal) %>%
  ggplot(aes(x = state, y = percadults, fill = state)) +
  theme_bw() +
  geom_boxplot() +
  ylab("Percentage of Adults") +
  xlab("States of midwest")
unique(midwest$state)
## [1] "IL" "IN" "MI" "OH" "WI"
# Replace the two-letter state codes with full state names and show the last
# rows of the result.
midwest %>%
  mutate(
    state = recode(
      state,
      !!!c(
        IL = "Illinois",
        IN = "Indiana",
        MI = "Michigan",
        OH = "Ohio",
        WI = "Wisconsin"
      )
    )
  ) %>%
  tail()
#' Length-dependent sum of a numeric vector
#'
#' When `x` has an even number of elements, all elements are summed. When the
#' length is odd, only the even-valued elements are summed. Missing values are
#' ignored in both cases.
#'
#' @param x A numeric vector.
#' @return A single number: the sum described above.
lsumfun <- function(x) {
  stopifnot(is.numeric(x))
  if (length(x) %% 2 == 0) {
    # Sum the values directly. Indexing `x` by its own values (`x[x]`) would
    # treat them as positions: zeros are dropped, out-of-range values become
    # NA, and mixed-sign input raises an error.
    return(sum(x, na.rm = TRUE))
  }
  # `x %% 2 == 0` is NA for missing entries; the NA they select is removed by
  # na.rm = TRUE.
  sum(x[x %% 2 == 0], na.rm = TRUE)
}
lsumfun(c(1, 2, 3, NA))
## [1] 6
lsumfun(c(1, 2, 3))
## [1] 2
lsumfun(c(2, 3, 4, 5, 6))
## [1] 12
lsumfun(c(2, 3, 4, 5, NA))
## [1] 6
lsumfun(c(2, 3, 4, NA, 6))
## [1] 12
• If the year is evenly divisible by 4, it is a leap year, unless:
• The year is also evenly divisible by 100, in which case it is NOT a leap year, unless:
• The year is also evenly divisible by 400. Then it is a leap year.
This means that in the Gregorian calendar, the years 2000 and 2400 are leap years, while 1800, 1900, 2100, 2200, 2300 and 2500 are NOT leap years
Write a function that takes the year as input and returns TRUE if it is a leap year and FALSE if it is not a leap year. Evaluate your function at 2, 12, 200, 800.
#' Is a year a leap year in the Gregorian calendar?
#'
#' A year is a leap year when it is divisible by 4, except that years
#' divisible by 100 are not leap years unless they are also divisible by 400.
#'
#' @param y A numeric vector of years.
#' @return A logical vector the same length as `y`: `TRUE` for leap years,
#'   `FALSE` otherwise.
Gregorian_calendar <- function(y) {
  stopifnot(is.numeric(y))
  # Elementwise operators keep the rule vectorised over `y`.
  (y %% 4 == 0 & y %% 100 != 0) | y %% 400 == 0
}
Gregorian_calendar(2)
## [1] FALSE
Gregorian_calendar(12)
## [1] TRUE
Gregorian_calendar(200)
## [1] FALSE
Gregorian_calendar(800)
## [1] TRUE
• Your function should throw an error if either a or b contains repeated elements.
• If a number is in a and not b you add 1 to your score.
• If a number is in b and not a you subtract 1 from your score.
• If a number is in both a and b you do nothing to your score.
• The function returns the final score.
• Hint: help("%in%")
#' Score two duplicate-free numeric vectors against each other
#'
#' Every value found in `a` but not in `b` adds 1 to the score, every value
#' found in `b` but not in `a` subtracts 1, and shared values contribute
#' nothing.
#'
#' @param a A numeric vector without repeated elements.
#' @param b A numeric vector without repeated elements.
#' @return The integer score.
score2 <- function(a, b) {
  stopifnot(is.numeric(a))
  stopifnot(is.numeric(b))
  # `a` is checked first, so its message wins when both inputs have repeats.
  if (anyDuplicated(a) > 0) {
    stop("a has some repeated elements ")
  }
  if (anyDuplicated(b) > 0) {
    stop("b has some repeated elements")
  }
  # With duplicates ruled out, the set differences count exactly the elements
  # unique to each side.
  only_in_a <- length(setdiff(a, b))
  only_in_b <- length(setdiff(b, a))
  only_in_a - only_in_b
}
score2(c(1, 2, 3), c(3, 4))
## [1] 1
score2(c(1, 2, 3), c(1, 2, 3, 4))
## [1] -1
score2(c(1, 2, 3), c(3, 4, 4))
## Error in score2(c(1, 2, 3), c(3, 4, 4)): b has some repeated elements