Source file ⇒ Assignment_3.Rmd
5.4
# Exercise 5.4: scatter plot of `wage` against `exper` from CPS85 on
# log10-scaled axes, one panel per `sector` (4 columns), with point
# transparency mapped to `married`.
ggplot(data = CPS85, mapping = aes(x = exper, y = wage)) +
  geom_point(mapping = aes(alpha = married)) +
  facet_wrap(~ sector, ncol = 4) +
  scale_x_log10() +
  scale_y_log10()
6.3
Star glyph
* x-position
* y-position
* number of stars (0, 1, or 2)
Range/Error bar
* x-position - protein
* y-position of center dot/value
* color - polarity
* label - protein name
* length of bars
The x-axis is the categorical variable “protein”. The y-axis is labelled “cell density”.
Color is not an attribute of the star glyph (**).
Guides: There are tick marks on the y-axis. There is no guide directly on the x-axis, but the protein names act as a guide for the x-axis.
6.6
6.8
# Draw a random sample of 5000 rows from NCHS, then plot `weight` against
# `bmi` with point transparency mapped to `smoker`.
Small <- sample_n(NCHS, size = 5000)
ggplot(data = Small, mapping = aes(x = bmi, y = weight)) +
  geom_point(mapping = aes(alpha = smoker))
6.2
7.2
# Mean `carat` for each `color`, sorted from the largest mean downwards;
# only the first (largest) row is shown.
diamonds %>%
  group_by(color) %>%
  summarise(avg = mean(carat)) %>%
  arrange(desc(avg)) %>%
  head(n = 1)
## Source: local data frame [1 x 2]
##
## color avg
## (fctr) (dbl)
## 1 J 1.162137
J seems to be largest on average.
# Mean of `table / carat` for each `clarity`, sorted from the largest mean
# downwards; only the first (largest) row is shown.
diamonds %>%
  group_by(clarity) %>%
  summarise(tablesPerCarat = mean(table / carat)) %>%
  arrange(desc(tablesPerCarat)) %>%
  head(1)
## Source: local data frame [1 x 2]
##
## clarity tablesPerCarat
## (fctr) (dbl)
## 1 VVS1 141.4822
VVS1 seems to have the largest “table” per carat.
7.4
a) The column “first” does not exist in BabyNames.
b) Should be written as:
# Total births per (year, sex) group. The column is named `totalBirths`, so
# the counts are summed within each group rather than averaged.
Tmp <- group_by(BabyNames, year, sex) %>%
  summarise(totalBirths = sum(count))
c) Should be written as:
# Group BabyNames by year and sex, keep the grouped table in `Tmp`, then
# print the sum of `count` within each group.
Tmp <- BabyNames %>% group_by(year, sex)
Tmp %>% summarise(totalBirths = sum(count))
## Source: local data frame [268 x 3]
## Groups: year [?]
##
## year sex totalBirths
## (int) (chr) (int)
## 1 1880 F 90993
## 2 1880 M 110491
## 3 1881 F 91954
## 4 1881 M 100746
## 5 1882 F 107850
## 6 1882 M 113687
## 7 1883 F 112322
## 8 1883 M 104630
## 9 1884 F 129022
## 10 1884 M 114446
## .. ... ... ...
7.5
# Load BabyNames, sort it by `sex` and then `count` (both ascending), and
# show the first rows.
data(BabyNames)
BabyNames %>%
  arrange(sex, count) %>%
  head()
## name sex count year
## 1 Adelle F 5 1880
## 2 Adina F 5 1880
## 3 Adrienne F 5 1880
## 4 Albertine F 5 1880
## 5 Alys F 5 1880
## 6 Ana F 5 1880
# First rows of BabyNames restricted to entries with sex "F".
BabyNames %>%
  filter(sex == "F") %>%
  head()
## name sex count year
## 1 Mary F 7065 1880
## 2 Anna F 2604 1880
## 3 Emma F 2003 1880
## 4 Elizabeth F 1939 1880
## 5 Minnie F 1746 1880
## 6 Margaret F 1578 1880
# First rows of BabyNames restricted to entries with sex "M" and a count
# above 10 (both conditions must hold).
BabyNames %>%
  filter(sex == "M" & count > 10) %>%
  head()
## name sex count year
## 1 John M 9655 1880
## 2 William M 9532 1880
## 3 James M 5927 1880
## 4 Charles M 5348 1880
## 5 George M 5126 1880
## 6 Frank M 3242 1880
# Sum of `count` over every row of BabyNames, reported in a `total` column.
BabyNames %>%
  summarise(total = sum(count)) %>%
  head()
## total
## 1 333417770
# Keep only the `name` and `count` columns and show the first rows.
BabyNames %>%
  select(name, count) %>%
  head()
## name count
## 1 Mary 7065
## 2 Anna 2604
## 3 Emma 2003
## 4 Elizabeth 1939
## 5 Minnie 1746
## 6 Margaret 1578
7.6
# Load the Minneapolis2013 data set into the workspace.
data(Minneapolis2013)
There are 80101 cases.
# Count rows for each value of `Second`, keep the five most frequent values,
# and present the grouping column under the name `candidate`.
Minneapolis2013 %>%
  group_by(Second) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(5) %>%
  rename(candidate = Second)
## Source: local data frame [5 x 2]
##
## candidate count
## (chr) (int)
## 1 BETSY HODGES 14399
## 2 DON SAMUELS 14170
## 3 MARK ANDREW 12757
## 4 undervote 10598
## 5 JACKIE CHERRYHOMES 6470
# Number of rows whose `First` value is "undervote".
Minneapolis2013 %>%
  filter(First == "undervote") %>%
  group_by(First) %>%
  summarise(count = n())
## Source: local data frame [1 x 2]
##
## First count
## (chr) (int)
## 1 undervote 834
# Number of rows whose `Second` value is "undervote".
Minneapolis2013 %>%
  filter(Second == "undervote") %>%
  group_by(Second) %>%
  summarise(count = n())
## Source: local data frame [1 x 2]
##
## Second count
## (chr) (int)
## 1 undervote 10598
# Number of rows whose `Third` value is "undervote".
Minneapolis2013 %>%
  filter(Third == "undervote") %>%
  group_by(Third) %>%
  summarise(count = n())
## Source: local data frame [1 x 2]
##
## Third count
## (chr) (int)
## 1 undervote 19210
8.1
ggplot()
geom_point(), geom_segment(), geom_histogram()
ylab()
facet_wrap(), facet_grid()
xlim(), scale_y_log10()
8.2
# Base frame: `wage` against `age` for CPS85.
frame <- ggplot(data = CPS85, mapping = aes(x = age, y = wage))
# Points shaped by `married`, one panel per `sector`, y-axis limited to
# 0-30, legend placed at the top.
frame +
  geom_point(mapping = aes(shape = married)) +
  facet_wrap(~ sector) +
  ylim(0, 30) +
  theme(legend.position = "top")
## Warning: Removed 1 rows containing missing values (geom_point).
# Base frame: `wage` against `age` for CPS85.
frame <- ggplot(data = CPS85, mapping = aes(x = age, y = wage))
# Points laid out in a grid with `sex` as rows and `married` as columns,
# y-axis limited to 0-40.
frame +
  geom_point() +
  facet_grid(sex ~ married) +
  ylim(0, 40)
## Warning: Removed 1 rows containing missing values (geom_point).