For more information, see: https://www.safaribooksonline.com/library/view/r-graphics-cookbook/9781449363086/ch04.html ### uspopage
# Load the gcookbook package, which supplies the uspopage data set
library("gcookbook")

# Open the help page for the data set
help("uspopage")

# Show the structure of the data frame (column names and types)
str(uspopage)
## 'data.frame': 824 obs. of 3 variables:
## $ Year : int 1900 1900 1900 1900 1900 1900 1900 1900 1901 1901 ...
## $ AgeGroup : Factor w/ 8 levels "<5","5-14","15-24",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ Thousands: int 9181 16966 14951 12161 9273 6437 4026 3099 9336 17158 ...

# Print the first ten rows
head(uspopage, n = 10)
## Year AgeGroup Thousands
## 1 1900 <5 9181
## 2 1900 5-14 16966
## 3 1900 15-24 14951
## 4 1900 25-34 12161
## 5 1900 35-44 9273
## 6 1900 45-54 6437
## 7 1900 55-64 4026
## 8 1900 >64 3099
## 9 1901 <5 9336
## 10 1901 5-14 17158
subset(dataframe, condition): use `==` (not `=`) to test for equality inside subset().
# Keep only the rows of uspopage whose age group is "<5"
young <- subset(uspopage, subset = AgeGroup == "<5")

# Print the first six rows of the filtered data
head(young, n = 6)
## Year AgeGroup Thousands
## 1 1900 <5 9181
## 9 1901 <5 9336
## 17 1902 <5 9502
## 25 1903 <5 9645
## 33 1904 <5 9791
## 41 1905 <5 9944
# ggplot2 must be attached before the first ggplot() call; otherwise R stops
# with "could not find function". Install it once with
# install.packages("ggplot2") if it is not available.
library(ggplot2)

# Base plot with the x/y mapping only; no geom layer has been added yet
ggplot(young, aes(x = Year, y = Thousands))

#### fill by color
# Area filled in light blue, with a line layer added on top
ggplot(young, aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue") +
  geom_line()

# Same plot with the fill made partly transparent (alpha = 0.4)
ggplot(young, aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue", alpha = 0.4) +
  geom_line()

# Same plot again, using the black-and-white theme
ggplot(young, aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue", alpha = 0.4) +
  geom_line() +
  theme_bw()
# gcookbook supplies the uspopage data set
library("gcookbook")

# Area chart of population over time, with one filled band per age group
ggplot(data = uspopage, mapping = aes(Year, Thousands, fill = AgeGroup)) +
  geom_area()

# Same chart with the fill made mostly transparent (alpha = 0.2)
ggplot(data = uspopage, mapping = aes(Year, Thousands, fill = AgeGroup)) +
  geom_area(alpha = 0.2)
The plot above lets us compare the size of each age group. To reverse the stacking order of the groups, the original recipe used desc() from the “plyr” package.
# plyr is still loaded here, as in the original script
library(plyr)

## to change the order and compare each group
# The `order` aesthetic (order = desc(AgeGroup)) was deprecated in
# ggplot2 2.0.0 and is ignored by current versions, so it no longer changes
# the stack. Reverse the stacking order through the position adjustment.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE))
Make a proportional stacked area graph, in which the stack always fills the whole plot area (100%), so that the graph shows how each group's share changes over time. The advantage is that the relative growth of each group can be compared. To improve the plot, the stacking order can also be changed in ggplot2. First create a new column holding the percentage with ddply(): ddply(data frame, column to split by (here each year separately), function to apply, new column to create)
# Print the first six rows of the raw data
head(uspopage)
## Year AgeGroup Thousands
## 1 1900 <5 9181
## 2 1900 5-14 16966
## 3 1900 15-24 14951
## 4 1900 25-34 12161
## 5 1900 35-44 9273
## 6 1900 45-54 6437
library(gcookbook) # For the data set
library(plyr) # For the ddply() function

# Split uspopage by Year and, within each year, add a Percent column holding
# each age group's share of that year's total
data <- ddply(uspopage, "Year", transform,
              Percent = (Thousands / sum(Thousands)) * 100)
str(data)
## 'data.frame': 824 obs. of 4 variables:
## $ Year : int 1900 1900 1900 1900 1900 1900 1900 1900 1901 1901 ...
## $ AgeGroup : Factor w/ 8 levels "<5","5-14","15-24",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ Thousands: int 9181 16966 14951 12161 9273 6437 4026 3099 9336 17158 ...
## $ Percent : num 12.1 22.3 19.6 16 12.2 ...

# Proportional stacked area chart. The `order` aesthetic
# (order = desc(AgeGroup)) was deprecated in ggplot2 2.0.0 and is ignored by
# current versions, so the stacking order is reversed through the position
# adjustment instead.
ggplot(data, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE))
library(gcookbook) # For the heightweight data set
library(ggplot2)

# heightweight is a data set, not a function, so look up its help topic by
# name; `?heightweight()` would ask for help on a function call instead.
?heightweight

# Show the structure of the data frame (column names and types)
str(heightweight)
## 'data.frame': 236 obs. of 5 variables:
## $ sex : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 1 1 ...
## $ ageYear : num 11.9 12.9 12.8 13.4 15.9 ...
## $ ageMonth: int 143 155 153 161 191 171 185 142 160 140 ...
## $ heightIn: num 56.3 62.3 63.3 59 62.5 62.5 59 56.5 62 53.8 ...
## $ weightLb: num 85 105 108 92 112 ...

# Basic scatter plot of height against age
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point()
# Correlation between age and height. with() evaluates the expression inside
# the data frame without attach(), which would leave the columns on the
# search path and can silently mask other objects.
# NOTE(review): if later code relies on bare column names such as `ageYear`,
# it must reference heightweight explicitly.
with(heightweight, cor(ageYear, heightIn))
## [1] 0.6518749
# Scatter plot of height against age, drawn with point shape 4 at size 2
ggplot(data = heightweight, mapping = aes(ageYear, heightIn)) +
  geom_point(size = 2, shape = 4)

# Scatter plot of height against age, with points colored by sex
ggplot(data = heightweight, mapping = aes(ageYear, heightIn, color = sex)) +
  geom_point()
We can also map weight to color. However, the result is not clear: we normally expect a darker color to mean a heavier weight.
# Scatter plot of height against age, with point color mapped to the
# continuous weightLb column (default color scale)
ggplot(
  data = heightweight,
  mapping = aes(ageYear, heightIn, color = weightLb)
) +
  geom_point()
However, with the default scale heavier weights are drawn in a lighter color, so we have to reverse the gradient.
# Color points by weight using a custom gradient that runs from light blue
# (low values) to dark red (high values)
ggplot(
  data = heightweight,
  mapping = aes(ageYear, heightIn, color = weightLb)
) +
  geom_point() +
  scale_color_gradient(high = "darkred", low = "lightblue")
Change the size of the dots
# Map weight to both the color and the size of the points; the color gradient
# runs from light blue (low values) to dark blue (high values)
ggplot(
  data = heightweight,
  mapping = aes(ageYear, heightIn, size = weightLb, color = weightLb)
) +
  geom_point() +
  scale_color_gradient(high = "darkblue", low = "lightblue")
If we do not want to differentiate the dots by color (for example, when printing in black and white), we can distinguish them by size only.
# Scatter plot of height against age, with point size mapped to weight
ggplot(
  data = heightweight,
  mapping = aes(ageYear, heightIn, size = weightLb)
) +
  geom_point()
Problem: when the weight doubles, the area of the dot does not double. Add a scale_size_area() layer so that the area of each dot is proportional to its value.
# Point size mapped to weight and color mapped to sex; points are
# semi-transparent (alpha = 0.5) and sized through scale_size_area()
ggplot(
  data = heightweight,
  mapping = aes(ageYear, heightIn, color = sex, size = weightLb)
) +
  geom_point(alpha = 0.5) +
  scale_size_area()
Add a linear regression line by adding a layer: + stat_smooth(method = lm)
# Scatter plot of height against age with a fitted line from lm
ggplot(data = heightweight, mapping = aes(ageYear, heightIn)) +
  geom_point() +
  stat_smooth(method = lm)
Set the confidence level of the band around the line to 99%
# Same fitted line from lm, with the confidence level set to 0.99
ggplot(data = heightweight, mapping = aes(ageYear, heightIn)) +
  geom_point() +
  stat_smooth(level = 0.99, method = lm)
Without the confidence band
# Fitted line from lm with the confidence band switched off.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)
Use a loess fit so that the line becomes a smooth curve
# Smoothed curve fitted with loess, with the confidence band switched off.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = loess, se = FALSE)
Group by Sex
# Color is mapped to sex in the plot-level aes(), so both the points and the
# loess curves are split by sex. The confidence band is switched off.
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point() +
  stat_smooth(method = loess, se = FALSE)