For more information, see: https://www.safaribooksonline.com/library/view/r-graphics-cookbook/9781449363086/ch04.html

### uspopage

# Load the cookbook's example data, open the help page for the US
# population-by-age table, and look at its structure and first ten rows.
library("gcookbook")
help("uspopage")
str(object = uspopage)
## 'data.frame':    824 obs. of  3 variables:
##  $ Year     : int  1900 1900 1900 1900 1900 1900 1900 1900 1901 1901 ...
##  $ AgeGroup : Factor w/ 8 levels "<5","5-14","15-24",..: 1 2 3 4 5 6 7 8 1 2 ...
##  $ Thousands: int  9181 16966 14951 12161 9273 6437 4026 3099 9336 17158 ...
head(x = uspopage, n = 10)
##    Year AgeGroup Thousands
## 1  1900       <5      9181
## 2  1900     5-14     16966
## 3  1900    15-24     14951
## 4  1900    25-34     12161
## 5  1900    35-44      9273
## 6  1900    45-54      6437
## 7  1900    55-64      4026
## 8  1900      >64      3099
## 9  1901       <5      9336
## 10 1901     5-14     17158

subset(dataframe, conditions): use == (not =) to test for equality inside subset().

# Keep only the rows whose AgeGroup is "<5" and preview the result.
young <- subset(x = uspopage, subset = AgeGroup == "<5")
head(x = young)
##    Year AgeGroup Thousands
## 1  1900       <5      9181
## 9  1901       <5      9336
## 17 1902       <5      9502
## 25 1903       <5      9645
## 33 1904       <5      9791
## 41 1905       <5      9944

ggplot(young,aes(x=Year,y=Thousands)) #### Fill by color: install ggplot2, then load it with library(ggplot2)

# Area chart of the under-five population by year: a light-blue filled
# area with the series drawn as a line on top.
library("ggplot2")
ggplot(data = young, mapping = aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue") +
  geom_line()

To let the background show through the fill, set its transparency with alpha (a value between 0 and 1).

# Same area chart, but the fill is drawn at 40% opacity.
ggplot(data = young, mapping = aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue", alpha = 0.4) +
  geom_line()

Add an outline around the whole plot

# Semi-transparent area chart with the line on top, rendered with
# theme_bw() instead of the default theme.
ggplot(data = young, mapping = aes(x = Year, y = Thousands)) +
  geom_area(fill = "lightblue", alpha = 0.4) +
  geom_line() +
  theme_bw()

Stacked graph

Separate the data into groups by AgeGroup; the height of each area represents the population of that group. The areas are stacked rather than overlapping, which can be checked by setting alpha=0.2.

# Stacked area chart: one filled band per AgeGroup, Thousands on the y axis.
library("gcookbook")
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area()

# The same chart with the bands drawn at 20% opacity.
ggplot(
  data = uspopage,
  mapping = aes(x = Year, y = Thousands, fill = AgeGroup)
) +
  geom_area(alpha = 0.2)

The plot above lets us compare the size of each group. To change the stacking order and compare the groups, we can use the “plyr” package.

library(plyr) ## to change the stacking order and compare each group: ggplot(uspopage,aes(x=Year,y=Thousands,fill=AgeGroup,order=desc(AgeGroup)))+ geom_area()

library(plyr)
# Stacked area chart with the stacking order of the age groups reversed.
# The `order` aesthetic (previously `order = desc(AgeGroup)`) was
# deprecated in ggplot2 2.0.0 and is ignored by current releases, so the
# reversal is requested from the stacking position adjustment instead.
ggplot(uspopage, aes(x = Year, y = Thousands, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE))

Make a proportional stacked area graph that fills the whole plotting area by showing each group's percentage of the total. The advantage is that the growth of each group's share can be compared. To improve the plot, reorder the stacked areas in ggplot2. Create a new percentage column with ddply(): ddply(data frame, column to split by (each year is handled separately), function to apply, new value to create)

# Preview the raw counts before converting them to percentages.
head(uspopage)
##   Year AgeGroup Thousands
## 1 1900       <5      9181
## 2 1900     5-14     16966
## 3 1900    15-24     14951
## 4 1900    25-34     12161
## 5 1900    35-44      9273
## 6 1900    45-54      6437
library(gcookbook) # For the data set
library(plyr)      # For the ddply() function
# Split the rows by Year and, within each year, add a Percent column
# holding each row's share of that year's total Thousands.
# NOTE: the result keeps its original name `data` (which shadows the base
# data() function's name) so any later code referring to it still works.
data <- ddply(uspopage, "Year", transform,
              Percent = (Thousands / sum(Thousands)) * 100)
str(data)
## 'data.frame':    824 obs. of  4 variables:
##  $ Year     : int  1900 1900 1900 1900 1900 1900 1900 1900 1901 1901 ...
##  $ AgeGroup : Factor w/ 8 levels "<5","5-14","15-24",..: 1 2 3 4 5 6 7 8 1 2 ...
##  $ Thousands: int  9181 16966 14951 12161 9273 6437 4026 3099 9336 17158 ...
##  $ Percent  : num  12.1 22.3 19.6 16 12.2 ...
# Proportional stacked area chart with the stacking order reversed.
# The `order` aesthetic (previously `order = desc(AgeGroup)`) was
# deprecated in ggplot2 2.0.0 and is ignored by current releases, so the
# reversal is requested from the stacking position adjustment instead.
ggplot(data, aes(x = Year, y = Percent, fill = AgeGroup)) +
  geom_area(position = position_stack(reverse = TRUE))

Data frame: heightweight

Template: ggplot(heightweight, aes(x = <x variable>, y = <y variable>)) + geom_point()

# Load the packages, open the help page for the heightweight data set,
# and inspect its structure.
library("gcookbook")
library("ggplot2")
help("heightweight")
str(object = heightweight)
## 'data.frame':    236 obs. of  5 variables:
##  $ sex     : Factor w/ 2 levels "f","m": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ageYear : num  11.9 12.9 12.8 13.4 15.9 ...
##  $ ageMonth: int  143 155 153 161 191 171 185 142 160 140 ...
##  $ heightIn: num  56.3 62.3 63.3 59 62.5 62.5 59 56.5 62 53.8 ...
##  $ weightLb: num  85 105 108 92 112 ...
# Basic scatter plot: age in years on x, height in inches on y.
ggplot(data = heightweight, mapping = aes(x = ageYear, y = heightIn)) +
  geom_point()

# Correlation between age and height. with() evaluates the expression
# using the data frame's columns, so they are found without attach()
# adding the data frame to the search path (where its column names could
# mask, or be masked by, other objects).
with(heightweight, cor(ageYear, heightIn))
## [1] 0.6518749
# Scatter plot of height against age drawn with point shape 4 at size 2.
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point(shape = 4, size = 2)

# Scatter plot of height against age, with point color mapped to sex.
ggplot(
  data = heightweight,
  mapping = aes(x = ageYear, y = heightIn, color = sex)
) +
  geom_point()

We could also color the points by weight, but the result is not clear: normally we expect a darker color to mean heavier.

# Scatter plot of height against age, with point color mapped to the
# continuous weightLb column (default color scale).
ggplot(
  data = heightweight,
  mapping = aes(x = ageYear, y = heightIn, color = weightLb)
) +
  geom_point()

However, by default heavier values get the lighter color, so the gradient has to be reversed.

# Color the points by weight with an explicit gradient: the low end of
# the scale is light blue and the high end is dark red.
ggplot(
  data = heightweight,
  mapping = aes(x = ageYear, y = heightIn, color = weightLb)
) +
  geom_point() +
  scale_color_gradient(low = "lightblue", high = "darkred")

Change the size of the dots

# Map weight to both point color and point size; the color gradient runs
# from light blue (low) to dark blue (high).
ggplot(
  data = heightweight,
  mapping = aes(
    x = ageYear,
    y = heightIn,
    color = weightLb,
    size = weightLb
  )
) +
  geom_point() +
  scale_color_gradient(low = "lightblue", high = "darkblue")

Alternatively, differentiate the dots only by size rather than by color, e.g. when printing in black and white.

# Scatter plot of height against age where only point size encodes weight.
ggplot(
  data = heightweight,
  mapping = aes(x = ageYear, y = heightIn, size = weightLb)
) +
  geom_point()

Problem: when the weight value doubles, the size of the dot does not. Add a scale layer (scale_size_area()) to fix it.

# Size the points by weight and color them by sex; points are drawn at
# 50% opacity, and scale_size_area() is used as the size scale.
ggplot(
  data = heightweight,
  mapping = aes(
    x = ageYear,
    y = heightIn,
    size = weightLb,
    color = sex
  )
) +
  geom_point(alpha = 0.5) +
  scale_size_area()

Add a linear regression line by adding a layer: +stat_smooth(method=lm)

# Scatter plot of height against age with a smoother fitted by lm().
ggplot(data = heightweight, mapping = aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = lm)

Set the confidence level of the band (here 99%)

# Same lm() smoother, with the confidence level set to 0.99.
ggplot(data = heightweight, mapping = aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = lm, level = 0.99)

Without the confidence band

# lm() smoother without the confidence band. `se` is given as FALSE
# rather than `F`, because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)

The line becomes a smooth curve

# loess() smoother without the confidence band. `se` is given as FALSE
# rather than `F`, because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
ggplot(heightweight, aes(x = ageYear, y = heightIn)) +
  geom_point() +
  stat_smooth(method = loess, se = FALSE)

Group by Sex

# Color is mapped to sex in the plot-level aesthetics, so both the points
# and the loess() smoother are drawn per sex. `se` is given as FALSE
# rather than `F`, because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
ggplot(heightweight, aes(x = ageYear, y = heightIn, color = sex)) +
  geom_point() +
  stat_smooth(method = loess, se = FALSE)