The data

We will start with a preloaded data set called mpg

RStudio Community is a great place to get help:
https://community.rstudio.com/c/tidyverse.
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
manufacturer model displ year cyl trans drv cty hwy fl class
mercury mountaineer 4wd 4.0 2008 6 auto(l5) 4 13 19 r suv
volkswagen gti 2.0 1999 4 auto(l4) f 19 26 r compact
dodge ram 1500 pickup 4wd 4.7 2008 8 manual(m6) 4 9 12 e pickup
dodge durango 4wd 4.7 2008 8 auto(l5) 4 13 17 r suv
volkswagen new beetle 1.9 1999 4 manual(m5) f 35 44 d subcompact
dodge caravan 2wd 4.0 2008 6 auto(l6) f 16 23 r minivan
volkswagen new beetle 2.5 2008 5 manual(m5) f 20 28 r subcompact
volkswagen new beetle 2.5 2008 5 auto(s6) f 20 29 r subcompact
ford explorer 4wd 4.0 1999 6 auto(l5) 4 14 17 r suv
audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25 p compact

Simple plot

Boxplots

  • Box plot group by year as numbers
ggplot(mpg) + geom_boxplot(mapping=aes(group=year,y=displ,
                                       fill=year))

  • Box plot group by year as factors
ggplot(mpg) + geom_boxplot(mapping=aes(group=year,y=displ,
                                       fill=factor(year)))

ggplot(mpg, aes(drv))+geom_bar(color=c("red","blue","green"))  + facet_grid(rows=vars(factor(cyl)),cols=vars(year))
Error: Aesthetics must be either length 1 or the same as the data (16): colour

ggplot(mpg) + geom_point(mapping=aes(x=hwy,y=cty,size=displ,color=factor(cyl))) + facet_grid(rows=vars(drv),cols=vars(year))

Plotting distribution

Simple plots

  • Histogram
  • Histogram of densities
ggplot(mpg,aes(x=hwy,y=..density..)) + geom_histogram(col="#FF000099", fill="#CCCC0099",bins=16)

  • Density plot
ggplot(mpg,aes(x=hwy)) + geom_density(col="#FF000099", fill="#99000099")

require(gridExtra) # also loads grid
 require(lattice)
 x <- seq(pi/4, 5 * pi, length.out = 100)
 y <- seq(pi/4, 5 * pi, length.out = 100)
 r <- as.vector(sqrt(outer(x^2, y^2, "+")))

 grid <- expand.grid(x=x, y=y)
 grid$z <- cos(r^2) * exp(-r/(pi^3))
 plot1 <- levelplot(z~x*y, grid, cuts = 50, scales=list(log="e"), xlab="",
           ylab="", main="Weird Function", sub="with log scales",
           colorkey = FALSE, region = TRUE)

 plot2 <- levelplot(z~x*y, grid, cuts = 50, scales=list(log="e"), xlab="",
           ylab="", main="Weird Function", sub="with log scales",
           colorkey = FALSE, region = TRUE)
 grid.arrange(plot1,plot2, ncol=2)

require(gridExtra) # also loads grid
require(lattice)
x <- seq(pi/4, 5 * pi, length.out = 100)
y <- seq(pi/4, 5 * pi, length.out = 100)
r <- as.vector(sqrt(outer(x^2, y^2, "+")))

grid <- expand.grid(x=x, y=y)


s = 10000
pop = 1:s
h = rnorm(s,50,10)

plist = ggplot()
est = rep(0,7)
sizes = c("005","010","020","040","080","160","320")
ss = c(5,10,20,40,80,160,320)

p <- ggplot(ylim=c(0,8))

for (i in 1:7) {
  meanest = rep(0,20)
  
  for(j in 1:20) {
    d = density(h[sample(pop,ss[i])])
    meanest[j] = mean(sum(d$x * d$y) / sum(d$y))
    p <- p + geom_line(mapping=aes(x= d$x,
          y = (d$y + (j/10.0)), color=factor(j)),
           alpha=0.8,color=i)
  }

  

  if (i > 1)
  {
    est = cbind(est,meanest)
  }
  else
  {
    est = meanest
  }
}

p + scale_color_hue() + labs(color="Sample size") + 
    ggtitle(paste("Distribution of Samples (sized",i,")")) +
            xlab("values") + ylab("density")

colnames(est) = sizes
ggplot(mpg,aes(x=hwy)) + geom_density(col="#FF000099", fill="#99000099")

h = rnorm(160,50,10)

ggplot()+
  geom_line(mapping=aes(x=h[1:5],y=..density..,color=5),stat="density",lwd=3.5)+
  geom_line(mapping=aes(x=h[1:10],y=..density..,color=10),stat="density",lwd=3) +
  geom_line(mapping=aes(x=h[1:20],y=..density..,color=20),stat="density",lwd=2.5) +
  geom_line(mapping=aes(x=h[1:40],y=..density..,color=40),stat="density",lwd=2) +
  labs(color="Sample size")

Comparison of distributions

  • Parallel plots
ggplot(mpg,aes(x=hwy)) + geom_density(col="#FF000099", fill="#99000099") + facet_grid(rows=vars(year))

  • Superimposed density curves
ggplot(mpg,aes(x=hwy,group=year)) + geom_density(mapping=aes(color=factor(year)),size=1.2)

  • Density curve over a histogram
ggplot(mpg,aes(x=hwy, y=..density..)) + 
geom_histogram(color="red", fill="orange", bins=16) +
  geom_line(stat='density', color="red", lwd=1.2, lty=1, adjust=.4)

  • Comparison of superimposed density plots for different years
ggplot(mpg) + geom_density(mapping =aes(x=hwy), col="#FF000099", fill="#99000099") + geom_density(mapping =aes(x=cty), col="#0000FF99", fill="#0000FF99") + facet_grid(rows=vars(year))

Mapping


ggplot(nz,aes(long,lat,group=group)) + geom_polygon(fill="red", color ="black")+coord_quickmap()

library(sp)
library(GADMTools)
library(ggplot2)
MAP <- gadm_sf.loadCountries("THA", level = 0)
plotmap(MAP)

# thai = GADMTools::subset(MAP, level=0, regions=c("THA"))

THA2 <- gadm_getBackground(MAP, "THA", "osm")
plotmap(THA2,title='Thailand')

THA = gadm_sf_loadCountries("THA", level=1, basefile="./")
plotmap(THA)

listNames(THA,level=1)
 [1] "Amnat Charoen"           
 [2] "Ang Thong"               
 [3] "Bangkok Metropolis"      
 [4] "Bueng Kan"               
 [5] "Buri Ram"                
 [6] "Chachoengsao"            
 [7] "Chai Nat"                
 [8] "Chaiyaphum"              
 [9] "Chanthaburi"             
[10] "Chiang Mai"              
[11] "Chiang Rai"              
[12] "Chon Buri"               
[13] "Chumphon"                
[14] "Kalasin"                 
[15] "Kamphaeng Phet"          
[16] "Kanchanaburi"            
[17] "Khon Kaen"               
[18] "Krabi"                   
[19] "Lampang"                 
[20] "Lamphun"                 
[21] "Loei"                    
[22] "Lop Buri"                
[23] "Mae Hong Son"            
[24] "Maha Sarakham"           
[25] "Mukdahan"                
[26] "Nakhon Nayok"            
[27] "Nakhon Pathom"           
[28] "Nakhon Phanom"           
[29] "Nakhon Ratchasima"       
[30] "Nakhon Sawan"            
[31] "Nakhon Si Thammarat"     
[32] "Nan"                     
[33] "Narathiwat"              
[34] "Nong Bua Lam Phu"        
[35] "Nong Khai"               
[36] "Nonthaburi"              
[37] "Pathum Thani"            
[38] "Pattani"                 
[39] "Phangnga"                
[40] "Phatthalung"             
[41] "Phayao"                  
[42] "Phetchabun"              
[43] "Phetchaburi"             
[44] "Phichit"                 
[45] "Phitsanulok"             
[46] "Phra Nakhon Si Ayutthaya"
[47] "Phrae"                   
[48] "Phuket"                  
[49] "Prachin Buri"            
[50] "Prachuap Khiri Khan"     
[51] "Ranong"                  
[52] "Ratchaburi"              
[53] "Rayong"                  
[54] "Roi Et"                  
[55] "Sa Kaeo"                 
[56] "Sakon Nakhon"            
[57] "Samut Prakan"            
[58] "Samut Sakhon"            
[59] "Samut Songkhram"         
[60] "Saraburi"                
[61] "Satun"                   
[62] "Si Sa Ket"               
[63] "Sing Buri"               
[64] "Songkhla"                
[65] "Sukhothai"               
[66] "Suphan Buri"             
[67] "Surat Thani"             
[68] "Surin"                   
[69] "Tak"                     
[70] "Trang"                   
[71] "Trat"                    
[72] "Ubon Ratchathani"        
[73] "Udon Thani"              
[74] "Uthai Thani"             
[75] "Uttaradit"               
[76] "Yala"                    
[77] "Yasothon"                
lanna = gadm_subset(THA, 1, c("Chiang Rai","Chiang Mai","Mae Hong Son","Lamphun","Lampang"))
THA2 <- gadm_getBackground(lanna, "THA", "osm")
plotmap(THA2)

longitude <- runif(6, min=98.0, max = 100.5)
latitude <- runif(6, min=18.0, max = 20.5)
Cases <- runif(6, 25, 112)
DAT <- data.frame(longitude, latitude, Cases)
dots(lanna, DAT, color="red", size = 8, value = "Cases")

propDots(lanna, data = DAT, value="Cases",
         breaks=c(0, 12.5, 25, 50, 100), range = c(0, 100))


regions =  c("Chiang Rai","Chiang Mai","Mae Hong Son","Lamphun","Lampang")
values  = c(20,100,5,18,31)
DAT2 = data.frame(regions,values)
choropleth(lanna,DAT2,adm.join="regions",
            value = "values",
           breaks = "sd",
           palette="Oranges",
           legend = "Population",
           title="Population of Lanna Tai")
n same as number of different finite values\neach different finite value is a separate classColumn `NAME_1` joining character vector and factor, coercing into character vector


library(ggpubr)
Loading required package: magrittr
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
# Box plot
p <- ggplot(ToothGrowth, aes(x = dose, y = len)) + 
  geom_boxplot(aes(fill = supp), position = position_dodge(0.9)) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800"))
p



library(ggplot2)
library("ggpubr")
theme_set(
  theme_bw() +
    theme(legend.position = "top")
  )


# Load data and convert dose to a factor variable
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
# Box plot
p <- ggplot(ToothGrowth, aes(x = dose, y = len)) + 
  geom_boxplot(aes(fill = supp), position = position_dodge(0.9)) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800"))
p



# Split in vertical direction
p + facet_grid(rows = vars(supp))


# Split in horizontal direction
p + facet_grid(cols = vars(supp))


p + facet_grid(rows = vars(dose), cols = vars(supp))


p + facet_wrap(vars(dose))


p + facet_wrap(vars(dose), ncol=2)


# 1. Create a box plot (bp)
p <- ggplot(ToothGrowth, aes(x = dose, y = len))
bxp <- p + geom_boxplot(aes(color = dose)) +
  scale_color_manual(values = c("red","green","blue"))

# 2. Create a dot plot (dp)
dp <- p + geom_dotplot(aes(color = dose, fill = dose), 
                       binaxis='y', stackdir='center') +
  scale_color_manual(values = c("red","green","blue")) + 
  scale_fill_manual(values = c("red","green","blue"))

# 3. Create a line plot
lp <- ggplot(economics, aes(x = date, y = psavert)) + 
  geom_line(color = "#E46726") 


figure <- ggarrange(bxp, dp, lp,
                    labels = c("A", "B", "C"),
                    ncol = 2, nrow = 2)
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
figure


ggarrange(
  lp,                # First row with line plot
  # Second row with box and dot plots
  ggarrange(bxp, dp, ncol = 2, labels = c("B", "C")), 
  nrow = 2, 
  labels = "A"       # Label of the line plot
  ) 
`stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

h = rnorm(1000000,50,10)
sizes = c(5,10,20,40,80,160,320)
avglist = ""
for (s in sizes)
{
  dlist = ""
  
  for (i in 1:20)
  {
    d1 = density(h[sample(h,s)],adjust=4)
    
    d2 = cbind(d1$x,d1$y,rep(s,length(d1$x)),rep(i,length(d1$x)))
    a1 = c(s,mean(d1$x * d1$y))
    
    if (length(avglist) < 2)
    {
      avglist = a1
    }  
    else
    {
      avglist = rbind(avglist,a1)
    }
        
    if (length(dlist) < 4)
    {
      dlist = d2
    }  
    else
    {
      dlist = rbind(dlist,d2)
    }
  }

dlist = as.data.frame(dlist)
colnames(dlist) = c("x","y","s","g")
if (s == 5)
{
 grA = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1.5) + ylab("Density") + ylim(0,0.1) + ggtitle("(n = 5)")
}
else if (s==10)
{
  grB = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1.5) + ylab("Density") + ylim(0,0.1) + ggtitle("(n = 10)")
}
else if (s==20)
{
  grC = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1) + ylab("Density") + ylim(0,0.07) + ggtitle("(n = 20)")
}
else if (s==40)
{
  grD = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1) + ylab("Density") + ylim(0,0.07) + ggtitle("(n = 40)")
}
else if (s==80)
{
  grE = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1) + ylab("Density") + ylim(0,0.06) + ggtitle("(n = 80)")
}
else if (s==160)
{
  grF = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1) + ylab("Density") + ylim(0,0.06) + ggtitle("(n = 160)")
}
else if (s==320)
{
  grG = ggplot(dlist, aes(x=x,y=y)) + geom_line(mapping=aes(group=g),color="#FF000033",lwd=1) + ylab("Density") +  ggtitle("(n = 320)")
}

}

d1 = density(h[sample(h,s)],adjust=4)
    
d2 = cbind(d1$x,d1$y,rep(s,length(d1$x)),rep(i,length(d1$x)))
a1 = c(1000000,mean(d1$x * d1$y))
avglist = rbind(avglist,a1)

grH = ggplot() + geom_line(mapping=aes(x=d1$x,y=d1$y),color="red",lwd=1) + ylab("Density") + xlab("x") + ggtitle("(n = 1,000,000)")


figure <- ggarrange(grA,grB, grC, grD, grE, grF, grG,grH,
      labels = c("A","B","C","D","E","F","G"),
      ncol = 2, nrow = 4)
figure


  
colnames(avglist) = c("siz","mean")
avglist = as.data.frame(avglist)

grA = ggplot(avglist,aes(x=factor(siz),y=mean)) + 
  geom_boxplot(mapping=aes(group=siz),color="red")+
  xlab("Sample size") + ylab("Calculated Mean")

grB = ggplot(avglist,aes(x=factor(siz),y=mean)) + 
  geom_violin(mapping=aes(group=siz),color="blue",fill="blue") +
  xlab("Sample size") + ylab("Calculated Mean")

grC = ggplot(avglist,aes(x=factor(siz),y=mean)) + 
  geom_boxplot(mapping=aes(group=siz),color="red")+
  geom_violin(mapping=aes(group=siz),color="#000099",fill="#00009999") +
  xlab("Sample size") + ylab("Calculated Mean")

figure <- ggarrange(grA,grB, grC,
      labels = c("A","B","C"),
      ncol = 1, nrow = 3)
figure

---
title: "Data Visualization"
output:
  html_notebook:
    # Default figure size (in inches) for chunks that do not set their own.
    # Format options must be nested under the output format and use
    # underscores; top-level `fig.width` / `fig.height` keys are not options
    # of html_notebook and have no effect on figure size.
    fig_width: 7
    fig_height: 3
---


# The data

We will start with a preloaded data set called `mpg`


```{r,echo=FALSE,fig.cap="10 random entries from the mpg data frame"}
# knitr provides kable(); ggplot2 provides the plotting functions and the
# `mpg` data set used throughout this document.
library(knitr)
library(ggplot2)

# Preview 10 rows of `mpg` drawn at random without replacement. The number of
# rows is read from the data rather than hard-coded as 234.
random_rows <- sample(nrow(mpg), 10)
knitr::kable(mpg[random_rows, ], caption = "10 Random entries from mpg")
```

# Simple plot

## Boxplots

* Box plot group by year as numbers

```{r}
# One box of engine displacement per model year. `year` is left numeric, so
# the fill aesthetic is drawn with a continuous colour scale.
ggplot(data = mpg, mapping = aes(y = displ, group = year, fill = year)) +
  geom_boxplot()
```

* Box plot group by year as factors

```{r}
# Same boxes as above, but `year` is converted to a factor for the fill, which
# gives one discrete colour per year.
ggplot(
  data = mpg,
  mapping = aes(y = displ, group = year, fill = factor(year))
) +
  geom_boxplot()
```





```{r}
# Bar chart of drive type, faceted by cylinder count (rows) and year (columns).
#
# Passing c("red", "blue", "green") as a constant `color` fails with
# "Aesthetics must be either length 1 or the same as the data": the constant
# is matched against every bar of the faceted layer, not against the three
# drive types. Mapping the outline colour to `drv` and supplying the colours
# through a manual scale assigns them per drive type in every panel
# (levels in sorted order: "4" = red, "f" = blue, "r" = green).
ggplot(mpg, aes(x = drv)) +
  geom_bar(aes(color = drv), show.legend = FALSE) +
  scale_color_manual(values = c("red", "blue", "green")) +
  facet_grid(rows = vars(factor(cyl)), cols = vars(year))

```

```{r}
# City vs. highway mileage; point size encodes displacement and colour encodes
# the number of cylinders. Panels: drive type (rows) by model year (columns).
point_mapping <- aes(x = hwy, y = cty, size = displ, color = factor(cyl))

ggplot(mpg) +
  geom_point(mapping = point_mapping) +
  facet_grid(rows = vars(drv), cols = vars(year))
```

## Plotting distribution

### Simple plots



* Histogram
```{r,fig.width=7,fig.height=3,fig.cap="Simple Histogram"}
# Count histogram of highway mileage using 16 bins.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram(
    bins = 16,
    colour = "#FF000099",
    fill = "#99000099"
  )
```

* Histogram of densities
```{r,fig.width=7,fig.height=3,fig.cap="Histogram of Densities"}
# Histogram of highway mileage with bar heights on the density scale
# (`..density..` is the value computed by the histogram stat), 16 bins.
ggplot(data = mpg, mapping = aes(x = hwy, y = ..density..)) +
  geom_histogram(
    bins = 16,
    colour = "#FF000099",
    fill = "#CCCC0099"
  )
```




* Density plot

```{r}
# Kernel density estimate of highway mileage, drawn as a filled curve.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_density(colour = "#FF000099", fill = "#99000099")
```

```{r}
# Two lattice level plots placed side by side with gridExtra.
#
# library() is used instead of require(): a missing package then stops the
# chunk immediately instead of returning FALSE and failing on a later line.
library(gridExtra) # also loads grid
library(lattice)

# 100 x 100 evaluation grid over [pi/4, 5*pi] in both directions.
x <- seq(pi / 4, 5 * pi, length.out = 100)
y <- seq(pi / 4, 5 * pi, length.out = 100)

# sqrt(x^2 + y^2) for every (x, y) pair. as.vector() flattens the matrix
# column by column (x varies fastest), which matches the row order produced
# by expand.grid(x = x, y = y) below.
r <- as.vector(sqrt(outer(x^2, y^2, "+")))

grid <- expand.grid(x = x, y = y)
grid$z <- cos(r^2) * exp(-r / (pi^3))

# Both panels show the same plot, so it is built once and reused.
plot1 <- levelplot(
  z ~ x * y, grid,
  cuts = 50, scales = list(log = "e"),
  xlab = "", ylab = "",
  main = "Weird Function", sub = "with log scales",
  colorkey = FALSE, region = TRUE
)
plot2 <- plot1

grid.arrange(plot1, plot2, ncol = 2)
```


```{r,fig.height=7,fig.width=7,fig.cap="Effect of sample size"}
# Effect of sample size on kernel density estimates.
#
# For every sample size in `ss`, 20 samples are drawn from a simulated
# N(50, 10) population, a density is estimated for each sample, and all
# curves are drawn in one plot (coloured by sample size, shifted upwards by
# 0.1 per replicate so that the replicates do not overlap).
#
# The curves are collected in a data frame and plotted with a single layer.
# Adding one geom_line() per curve with aes(x = d$x, ...) does not work:
# aes() expressions are evaluated when the plot is drawn, by which time `d`
# and `j` hold only their final values, so every layer shows the same curve.
library(gridExtra) # also loads grid
library(lattice)

s <- 10000
pop <- seq_len(s)
h <- rnorm(s, mean = 50, sd = 10)

sizes <- c("005", "010", "020", "040", "080", "160", "320")
ss <- c(5, 10, 20, 40, 80, 160, 320)
n_rep <- 20

# est[j, i] is the mean estimated from replicate j at sample size ss[i].
est <- matrix(0, nrow = n_rep, ncol = length(ss), dimnames = list(NULL, sizes))
curves <- vector("list", length(ss) * n_rep)

for (i in seq_along(ss)) {
  for (j in seq_len(n_rep)) {
    d <- density(h[sample(pop, ss[i])])
    # Density-weighted average of the grid points.
    est[j, i] <- sum(d$x * d$y) / sum(d$y)
    curves[[(i - 1) * n_rep + j]] <- data.frame(
      x = d$x,
      y = d$y + j / 10,
      size = sizes[i],
      draw = j
    )
  }
}
curves <- do.call(rbind, curves)

p <- ggplot(
  curves,
  aes(x = x, y = y, group = interaction(size, draw), color = size)
) +
  geom_line(alpha = 0.8)

p + scale_color_hue() + labs(color = "Sample size") +
  ggtitle("Distribution of Samples by Sample Size") +
  xlab("values") + ylab("density")

```

```{r}
# Filled density curve of highway mileage.
hwy_density <- geom_density(colour = "#FF000099", fill = "#99000099")
ggplot(mpg, mapping = aes(x = hwy)) + hwy_density
```

```{r,fig.height=3,fig.width=7}
# Density estimates from the first 5, 10, 20 and 40 values of one simulated
# N(50, 10) sample. Smaller samples get thicker lines; the constant placed
# inside aes() feeds the colour scale, so the legend shows the sample size.
h <- rnorm(n = 160, mean = 50, sd = 10)

ggplot() +
  geom_line(
    mapping = aes(x = h[1:5], y = ..density.., color = 5),
    stat = "density", lwd = 3.5
  ) +
  geom_line(
    mapping = aes(x = h[1:10], y = ..density.., color = 10),
    stat = "density", lwd = 3
  ) +
  geom_line(
    mapping = aes(x = h[1:20], y = ..density.., color = 20),
    stat = "density", lwd = 2.5
  ) +
  geom_line(
    mapping = aes(x = h[1:40], y = ..density.., color = 40),
    stat = "density", lwd = 2
  ) +
  labs(color = "Sample size")

```




### Comparison of distributions

* Parallel plots

```{r}
# Highway-mileage density, one panel per model year stacked vertically.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_density(colour = "#FF000099", fill = "#99000099") +
  facet_grid(rows = vars(year))
```

* Superimposed density curves

```{r,fig.cap="Comparison of MPG by Year",fig.width=7,fig.height=3}
# One highway-mileage density curve per model year in a single panel; the
# curve colour identifies the year.
year_colour <- aes(color = factor(year))

ggplot(data = mpg, mapping = aes(x = hwy, group = year)) +
  geom_density(mapping = year_colour, size = 1.2)
```

* Density curve over a histogram

```{r,fig.cap="Comparison of Density to Histogram",fig.width=7,fig.height=3}
# Density-scaled histogram of highway mileage with a kernel density curve
# drawn on top (`adjust = .4` narrows the smoothing bandwidth).
# The expression must not end with `+`: a dangling operator leaves the
# statement incomplete and the chunk fails to parse.
ggplot(mpg, aes(x = hwy, y = ..density..)) +
  geom_histogram(color = "red", fill = "orange", bins = 16) +
  geom_line(stat = "density", color = "red", lwd = 1.2, lty = 1, adjust = .4)
```

* Comparison of superimposed density plots for different years


```{r,fig.cap="Density of Cty and Hwy MPG by year",fig.width=7,fig.height=3}
# Highway (red) and city (blue) mileage densities overlaid, one panel per
# model year.
ggplot(data = mpg) +
  geom_density(
    mapping = aes(x = hwy),
    colour = "#FF000099", fill = "#99000099"
  ) +
  geom_density(
    mapping = aes(x = cty),
    colour = "#0000FF99", fill = "#0000FF99"
  ) +
  facet_grid(rows = vars(year))
```

## Mapping

```{r}

# Polygon map of New Zealand.
# `nz` is not created by any chunk visible in this document, so it is built
# here with ggplot2's map_data(), which returns the long/lat/group columns
# used below. map_data() requires the 'maps' package to be installed.
nz <- map_data("nz")

ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "red", color = "black") +
  coord_quickmap()
```

```{r}
# Maps of Thailand built with GADMTools: the country outline, the level-1
# regions, a five-region subset stored as `lanna`, and three thematic maps
# drawn on that subset.
library(sp)
library(GADMTools)
library(ggplot2)

# Country outline (level 0), first plain and then over an "osm" background.
MAP <- gadm_sf.loadCountries("THA", level = 0)
plotmap(MAP)
# thai = GADMTools::subset(MAP, level=0, regions=c("THA"))

THA2 <- gadm_getBackground(MAP, "THA", "osm")
plotmap(THA2, title = "Thailand")

# Level-1 regions, loaded with basefile = "./", and the list of their names.
THA <- gadm_sf_loadCountries("THA", level = 1, basefile = "./")
plotmap(THA)
listNames(THA, level = 1)

# The five regions used for the subset and for the choropleth data below.
regions <- c("Chiang Rai", "Chiang Mai", "Mae Hong Son", "Lamphun", "Lampang")
lanna <- gadm_subset(THA, 1, regions)
plotmap(lanna)

# Six random points inside a longitude/latitude box, each with a random
# "Cases" value; drawn first as plain dots, then as proportional dots.
longitude <- runif(6, min = 98.0, max = 100.5)
latitude <- runif(6, min = 18.0, max = 20.5)
Cases <- runif(6, 25, 112)
DAT <- data.frame(longitude, latitude, Cases)

dots(lanna, DAT, color = "red", size = 8, value = "Cases")
propDots(
  lanna,
  data = DAT,
  value = "Cases",
  breaks = c(0, 12.5, 25, 50, 100),
  range = c(0, 100)
)

# Choropleth of one hand-entered value per region, joined on region name.
values <- c(20, 100, 5, 18, 31)
DAT2 <- data.frame(regions, values)
choropleth(
  lanna, DAT2,
  adm.join = "regions",
  value = "values",
  breaks = "sd",
  palette = "Oranges",
  legend = "Population",
  title = "Population of Lanna Tai"
)

```


```{r}

# ToothGrowth examples: a dodged box plot, the same plot under a new default
# theme, several facet layouts, and multi-panel figures built with ggarrange().
library(ggpubr)

# Load ToothGrowth into the workspace, turn `dose` into a factor, and return
# the dodged box plot of tooth length by dose, filled by supplement type.
dose_boxplot <- function() {
  data("ToothGrowth", envir = globalenv())
  ToothGrowth$dose <<- as.factor(ToothGrowth$dose)
  ggplot(ToothGrowth, aes(x = dose, y = len)) +
    geom_boxplot(aes(fill = supp), position = position_dodge(0.9)) +
    scale_fill_manual(values = c("#00AFBB", "#E7B800"))
}

# Box plot with the default theme.
p <- dose_boxplot()
p

# Switch the default theme: black-and-white with the legend on top.
library(ggplot2)
library("ggpubr")
theme_set(
  theme_bw() + theme(legend.position = "top")
)

# Reload the data and draw the same box plot under the new theme.
p <- dose_boxplot()
p

# Facet layouts: by supplement vertically, by supplement horizontally,
# dose-by-supplement grid, wrapped by dose, wrapped by dose in two columns.
p + facet_grid(rows = vars(supp))

p + facet_grid(cols = vars(supp))

p + facet_grid(rows = vars(dose), cols = vars(supp))

p + facet_wrap(vars(dose))

p + facet_wrap(vars(dose), ncol = 2)

# Building blocks for the combined figures; one colour per dose level.
dose_colours <- c("red", "green", "blue")
p <- ggplot(ToothGrowth, aes(x = dose, y = len))

# 1. Box plot (bxp)
bxp <- p +
  geom_boxplot(aes(color = dose)) +
  scale_color_manual(values = dose_colours)

# 2. Dot plot (dp)
dp <- p +
  geom_dotplot(
    aes(color = dose, fill = dose),
    binaxis = "y", stackdir = "center"
  ) +
  scale_color_manual(values = dose_colours) +
  scale_fill_manual(values = dose_colours)

# 3. Line plot (lp) of the `psavert` column of `economics` over time
lp <- ggplot(economics, aes(x = date, y = psavert)) +
  geom_line(color = "#E46726")

# Three plots on a 2 x 2 grid.
figure <- ggarrange(
  bxp, dp, lp,
  labels = c("A", "B", "C"),
  ncol = 2, nrow = 2
)
figure

# Nested layout: the line plot fills the first row; the box and dot plots
# share the second row.
bottom_row <- ggarrange(bxp, dp, ncol = 2, labels = c("B", "C"))
ggarrange(lp, bottom_row, nrow = 2, labels = "A")
```

```{r,fig.width=7, fig.height=7}
# Sampling variability of density estimates.
#
# A population of 1,000,000 values is simulated from N(50, 10). For every
# sample size in `sizes`, 20 samples are drawn, a density (adjust = 4) is
# estimated for each, and the 20 curves are drawn in one panel. A last panel
# shows the density of the whole population. `avglist` collects one row
# (sample size, estimated mean) per density and is consumed by the next chunk.
#
# Corrections relative to the previous version of this chunk:
#   * samples are drawn with sample(h, s); h[sample(h, s)] used the sampled
#     *values* as row indices and therefore never sampled the population;
#   * the mean of a density is the density-weighted average of its grid
#     points, sum(x * y) / sum(y), not mean(x * y);
#   * the population panel is computed from all of `h`, not from one more
#     sample of the last size;
#   * all eight panels receive a label;
#   * results are collected in lists and bound once instead of growing a
#     matrix with rbind() inside the loops.
h <- rnorm(1000000, mean = 50, sd = 10)
sizes <- c(5, 10, 20, 40, 80, 160, 320)
n_rep <- 20

# Upper y-axis limit (NA = let ggplot choose) and line width for each size.
y_max <- c(0.1, 0.1, 0.07, 0.07, 0.06, 0.06, NA)
line_width <- c(1.5, 1.5, 1, 1, 1, 1, 1)

# Mean of the distribution described by a density() result.
density_mean <- function(d) {
  sum(d$x * d$y) / sum(d$y)
}

panels <- vector("list", length(sizes))
avg_parts <- vector("list", length(sizes))

for (k in seq_along(sizes)) {
  s <- sizes[k]

  # One density estimate per replicate: the curve and its (size, mean) row.
  reps <- lapply(seq_len(n_rep), function(i) {
    d1 <- density(sample(h, s), adjust = 4)
    list(
      curve = data.frame(x = d1$x, y = d1$y, s = s, g = i),
      avg = c(s, density_mean(d1))
    )
  })
  dlist <- do.call(rbind, lapply(reps, `[[`, "curve"))
  avg_parts[[k]] <- do.call(rbind, lapply(reps, `[[`, "avg"))

  panel <- ggplot(dlist, aes(x = x, y = y)) +
    geom_line(
      mapping = aes(group = g),
      color = "#FF000033", lwd = line_width[k]
    ) +
    ylab("Density") +
    ggtitle(paste0("(n = ", s, ")"))
  if (!is.na(y_max[k])) {
    panel <- panel + ylim(0, y_max[k])
  }
  panels[[k]] <- panel
}

# Density of the complete population.
d1 <- density(h, adjust = 4)
avglist <- rbind(
  do.call(rbind, avg_parts),
  c(length(h), density_mean(d1))
)

grH <- ggplot(data.frame(x = d1$x, y = d1$y), aes(x = x, y = y)) +
  geom_line(color = "red", lwd = 1) +
  ylab("Density") + xlab("x") + ggtitle("(n = 1,000,000)")

figure <- ggarrange(
  plotlist = c(panels, list(grH)),
  labels = c("A", "B", "C", "D", "E", "F", "G", "H"),
  ncol = 2, nrow = 4
)
figure

```

```{r,fig.width=7,fig.height=7}

  
# Distribution of the calculated means by sample size, shown three ways:
# box plots, violin plots, and violins drawn over box plots.
colnames(avglist) <- c("siz", "mean")
avglist <- as.data.frame(avglist)

# Shared canvas: sample size as a discrete x axis, calculated mean on y.
mean_canvas <- ggplot(avglist, aes(x = factor(siz), y = mean)) +
  xlab("Sample size") +
  ylab("Calculated Mean")

grA <- mean_canvas +
  geom_boxplot(mapping = aes(group = siz), color = "red")

grB <- mean_canvas +
  geom_violin(mapping = aes(group = siz), color = "blue", fill = "blue")

grC <- mean_canvas +
  geom_boxplot(mapping = aes(group = siz), color = "red") +
  geom_violin(
    mapping = aes(group = siz),
    color = "#000099", fill = "#00009999"
  )

figure <- ggarrange(
  grA, grB, grC,
  labels = c("A", "B", "C"),
  ncol = 1, nrow = 3
)
figure

```

