R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the population dataset (semicolon-separated, with a header row).
data_pop <- read.table(
  "https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/11_SevCatOneNumNestedOneObsPerGroup.csv",
  header = TRUE,
  sep = ";"
)

# Overwrite every value equal to -1 with 1.
# NOTE(review): presumably -1 is a placeholder in the source file -- confirm.
# `%in%` never yields NA, so rows with a missing value are left untouched.
data_pop$value[data_pop$value %in% -1] <- 1

# Rename columns by position.
# NOTE(review): assumes the file has exactly these four columns in this
# order -- verify against the CSV.
colnames(data_pop) <- c("Continent", "Region", "Country", "Pop")

# Lollipop chart of the 50 largest populations, expressed in millions and
# coloured by continent.
data_pop %>%
  filter(!is.na(Pop)) %>%
  # Sort ascending and keep the last 50 rows, i.e. the 50 largest values.
  arrange(Pop) %>%
  tail(50) %>%
  # Row order after this sort defines the order of countries on the axis.
  arrange(Continent, Pop) %>%
  mutate(
    # Freeze the current row order as factor levels. unique() guards against
    # a repeated country name, which factor() rejects as a duplicated level.
    Country = factor(Country, levels = unique(Country)),
    Pop = Pop / 1000000
  ) %>%
  ggplot(aes(x = Country, y = Pop, color = Continent)) +
  # Grey stem from zero up to the value; x is inherited from the plot.
  geom_segment(aes(xend = Country, y = 0, yend = Pop), color = "grey") +
  geom_point(size = 3) +
  scale_color_viridis(discrete = TRUE) +
  # Flip so the country names are on the vertical axis.
  coord_flip() +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  xlab("") +
  ylab("Population (M)")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning: package 'treemap' was built under R version 3.5.3
## Warning in txtGrbW > inchesW: longer object length is not a multiple of
## shorter object length
## Warning in mapply(txt, txtGrb$gp$cex, nlines, FUN = function(x, y, z,
## fontface) {: longer argument not a multiple of length of shorter

## Warning in mapply(txt, txtGrb$gp$cex, nlines, FUN = function(x, y, z,
## fontface) {: longer argument not a multiple of length of shorter
## Warning in mapply(txt, txtGrb$gp$cex, FUN = function(x, y, fontface) {:
## longer argument not a multiple of length of shorter

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load dataset from github (comma-separated, with a header row).
data <- read.table(
  "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv",
  header = TRUE,
  sep = ","
)

# View() opens a data viewer window, which only makes sense in an
# interactive session; skip it when the document is knitted or run in batch.
if (interactive()) {
  View(data)
}

# Count the rows and compute the median age for each class / sex group.
countsi <- data %>%
  group_by(class, sex) %>%
  summarize(
    n = n(),
    # age contains missing values; without na.rm = TRUE every group median
    # comes back as NA.
    median = median(age, na.rm = TRUE)
  ) %>%
  # Drop the leftover grouping so later uses of countsi are not grouped.
  ungroup()

countsi
## # A tibble: 6 x 4
## # Groups:   class [3]
##   class  sex        n median
##   <fct>  <fct>  <int>  <dbl>
## 1 First  female    94     NA
## 2 First  male     122     NA
## 3 Second female    76     NA
## 4 Second male     108     NA
## 5 Third  female   144     NA
## 6 Third  male     347     NA
# Grouped boxplot of age by class, split by sex, with jittered points and a
# per-group "n: <count>" label taken from countsi.
data %>%
  # Drop rows with unknown age once, instead of letting every layer discard
  # them separately with a warning.
  filter(!is.na(age)) %>%
  # Fix the order of the classes on the x axis.
  mutate(class = factor(class, levels = c("First", "Second", "Third"))) %>%
  ggplot(aes(fill = sex, y = age, x = class)) +
  geom_boxplot(
    # preserve = "total" cannot be honoured together with varwidth = TRUE
    # (ggplot2 warns about it), so request "single" explicitly.
    position = position_dodge2(preserve = "single"),
    alpha = 0.5,
    outlier.colour = "transparent",
    varwidth = TRUE
  ) +
  geom_point(
    color = "grey",
    size = 1,
    alpha = 0.4,
    # The jitter width is an argument of the position, not of geom_point().
    position = position_jitterdodge(jitter.width = 0.1)
  ) +
  scale_fill_viridis(discrete = TRUE, name = "") +
  # Place each count label slightly below the group median from countsi.
  geom_text(
    data = countsi,
    aes(label = paste0("n: ", n), y = median - 2),
    position = position_dodge(1),
    hjust = 0.5
  ) +
  theme_ipsum() +
  xlab("") +
  ylab("age (th)") +
  # Keep zero on the axis without a hard upper limit: ylim(0, 35) removed
  # every passenger older than 35 before the boxplot statistics were computed.
  expand_limits(y = 0)
## Warning: Can't preserve total widths when varwidth = TRUE.
## Warning: Ignoring unknown parameters: width
## Warning: Removed 394 rows containing non-finite values (stat_boxplot).
## Warning: Removed 394 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_text).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database