This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Load dataset (semicolon-separated file read straight from GitHub).
data_pop <- read.table(
  "https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/11_SevCatOneNumNestedOneObsPerGroup.csv",
  header = TRUE,
  sep = ";"
)

# Rows whose value is exactly -1 are set to 1. `%in%` never returns NA, so
# rows with a missing value are left untouched (they are filtered out below).
data_pop$value[data_pop$value %in% (-1)] <- 1
colnames(data_pop) <- c("Continent", "Region", "Country", "Pop")

# Lollipop chart of the 50 largest populations, grouped by continent.
data_pop %>%
  filter(!is.na(Pop)) %>%
  # Keep the 50 rows with the largest Pop.
  arrange(Pop) %>%
  tail(50) %>%
  # Order rows by continent, then by population within each continent.
  arrange(Continent, Pop) %>%
  mutate(
    # Freeze the current row order as the factor level order so the axis
    # follows the sort above. `unique()` guards against duplicated country
    # names, which would make `factor()` fail on duplicated levels.
    Country = factor(Country, levels = unique(Country)),
    # Express population in millions to match the axis label.
    Pop = Pop / 1000000
  ) %>%
  ggplot(aes(x = Country, y = Pop, color = Continent)) +
  # Grey stem from 0 to the value, then a coloured dot on top.
  geom_segment(
    aes(x = Country, xend = Country, y = 0, yend = Pop),
    color = "grey"
  ) +
  geom_point(size = 3) +
  scale_color_viridis(discrete = TRUE) +
  # Flip so that country names are read horizontally.
  coord_flip() +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  xlab("") +
  ylab("Population (M)")
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning: package 'treemap' was built under R version 3.5.3
## Warning in txtGrbW > inchesW: longer object length is not a multiple of
## shorter object length
## Warning in mapply(txt, txtGrb$gp$cex, nlines, FUN = function(x, y, z,
## fontface) {: longer argument not a multiple of length of shorter
## Warning in mapply(txt, txtGrb$gp$cex, nlines, FUN = function(x, y, z,
## fontface) {: longer argument not a multiple of length of shorter
## Warning in mapply(txt, txtGrb$gp$cex, FUN = function(x, y, fontface) {:
## longer argument not a multiple of length of shorter
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Load dataset from github
data <- read.table(
  "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv",
  header = TRUE,
  sep = ","
)

# Only open the data viewer when running interactively; there is no viewer
# to show while the document is being knitted.
if (interactive()) {
  View(data)
}

# Counts the number of rows per class/sex group and the median age of each
# group. `n` counts every row in the group, including rows whose age is
# missing.
countsi <- data %>%
  group_by(class, sex) %>%
  summarize(
    n = n(),
    # `age` contains missing values; without na.rm = TRUE every group median
    # is NA, which makes the labels positioned from it unusable.
    median = median(age, na.rm = TRUE)
  ) %>%
  # Drop the leftover `class` grouping so later steps see a plain table.
  ungroup()
countsi
## # A tibble: 6 x 4
## # Groups: class [3]
## class sex n median
## <fct> <fct> <int> <dbl>
## 1 First female 94 NA
## 2 First male 122 NA
## 3 Second female 76 NA
## 4 Second male 108 NA
## 5 Third female 144 NA
## 6 Third male 347 NA
# Grouped boxplot of age by class, split by sex, with the individual
# observations jittered on top and a per-group count label from `countsi`.
data %>%
  # Rows without an age cannot be drawn; dropping them here avoids the
  # "removed rows" warnings from every layer.
  filter(!is.na(age)) %>%
  # Fix the x-axis order explicitly. (The original built an unused `day`
  # column twice; the column actually mapped to x is `class`.)
  mutate(class = factor(class, levels = c("First", "Second", "Third"))) %>%
  ggplot(aes(fill = sex, y = age, x = class)) +
  # With varwidth = TRUE ggplot2 cannot preserve total widths and falls back
  # to "single" with a warning; request "single" directly.
  geom_boxplot(
    position = position_dodge2(preserve = "single"),
    alpha = 0.5,
    outlier.colour = "transparent",
    varwidth = TRUE
  ) +
  # `width` is not a geom_point parameter (it was silently ignored); the
  # jitter amount belongs to the position adjustment.
  geom_point(
    color = "grey",
    size = 1,
    position = position_jitterdodge(jitter.width = 0.1),
    alpha = 0.4
  ) +
  scale_fill_viridis(discrete = TRUE, name = "") +
  # Group size label placed slightly below each group's median.
  geom_text(
    data = countsi,
    aes(label = paste0("n: ", n), y = median - 2),
    position = position_dodge(1),
    hjust = 0.5
  ) +
  theme_ipsum() +
  xlab("") +
  # NOTE(review): the original label read "age (th)", which looks like a
  # leftover from another example; assuming age is in years -- confirm.
  ylab("Age (years)") +
  # Anchor the axis at 0 but let the upper limit follow the data. A fixed
  # upper limit of 35 removed every older passenger *before* the boxplot
  # statistics were computed, distorting the boxes.
  ylim(0, NA)
## Warning: Can't preserve total widths when varwidth = TRUE.
## Warning: Ignoring unknown parameters: width
## Warning: Removed 394 rows containing non-finite values (stat_boxplot).
## Warning: Removed 394 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_text).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database