Loading packages

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.0.2
library(gridExtra)

Reading data

# Load the obesity dataset used by the examples that follow.
df <- read.csv(
  file = "~/Dropbox/_Conferences and Workshops/TDTU and others/TDTU 2019/Datasets/obesity data.csv"
)
# Classify BMI into four ordered categories in a single pass.
# Intervals are left-closed (right = FALSE):
#   bmi < 18.5 -> Underweight, [18.5, 25) -> Normal,
#   [25, 30)   -> Overweight,  bmi >= 30  -> Obese.
# include.lowest = TRUE closes the last interval on the right so that an
# infinite bmi still satisfies "bmi >= 30". Rows with a missing bmi stay NA.
df$ob <- cut(
  df$bmi,
  breaks = c(-Inf, 18.5, 25.0, 30.0, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE,
  include.lowest = TRUE
)

Histogram

# Histogram of percent body fat: bar heights are counts per bin
# (bin count left at the stat_bin() default).
ggplot(df, aes(x = pcfat)) +
  geom_histogram(fill = "blue", colour = "white") +
  labs(x = "Percent body fat", y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram on the density scale with a kernel density estimate overlaid.
# The y axis is labelled "Density" because the bar heights come from
# ..density.. (not counts), which puts them on the same scale as the
# geom_density() curve.
ggplot(data = df, aes(x = pcfat)) +
  geom_histogram(aes(y = ..density..), col = "white", fill = "blue") +
  geom_density(alpha = 0.3) +
  labs(x = "Percent body fat", y = "Density")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram by group
# p1: overlaid (position = "identity") semi-transparent count histograms,
# one per gender. Bar heights are counts, so the y axis is "Frequency".
p1 <- ggplot(data = df, aes(x = pcfat, fill = gender)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  labs(x = "Percent body fat", y = "Frequency")

# p2: kernel density curve per gender; here the y axis really is a density.
p2 <- ggplot(df) +
  geom_density(aes(x = pcfat, colour = gender, fill = gender), alpha = 0.3) +
  labs(x = "Percent body fat", y = "Density")

# Stack the two panels vertically for comparison.
grid.arrange(p1, p2, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ridgeline density plot by BMI category
library(ggridges)

# One percent-body-fat density ridge per BMI category (ob), filled by gender.
# theme(legend.position = "none") is added after theme_ridges() so the
# legend stays hidden.
ggplot(data = df, mapping = aes(x = pcfat, y = ob, fill = gender)) +
  geom_density_ridges() +
  labs(x = "Percent body fat") +
  theme_ridges() +
  theme(legend.position = "none")
## Picking joint bandwidth of 1.59

Bar plot

library(ggthemes)
# Small hand-entered dataset: one row per person with year of birth, height
# and weight (weight is NA for some rows and is not used in the plot below).
Name <- c("Bich Phuong", "Dieu Hoa", "Kieu Anh", "Thu Thuy", "Thien Nga", "Ngoc Khanh", "Thu Ngan", "Mai Phuong", "Thi Huyen", "Phuong Thuy", "Thuy Dung", "Ngoc Han", "Thu Thao", "Ky Duyen", "Mỹ Linh")
YoB <- c(1971, 1969, 1976, 1976, 1976, 1976, 1980, 1985, 1985, 1988, 1990, 1989, 1991, 1996, 1996)
Height <- c(157, 158, 174, 172, 170, 172, 169, 169, 172, 181, 182, 173, 173, 173, 171)
Weight <- c(50, NA, NA, NA, NA, 50, 49, 49, 52, 60, 61.5, 55, 49, 49, NA)
hh <- data.frame(Name, YoB, Height, Weight)

# Horizontal bar chart of height per person, with the value printed in white
# just inside the end of each bar (hjust = 1).
p <- ggplot(data = hh, aes(x = Name, y = Height, fill = Name))
p <- p + geom_bar(stat = "identity", width = 0.8)
p <- p + coord_flip() + xlab(" ")
p <- p + geom_text(aes(label = Height), hjust = 1, size = 3, color = "white")
# theme_economist() is a complete theme: adding it replaces every theme()
# setting applied earlier. It therefore goes first, and the axis-text and
# legend tweaks are layered on top so they actually take effect.
p + theme_economist() +
  theme(
    axis.text.x = element_text(angle = 0, color = "black"),
    axis.text.y = element_text(size = 15, color = "black"),
    legend.position = "none"
  )

# Bar plot from summary data
# Three age groups, each with one value (Num) for five years ten years apart.
Age <- rep(c("65+", "0-14", "15-64"), each = 5)
Num <- c(
  4.7, 4.8, 5.8, 6.4, 7.7,
  42.6, 38.3, 33.1, 24.5, 23.1,
  52.7, 56.9, 61.1, 69.1, 69.2
)
Year <- rep(c(1979, 1989, 1999, 2009, 2019), times = 3)
dat <- data.frame(Age, Num, Year)
# Fix the level order explicitly; it determines the stacking and legend order.
dat[["Age"]] <- factor(dat[["Age"]], levels = c("65+", "15-64", "0-14"))

# Plotting data with values using geom_text
# Stacked bars per year; each segment is labelled with its value, centred
# vertically within the segment (position_stack(vjust = 0.5)).
p <- ggplot(dat, aes(x = Year, y = Num, fill = Age, label = Num)) +
  geom_bar(stat = "identity")
p +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  ylab("Percent") +
  theme(legend.position = "top")

Box plot

# Box plot of percent body fat for each gender, boxes filled by gender.
ggplot(df, aes(x = gender, y = pcfat, fill = gender)) +
  geom_boxplot()

# Box plot with the raw observations overlaid as semi-transparent points.
# The box plot's own outlier markers are hidden (outlier.shape = NA) because
# geom_jitter() already draws every observation; otherwise each outlier would
# appear twice. Jitter is horizontal only (height = 0) so the plotted pcfat
# values are not displaced.
ggplot(data = df, aes(y = pcfat, x = gender, col = gender)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2, height = 0)

Scatter plot

# Scatter of percent body fat against BMI, coloured by gender, with a
# quadratic least-squares fit (y ~ x + x^2) drawn for each gender.
# The specification is built once and reused by the variants below.
bmi_scatter <- ggplot(data = df, aes(x = bmi, y = pcfat, col = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2))
bmi_scatter

# Adding rugs
# Same plot with rug marks added along the axes.
p <- bmi_scatter + geom_rug()
p

# Add marginal distributions
library(ggExtra)

# TRUE is spelled out: T is an ordinary variable that can be reassigned.
ggMarginal(p, groupColour = TRUE, groupFill = TRUE)

Kaplan-Meier plot

# Arrest data
# Load the arrest dataset used for the survival analysis below.
ar <- read.csv(
  file = "~/Dropbox/_Conferences and Workshops/TDTU and others/TDTU 4-2022/Datasets/Arrest dataset.csv"
)

library(survminer)
## Loading required package: ggpubr
library(survival)
# Survival curves for (week, arrest), one curve per level of finance.
fit <- survfit(Surv(week, arrest) ~ finance, data = ar)
# fun = "event" draws the curves on the cumulative-event scale rather than
# the survival scale; confidence bands and a risk table are included.
ggsurvplot(
  fit,
  fun = "event",
  conf.int = TRUE,
  risk.table = TRUE
)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.