Loading packages
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(ggExtra)
## Warning: package 'ggExtra' was built under R version 4.0.2
library(gridExtra)
Reading data
# Load the obesity data set and classify every subject by body-mass index.
df <- read.csv("~/Dropbox/_Conferences and Workshops/TDTU and others/TDTU 2019/Datasets/obesity data.csv")

# cut() with right = FALSE builds left-closed intervals:
#   [-Inf, 18.5) Underweight, [18.5, 25) Normal,
#   [25, 30)     Overweight,  [30, Inf]  Obese
# (include.lowest = TRUE closes the last interval on the right).
# The result is a factor whose levels follow the order of `labels`;
# a missing bmi stays NA. Keeping the cut points in one vector avoids the
# four separate masked assignments getting out of sync.
df$ob <- cut(
  df$bmi,
  breaks = c(-Inf, 18.5, 25.0, 30.0, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE,
  include.lowest = TRUE
)
Histogram
# Count histogram of percent body fat (default binning), blue bars with
# white outlines.
ggplot(df, aes(x = pcfat)) +
  geom_histogram(fill = "blue", colour = "white") +
  labs(x = "Percent body fat", y = "Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density-scaled histogram of percent body fat with a kernel density overlay.
# after_stat(density) replaces the deprecated ..density.. notation and puts
# the bars on the same scale as geom_density().
ggplot(df, aes(x = pcfat)) +
  geom_histogram(aes(y = after_stat(density)), colour = "white", fill = "blue") +
  geom_density(alpha = 0.3) +
  # The y axis shows a density here, not a count, so label it accordingly.
  labs(x = "Percent body fat", y = "Density")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram by group
# p1: overlaid histograms of percent body fat, one per gender.
# position = "identity" draws the groups on top of each other instead of
# stacking them; alpha keeps the overlap visible.
p1 <- ggplot(df, aes(x = pcfat, fill = gender)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  # No y aesthetic is mapped, so the bars show counts: label as frequency.
  labs(x = "Percent body fat", y = "Frequency")

# p2: kernel density estimate of percent body fat per gender.
p2 <- ggplot(df) +
  geom_density(aes(x = pcfat, colour = gender, fill = gender), alpha = 0.3) +
  labs(x = "Percent body fat", y = "Density")

# Stack the two panels vertically.
grid.arrange(p1, p2, nrow = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ridgeline plot: density of percent body fat for each BMI class (`ob`),
# filled by gender, with the legend hidden.
library(ggridges)
ggplot(df, aes(x = pcfat, y = ob, fill = gender)) +
  geom_density_ridges() +
  theme_ridges() +
  theme(legend.position = "none") +
  labs(x = "Percent body fat")
## Picking joint bandwidth of 1.59

Bar plot
library(ggthemes)
# Small hand-entered data set: one row per person with year of birth,
# height and weight (NA where the weight is not recorded).
Name <- c(
  "Bich Phuong", "Dieu Hoa", "Kieu Anh", "Thu Thuy", "Thien Nga",
  "Ngoc Khanh", "Thu Ngan", "Mai Phuong", "Thi Huyen", "Phuong Thuy",
  "Thuy Dung", "Ngoc Han", "Thu Thao", "Ky Duyen", "Mỹ Linh"
)
YoB <- c(
  1971, 1969, 1976, 1976, 1976,
  1976, 1980, 1985, 1985, 1988,
  1990, 1989, 1991, 1996, 1996
)
Height <- c(
  157, 158, 174, 172, 170,
  172, 169, 169, 172, 181,
  182, 173, 173, 173, 171
)
Weight <- c(
  50, NA, NA, NA, NA,
  50, 49, 49, 52, 60,
  61.5, 55, 49, 49, NA
)
# Column names are given explicitly; they match the vector names.
hh <- data.frame(Name = Name, YoB = YoB, Height = Height, Weight = Weight)
# Horizontal bar chart of Height per Name; every bar is labelled with its
# value and coloured by Name (legend hidden, since the axis already names
# each bar).
p <- ggplot(data = hh, aes(x = Name, y = Height, fill = Name)) +
  geom_bar(stat = "identity", width = 0.8) +
  # hjust = 1 right-aligns the white label against the end of its bar.
  geom_text(aes(label = Height), hjust = 1, size = 3, color = "white") +
  coord_flip() +
  xlab(" ") +
  # theme_economist() is a complete theme: adding it replaces every theme
  # setting made earlier in the chain. It therefore has to come first, and
  # the axis-text / legend tweaks after it, otherwise they are discarded.
  theme_economist() +
  theme(
    axis.text.x = element_text(angle = 0, color = "black"),
    axis.text.y = element_text(size = 15, color = "black"),
    legend.position = "none"
  )
p

# Bar plot from summary data
# Long-format table: one row per (age group, census year) with the value Num.
Age <- rep(c("65+", "0-14", "15-64"), each = 5)
Num <- c(
  4.7, 4.8, 5.8, 6.4, 7.7,
  42.6, 38.3, 33.1, 24.5, 23.1,
  52.7, 56.9, 61.1, 69.1, 69.2
)
Year <- rep(c(1979, 1989, 1999, 2009, 2019), times = 3)
dat <- data.frame(Age = Age, Num = Num, Year = Year)
# Fix the level order of the age groups explicitly.
dat$Age <- factor(dat$Age, levels = c("65+", "15-64", "0-14"))
# Stacked bars of Num by Year, one segment per Age group. Each segment is
# annotated with its value; position_stack(vjust = 0.5) centres the label
# inside its segment. The legend goes on top.
p <- ggplot(dat, aes(x = Year, y = Num, fill = Age, label = Num)) +
  geom_bar(stat = "identity")
p +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  ylab("Percent") +
  theme(legend.position = "top")

Box plot
# Box plot of percent body fat for each gender, boxes filled by gender.
ggplot(df, aes(x = gender, y = pcfat, fill = gender)) +
  geom_boxplot()

# Box plot of percent body fat per gender with the raw observations
# overlaid as semi-transparent jittered points.
ggplot(df, aes(x = gender, y = pcfat, colour = gender)) +
  # geom_jitter() already draws every observation, so suppress the box
  # plot's own outlier markers; otherwise each outlier appears twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(alpha = 0.2)

Scatter plot
# Percent body fat against BMI, coloured by gender, with a quadratic
# least-squares fit (y ~ x + x^2) drawn per gender.
ggplot(df, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2))

# Same scatter plot with quadratic fit, plus rug marks along the axes.
ggplot(df, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  geom_rug()
# Add marginal distributions to the scatter plot
library(ggExtra)
# Base plot: percent body fat against BMI by gender, with a quadratic
# least-squares fit and rug marks.
p <- ggplot(df, aes(x = bmi, y = pcfat, colour = gender)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2)) +
  geom_rug()
# Marginal plots coloured and filled per group. TRUE is spelled out because
# T is an ordinary variable that can be reassigned, unlike the constant TRUE.
ggMarginal(p, groupColour = TRUE, groupFill = TRUE)

Kaplan-Meier plot
# Load the arrest data set used for the survival analysis.
ar <- read.csv(
  file = "~/Dropbox/_Conferences and Workshops/TDTU and others/TDTU 4-2022/Datasets/Arrest dataset.csv"
)
library(survminer)
## Loading required package: ggpubr
library(survival)
# Survival curves for time-to-arrest (week, with arrest as the event
# indicator), estimated separately for each level of `finance`.
fit <- survfit(Surv(week, arrest) ~ finance, data = ar)
# Plot the fit with the "event" transformation, confidence bands and a
# numbers-at-risk table below the curves.
ggsurvplot(
  fit,
  fun = "event",
  conf.int = TRUE,
  risk.table = TRUE
)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
