The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Loading the dataset.
# Read the GapMinder table from a CSV file (path is relative to the
# current working directory).
gap <- read.csv("gapminderData5.csv")

# Print a compact summary of the data frame's columns and their types.
str(gap)
'data.frame': 1704 obs. of 6 variables:
$ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
$ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
$ pop : num 8425333 9240934 10267083 11537966 13079460 ...
$ continent: chr "Asia" "Asia" "Asia" "Asia" ...
$ lifeExp : num 28.8 30.3 32 34 36.1 ...
$ gdpPercap: num 779 821 853 836 740 ...
Filtering the data to the year 2007 and excluding Oceania.
# Keep only the rows for year 2007 whose continent is not "Oceania".
# Both conditions must hold for a row to be kept.
gap07 <- gap |>
  filter(
    year == 2007,
    continent != "Oceania"
  )
Making some scatterplots.
# Scatterplot of lifeExp against gdpPercap, one point per row of gap07,
# colored by continent, with a log10-scaled x-axis.
ggplot(
  gap07,
  aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10(name = "GDP per capita ($)") +
  scale_y_continuous(name = "Life Expectancy (yrs)") +
  ggtitle("GapMinder Data 2007")
# Scatterplot of lifeExp against gdpPercap, colored by continent.
# `color` is written out in full rather than abbreviated, so the call does
# not depend on partial argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)
Adding labels to all points or to selected points
# Scatterplot with every point labelled by its `country` value.
# `repel = TRUE` asks ggscatter to keep the labels from overlapping.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life Expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country",
  repel = TRUE
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)
# Countries whose points should carry a text label.
sel_countries <- c("United States", "China", "Germany")

# Scatterplot where only the countries in `sel_countries` are labelled
# (via `label.select`); all other points are drawn without a label.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  label = "country",
  label.select = sel_countries,
  repel = TRUE
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)
# Unlabelled scatterplot of lifeExp against gdpPercap, colored by continent.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)
Creating a marginal histogram
# ggExtra provides ggMarginal().
library(ggExtra)

# Build the base scatterplot and keep it in `p` so that marginal plots can
# be attached to it afterwards.
p <- ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007"
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)

# Draw `p` with histograms in the margins.
ggMarginal(p, type = "histogram")
Adding a regression line
# Scatterplot with a regression line (`add = "reg.line"`) and its
# confidence interval (`conf.int = TRUE`).
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line",
  conf.int = TRUE
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE)
Adding correlations
# Scatterplot with regression lines and confidence intervals, annotated
# with Spearman correlation labels colored by continent.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line",
  conf.int = TRUE
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE) +
  stat_cor(aes(color = continent), method = "spearman")
Adding the regression line equations
# Scatterplot with regression lines and confidence intervals, annotated
# with the regression line equations colored by continent.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggscatter(
  gap07,
  x = "gdpPercap",
  y = "lifeExp",
  color = "continent",
  xlab = "GDP per capita ($)",
  ylab = "Life expectancy (yrs)",
  main = "GapMinder Data 2007",
  add = "reg.line",
  conf.int = TRUE
) +
  # Log10 x-axis with formatted tick labels.
  xscale("log10", .format = TRUE) +
  stat_regline_equation(aes(color = continent))
Generating histograms
# Histogram of lifeExp across all rows of gap07.
# No `bins` value is supplied, so the function's default binning is used.
gghistogram(
  gap07,
  x = "lifeExp",
  main = "GapMinder Life Expectancy"
)
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
# Histogram of lifeExp with bars filled by continent.
# No `bins` value is supplied, so the function's default binning is used.
gghistogram(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy"
)
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
# Histogram of lifeExp filled by continent, using the "jco" color palette.
# No `bins` value is supplied, so the function's default binning is used.
gghistogram(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco"
)
Warning: Using `bins = 30` by default. Pick better value with the argument
`bins`.
Generating density plots
# Density curves of lifeExp, filled by continent with the "jco" palette.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco"
)
# Density curves of lifeExp, drawn in a separate panel for each continent
# (`facet.by`), filled by continent with the "jco" palette.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent"
)
# Faceted density curves of lifeExp per continent, with the median marked
# (`add = "median"`) and a rug of the observations along the axis.
ggdensity(
  gap07,
  x = "lifeExp",
  fill = "continent",
  main = "GapMinder Life Expectancy",
  palette = "jco",
  facet.by = "continent",
  add = "median",
  rug = TRUE
)
Generating violin plots
# Basic violin plot: distribution of lifeExp for each continent.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp"
)
# Violin plot of lifeExp per continent, filled by continent ("jco" palette),
# with a boxplot and jittered points added on top of each violin.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)"
)
# Violin plot of lifeExp per continent with boxplot and jittered points,
# drawn with `rotate = TRUE` to flip the plot orientation.
ggviolin(
  gap07,
  x = "continent",
  y = "lifeExp",
  fill = "continent",
  palette = "jco",
  add = c("boxplot", "jitter"),
  ylab = "Life expectancy (yrs)",
  rotate = TRUE
)
# Subset of the full data: Asia and Africa only, for the years 1957, 1982
# and 2007.
gap_sub <- gap |>
  filter(
    continent %in% c("Asia", "Africa"),
    year %in% c(1957, 1982, 2007)
  )

# Boxplot of lifeExp per continent with jittered points overlaid.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
)
# Boxplot of lifeExp per continent with a group-comparison label added by
# stat_compare_means() (default method), placed at y = 90.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
) +
  stat_compare_means(label.y = 90)
Doing a t-test instead
# Boxplot of lifeExp per continent, with the groups compared by a t-test
# and the result label placed at y = 90.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter"
) +
  stat_compare_means(method = "t.test", label.y = 90)
# Boxplot of lifeExp per continent, split into one panel per year
# (`facet.by`), with a t-test comparison label placed at y = 90.
# `color` is written out in full so the call does not depend on partial
# argument matching.
ggboxplot(
  gap_sub,
  x = "continent",
  y = "lifeExp",
  ylab = "Years",
  color = "continent",
  add = "jitter",
  facet.by = "year"
) +
  stat_compare_means(method = "t.test", label.y = 90)
Comparing multiple groups
# Boxplot of lifeExp for each year in gap_sub, with an ANOVA comparison
# across the year groups; the result label is placed at y = 80.
ggboxplot(
  gap_sub,
  x = "year",
  y = "lifeExp"
) +
  stat_compare_means(method = "anova", label.y = 80)