#panels in RStudio
#rprojects
##open new project
##start new script
##integrate with git
# Arithmetic: an expression typed at top level is evaluated and its value printed.
1 + 1
## [1] 2
# Function calls ("commands"): round() called with only the number to round.
round(3.14)
## [1] 3
# Arguments can be given by name; digits = 1 keeps one decimal place.
round(3.14, digits = 1)
## [1] 3.1
# Object assignment uses `<-`.
## remember the keyboard shortcuts
some_name <- 1
# Assigning again replaces the old value; the right-hand side can be any expression.
some_name <- 1 + 1 + round(3.14, digits = 1)
# Comparing things gives a logical (TRUE/FALSE) result.
some_name > 10
## [1] FALSE
# A more complex object: a data.frame built from equal-length column vectors.
#
# Pitfall: mean() averages only its FIRST argument, so several numbers must be
# wrapped in c(). Written as mean(3.5, 3.6, 4.0, 2.0), the extra values are
# matched to other parameters of mean() and only 3.5 is used. The call below
# uses the correct c() form; its inputs are chosen so the last gpa stays 3.5,
# which is the value shown in the outputs recorded throughout this script.
some_name <- data.frame(
  first_vec = c("fresh", "soph", "junior", "senior", "job??"),
  year = c(2004, 2005, 2006, 2007, 2008),
  gpa = c(2.5, 3.0, 3.5, 4.0, mean(c(3.0, 4.0))),
  stringsAsFactors = FALSE # spell out FALSE: `F` is an ordinary, reassignable variable
)
# Typing the object's name prints it.
some_name
## first_vec year gpa
## 1 fresh 2004 2.5
## 2 soph 2005 3.0
## 3 junior 2006 3.5
## 4 senior 2007 4.0
## 5 job?? 2008 3.5
#View(some_name)
# head() with n = 1 shows only the first row.
head(some_name, 1)
## first_vec year gpa
## 1 fresh 2004 2.5
# Simple base-graphics scatter plot: year on the x axis, gpa on the y axis.
plot(some_name$year, some_name$gpa)
# Characteristics of objects: str() shows the structure of an object.
# See also class() and typeof().
str(some_name)
## 'data.frame': 5 obs. of 3 variables:
## $ first_vec: chr "fresh" "soph" "junior" "senior" ...
## $ year : num 2004 2005 2006 2007 2008
## $ gpa : num 2.5 3 3.5 4 3.5
# Common types:
#   character
#   numeric
#   logical
#   factor
# In a data.frame, all elements of a column vector must be the same type.
some_name[["gpa"]][4]
## [1] 4
# Putting a string into a numeric column turns the whole column into character.
some_name[["gpa"]][4] <- "fail"
str(some_name) # see how we changed the type of the "gpa" vector
## 'data.frame': 5 obs. of 3 variables:
## $ first_vec: chr "fresh" "soph" "junior" "senior" ...
## $ year : num 2004 2005 2006 2007 2008
## $ gpa : chr "2.5" "3" "3.5" "fail" ...
# Restoring a number-like string does not restore the type: still character.
some_name[["gpa"]][4] <- "4.0"
str(some_name)
## 'data.frame': 5 obs. of 3 variables:
## $ first_vec: chr "fresh" "soph" "junior" "senior" ...
## $ year : num 2004 2005 2006 2007 2008
## $ gpa : chr "2.5" "3" "3.5" "4.0" ...
# as.numeric() changes the type of the column back to numeric.
some_name[["gpa"]] <- as.numeric(some_name[["gpa"]])
str(some_name)
## 'data.frame': 5 obs. of 3 variables:
## $ first_vec: chr "fresh" "soph" "junior" "senior" ...
## $ year : num 2004 2005 2006 2007 2008
## $ gpa : num 2.5 3 3.5 4 3.5
#install.packages('ggplot2')
#load packages
library(ggplot2)
# Base-graphics scatter plot of gpa against year.
plot(some_name$year, some_name$gpa)
# Now use ggplot to plot first_vec vs. gpa.
# Do you notice anything wrong with this plot?
ggplot(some_name, aes(first_vec, gpa)) +
  geom_point()
### The order of x is all messed up. We'll work on this later.
# Summary statistics for one numeric column.
summary(some_name[["gpa"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.5 3.0 3.5 3.3 3.5 4.0
# Fit a linear model of gpa as a function of year, then print its summary.
mod <- lm(formula = gpa ~ year, data = some_name)
summary(mod)
##
## Call:
## lm(formula = gpa ~ year, data = some_name)
##
## Residuals:
## 1 2 3 4 5
## -2.000e-01 8.368e-15 2.000e-01 4.000e-01 -4.000e-01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -598.5000 231.6330 -2.584 0.0815 .
## year 0.3000 0.1155 2.598 0.0805 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3651 on 3 degrees of freedom
## Multiple R-squared: 0.6923, Adjusted R-squared: 0.5897
## F-statistic: 6.75 on 1 and 3 DF, p-value: 0.08051
# Export data
#write.csv(some_name, file = "dummy_data.csv", row.names = FALSE, quote = FALSE)
# Open up data directly from the web: read.csv() is given a URL here.
### copy this link into zoom chat for people to copy
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv",
  stringsAsFactors = FALSE # keep text columns as character; FALSE spelled out, not `F`
)
str(gapminder)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Exercise: What year has the lowest life expectancy? Hint: try plotting.
# What is the r2 value between year and life expectancy?
plot(gapminder$year, gapminder$lifeExp)
# Linear model of life expectancy as a function of year; r2 is in the summary.
mod2 <- lm(formula = lifeExp ~ year, data = gapminder)
summary(mod2)
##
## Call:
## lm(formula = lifeExp ~ year, data = gapminder)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.949 -9.651 1.697 10.335 22.158
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -585.65219 32.31396 -18.12 <2e-16 ***
## year 0.32590 0.01632 19.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.63 on 1702 degrees of freedom
## Multiple R-squared: 0.1898, Adjusted R-squared: 0.1893
## F-statistic: 398.6 on 1 and 1702 DF, p-value: < 2.2e-16
# Exercise: Practice writing and reading data using the gapminder dataset.
# Put the data into the "data" folder.
#write.csv(gapminder, "data/gapminder.csv", row.names = FALSE, quote = FALSE)
#tmp <- read.csv("data/gapminder.csv")
library(ggplot2)
# Fix the x-axis order of the first_vec vs. gpa plot: the `limits` argument of
# scale_x_discrete() is just a character vector listing the categories in the
# order they should appear. (The empty `by=` argument to aes() was removed; it
# mapped nothing.)
ggplot(data = some_name, mapping = aes(x = first_vec, y = gpa)) +
  geom_point() +
  scale_x_discrete(limits = c("fresh", "soph", "junior", "senior", "job??"))
# Read the gapminder data from the web and draw a first scatter plot of
# life expectancy against per-capita GDP.
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv",
  stringsAsFactors = FALSE # FALSE spelled out: `F` is an ordinary, reassignable variable
)
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
# Understanding ggplot layers
# An empty plotting layer:
ggplot()
# An empty plotting layer with correct scales (data and aesthetics, but no geom):
ggplot(gapminder, aes(gdpPercap, lifeExp))
# Adding a geom draws the data:
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()
# Data and aesthetics can also be given to the geom itself, which
# can be really useful when plotting multiple things at once.
ggplot() +
  geom_point(aes(gdpPercap, lifeExp), data = gapminder)
## Exercise: modify the plot to show life expectancy over time.
## While you're at it, color the dots by continent.
ggplot(data = gapminder, mapping = aes(x = year, y = lifeExp, color = continent)) +
  geom_point()
# Need to make it more informative: make a line plot with one line per country.
# `group` is the ggplot2 aesthetic that controls which rows are connected.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line()
# Layers are added on top of the last layer.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  geom_point(color = "black") # a fixed color set outside aes() applies to every point
# Fix up the plot with some edits: axis labels, a theme and a color palette.
# Lines are grouped per country with the `group` aesthetic.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  labs(x = "Year", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  scale_color_brewer(palette = "Dark2") ## there are some nice colors https://www.datanovia.com/en/blog/ggplot-colors-best-tricks-you-will-love/
# Same plot with hand-picked colors and the legend title removed.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  labs(x = "Year", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")) +
  theme(legend.title = element_blank()) ## modify the legend
## Slopes look different between the continents. Find out if they really are:
## fit lifeExp ~ year separately for each continent and tabulate the results.
mod.table <- data.frame(
  continent = unique(gapminder$continent),
  slope = NA,
  r2 = NA,
  p = NA,
  stringsAsFactors = FALSE
)
# Name the rows by continent so a row can be addressed as mod.table[name, ].
row.names(mod.table) <- mod.table$continent
for (i in mod.table$continent) {
  mod <- lm(lifeExp ~ year, data = gapminder[gapminder$continent == i, ])
  # Kept under its own name so the summary() function is not masked by a variable.
  mod_summary <- summary(mod)
  # Coefficient matrix: one row per term, columns Estimate / Std. Error / t value / Pr(>|t|).
  coefs <- mod_summary$coefficients
  mod.table[i, "slope"] <- round(coefs["year", "Estimate"], digits = 2)
  # Note: the value reported as "r2" is the adjusted R-squared.
  mod.table[i, "r2"] <- round(mod_summary$adj.r.squared, digits = 2)
  # formatC() returns text (scientific notation), so `p` becomes a character column.
  mod.table[i, "p"] <- formatC(coefs["year", "Pr(>|t|)"], format = "e", digits = 2)
}
# Build one label per continent, then copy it onto every gapminder row by
# looking the row's continent up in the row names of mod.table.
mod.table$lab <- paste0(
  mod.table$continent, ", slope=", mod.table$slope, ", r2=", mod.table$r2
)
gapminder$lab <- mod.table[gapminder$continent, "lab"]
# One panel per `lab` value (the `lab` column of gapminder), with one line per
# country via the `group` aesthetic. The legend is hidden.
ggplot(
  data = gapminder,
  mapping = aes(x = year, y = lifeExp, color = continent, group = country)
) +
  geom_line() +
  labs(x = "Year", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")) +
  facet_wrap(~ lab) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )
# Same plot stored in `p`, plus a black line of the mean lifeExp per year.
# The stat_summary() layer overrides the grouping (group = continent) so the
# mean is taken across countries rather than per country.
# NOTE(review): `size` is kept for the line width; newer ggplot2 releases may
# prefer `linewidth` for lines -- confirm against the installed version.
p <- ggplot(gapminder, aes(x = year, y = lifeExp, color = continent, group = country)) +
  geom_line() +
  labs(x = "Year", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")) +
  facet_wrap(~ lab) +
  stat_summary(aes(group = continent), fun = mean, geom = "line", size = 1, color = "black") +
  #geom_smooth(method='lm', se = F, aes(group=continent), color="grey") +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )
p
#ggsave(filename = "plots/lifeExp.pdf", plot = p, width = 6.5, height = 4.5, dpi = 300, units = "in")
### There are many packages that can help you with plotting.
### Some that I find useful that you may want to look at:
###   * cowplot
###   * ggExtra
###   * heatmap2
###   * gridExtra
###   * RColorBrewer
#install.packages('ggExtra')
library(ggExtra)
# Exercise: subset the gapminder data to the years from 2000 on, to narrow
# down the data a bit.
gapminder_2000 <- gapminder[gapminder[["year"]] >= 2000, ]
# Exercise: make a scatter plot comparing GDP to life expectancy.
p2 <- ggplot(
  data = gapminder_2000,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")) +
  labs(x = "Per capita GDP ($)", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  theme(legend.position = "bottom", legend.title = element_blank())
# ggMarginal() (from ggExtra) takes the scatter plot; the group* options are
# switched on so the margins are drawn per color group.
ggMarginal(p2, groupColour = TRUE, groupFill = TRUE)
# Drop the Oceania data. `!=` states the intent directly; the earlier form
# `!x == "Oceania"` only worked because `==` is evaluated before `!`.
gapminder_2000_nooceania <- gapminder_2000[gapminder_2000$continent != "Oceania", ]
# Same scatter plot as before, drawn from the reduced data.
p2 <- ggplot(gapminder_2000_nooceania, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point() +
  labs(x = "Per capita GDP ($)", y = "Life expectancy (years)") +
  theme_bw() + ## lots of different pre-set themes
  scale_color_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00")) +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom"
  )
# Keep the plot with marginals in `p3` so it can be reused, then display it.
p3 <- ggMarginal(p2, groupColour = TRUE, groupFill = TRUE)
p3
#install.packages('cowplot')
library(cowplot)
# Place the two stored plots, `p` and `p3`, next to each other in a single row.
plot_grid(p, p3, nrow = 1)
#ggsave(filename = "plots/cowplot.pdf", plot = plot_grid(p, p3, nrow = 1), width = 12, height = 6, dpi = 300, units = "in")
#install.packages("gganimate")
library(gganimate)
library(gapminder)
# Animated bubble chart: GDP per capita vs. life expectancy, one frame per year.
# Note: `gapminder` is also assigned as a data frame earlier in this script, so
# that workspace object is what this call is expected to pick up;
# `country_colors` is not defined in this script and presumably comes from the
# gapminder package.
# This assignment replaces the previous contents of `p3`.
p3 <- ggplot(
  gapminder,
  aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~ continent) +
  # Here come the gganimate-specific bits
  labs(title = "Year: {frame_time}", x = "GDP per capita", y = "life expectancy") +
  transition_time(year) +
  ease_aes("linear")
# Render the animation.
animate(p3)
#anim_save("animate.gif", p3)
## may run into rendering issues
#install.packages('gifski')
#library(gifski)
# close and reopen RStudio
#animate(p3, duration = 5, fps = 20, width = 400, height = 400, renderer = gifski_renderer())
#anim_save("animate.gif", p3)