# Load the gapminder data from the datacarpentry/r-intro-geospatial GitHub
# repository. Local alternative: gapminder <- read.csv("data/gapminder.csv")
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv"
)

# Ways to look at the data ----
# Structure: one line per column with its type and first values
# (we learned this already).
str(gapminder)
## 'data.frame': 1704 obs. of 6 variables:
## $ country : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: chr "Asia" "Asia" "Asia" "Asia" ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Number of rows.
nrow(gapminder)
## [1] 1704
# Number of columns.
ncol(gapminder)
## [1] 6
# Rows and columns in a single call.
dim(gapminder)
## [1] 1704 6
# Column names.
colnames(gapminder)
## [1] "country" "year" "pop" "continent" "lifeExp" "gdpPercap"
# First six rows (six is the default for head()).
head(gapminder, n = 6L)
## country year pop continent lifeExp gdpPercap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
# Now try tail(). What does this give you? (Answer in Zoom.)
tail(gapminder, n = 6L)
## country year pop continent lifeExp gdpPercap
## 1699 Zimbabwe 1982 7636524 Africa 60.363 788.8550
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093
# View(gapminder)  # spreadsheet-style viewer; left commented out
# Challenge: show a command that prints rows from the middle of gapminder.
# Option 1: keep the first 500 rows, then take the last six of those.
tail(head(gapminder, n = 500L), n = 6L)
## country year pop continent lifeExp gdpPercap
## 495 Eritrea 1962 1666618 Africa 40.158 380.9958
## 496 Eritrea 1967 1820319 Africa 42.189 468.7950
## 497 Eritrea 1972 2260187 Africa 44.142 514.3242
## 498 Eritrea 1977 2512642 Africa 44.535 505.7538
## 499 Eritrea 1982 2637297 Africa 43.890 524.8758
## 500 Eritrea 1987 2915959 Africa 46.453 521.1341
# Option 2: index rows 495 to 500 directly; the empty column index keeps
# every column.
gapminder[495:500, ]
## country year pop continent lifeExp gdpPercap
## 495 Eritrea 1962 1666618 Africa 40.158 380.9958
## 496 Eritrea 1967 1820319 Africa 42.189 468.7950
## 497 Eritrea 1972 2260187 Africa 44.142 514.3242
## 498 Eritrea 1977 2512642 Africa 44.535 505.7538
## 499 Eritrea 1982 2637297 Africa 43.890 524.8758
## 500 Eritrea 1987 2915959 Africa 46.453 521.1341
# More tips for checking out data ----
# Count how many rows belong to each continent.
table(gapminder[["continent"]])
##
## Africa Americas Asia Europe Oceania
## 624 300 396 360 24
# Per-column overview: length/class/mode for character columns, and
# min, quartiles, median, mean and max for numeric columns.
summary(gapminder)
## country year pop continent
## Length:1704 Min. :1952 Min. :6.001e+04 Length:1704
## Class :character 1st Qu.:1966 1st Qu.:2.794e+06 Class :character
## Mode :character Median :1980 Median :7.024e+06 Mode :character
## Mean :1980 Mean :2.960e+07
## 3rd Qu.:1993 3rd Qu.:1.959e+07
## Max. :2007 Max. :1.319e+09
## lifeExp gdpPercap
## Min. :23.60 Min. : 241.2
## 1st Qu.:48.20 1st Qu.: 1202.1
## Median :60.71 Median : 3531.8
## Mean :59.47 Mean : 7215.3
## 3rd Qu.:70.85 3rd Qu.: 9325.5
## Max. :82.60 Max. :113523.1
# Adding columns and rows to data frames ----
# We would like to create a new column that records, for every row, whether
# the life expectancy is below the world average life expectancy (70.5) or
# above it.
below_average <- gapminder$lifeExp < 70.5 # element-wise comparison: one TRUE/FALSE per row
str(below_average) # the result is a logical vector
## logi [1:1704] TRUE TRUE TRUE TRUE TRUE TRUE ...
nrow(gapminder) # does the number of rows match the vector length? Why is this important?
## [1] 1704
# Tally how many rows are below the threshold (TRUE) and how many are not (FALSE).
table(below_average)
## below_average
## FALSE TRUE
## 461 1243
# cbind() returns the whole data frame with the vector attached as a new
# column, so only its head() is printed here instead of the full result:
#cbind(gapminder, below_average)
head(cbind(gapminder, below_average))
## country year pop continent lifeExp gdpPercap below_average
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453 TRUE
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530 TRUE
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007 TRUE
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971 TRUE
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811 TRUE
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134 TRUE
head(cbind(gapminder, gapminder$lifeExp < 70.5)) # same values, but the new column is named after the expression
## country year pop continent lifeExp gdpPercap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
## gapminder$lifeExp < 70.5
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
gapminder2 <- cbind(gapminder, below_average) # store the result as a new data frame with the appended column
head(gapminder2)
## country year pop continent lifeExp gdpPercap below_average
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453 TRUE
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530 TRUE
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007 TRUE
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971 TRUE
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811 TRUE
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134 TRUE
# Overwriting the original with the cbind() result is left commented out:
#gapminder <- gapminder2
# With data frames it may be easier to assign straight to a new column name
# with $, which adds the column to gapminder in place.
gapminder$below_average <- gapminder$lifeExp < 70.5
head(gapminder)
## country year pop continent lifeExp gdpPercap below_average
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453 TRUE
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530 TRUE
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007 TRUE
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971 TRUE
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811 TRUE
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134 TRUE
# Add some rows ----
# This is trickier than adding a column, because a row holds one value per
# column and the columns have different types.
str(gapminder[1, ])
## 'data.frame': 1 obs. of 7 variables:
## $ country : chr "Afghanistan"
## $ year : int 1952
## $ pop : num 8425333
## $ continent : chr "Asia"
## $ lifeExp : num 28.8
## $ gdpPercap : num 779
## $ below_average: logi TRUE
# One value per column, in column order. The year is written as 2016L so it is
# an integer like the existing year column; a plain 2016 is a double and would
# coerce the whole year column to num when the row is appended.
new_row <- list("Norway", 2016L, 5000000, "Nordic", 80.3, 49400.0, FALSE)
# Challenge: write a line of code that checks whether the dimensions of
# new_row will work with the gapminder data frame.
ncol(gapminder) == length(new_row)
## [1] TRUE
# Append the new row; the result has one more row than gapminder.
gapminder_norway <- rbind(gapminder, new_row)
tail(gapminder_norway)
## country year pop continent lifeExp gdpPercap below_average
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573 TRUE
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208 TRUE
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500 TRUE
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386 TRUE
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093 TRUE
## 1705 Norway 2016 5000000 Nordic 80.300 49400.0000 FALSE
# Factors ----
# continent is currently stored as a plain character vector.
str(gapminder$continent)
## chr [1:1704] "Asia" "Asia" "Asia" "Asia" "Asia" "Asia" "Asia" "Asia" ...
# Convert the column to a factor.
gapminder$continent <- factor(x = gapminder$continent)
# gapminder_norway <- rbind(gapminder, new_row)
# NOTE(review): the rbind() above is left commented out, presumably because
# "Nordic" is not yet a level of the continent factor at this point. The
# tail() below therefore still shows the gapminder_norway built earlier.
tail(gapminder_norway, n = 6L)
## country year pop continent lifeExp gdpPercap below_average
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573 TRUE
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208 TRUE
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500 TRUE
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386 TRUE
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093 TRUE
## 1705 Norway 2016 5000000 Nordic 80.300 49400.0000 FALSE
# Register "Nordic" as an additional level first, then append the new row.
levels(gapminder$continent) <- append(levels(gapminder$continent), "Nordic")
gapminder_norway <- rbind(gapminder, new_row)
tail(gapminder_norway, n = 6L)
## country year pop continent lifeExp gdpPercap below_average
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573 TRUE
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208 TRUE
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500 TRUE
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386 TRUE
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093 TRUE
## 1705 Norway 2016 5000000 Nordic 80.300 49400.0000 FALSE
# Whole data frames can be pasted together as well.
dim(gapminder)
## [1] 1704 7
dim(rbind(gapminder, gapminder)) # stacked: twice the rows
## [1] 3408 7
dim(cbind(gapminder, gapminder)) # side by side: twice the columns
## [1] 1704 14
# Data frames do not have to be imported from a CSV file; they can be built
# directly in R from equal-length vectors, one per column.
data_frame <- data.frame(
  title = c("a", "b", "c"),
  numbers = 1:3,
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned, whereas TRUE and FALSE are reserved words.
  data = c(TRUE, TRUE, FALSE)
)
str(data_frame)
## 'data.frame': 3 obs. of 3 variables:
## $ title : chr "a" "b" "c"
## $ numbers: int 1 2 3
## $ data : logi TRUE TRUE FALSE