Links
Carpentries workshops survey (one survey for all workshops):
Part 1: Introducing R and RStudio IDE
##in interactive R console type
##to run, click run or ctrl+enter or special+enter
100+1
## [1] 101
##make an *.R script (file new)
##can run multiple lines by highlighting them
100+1
## [1] 101
100*5
## [1] 500
#what happens when you execute an incomplete command?
#100+
##save the R script
##why use the "source" pane??
##commands
getwd() ##can add comments with "#"
## [1] "/home/jt/Dropbox/Documents/UI/carpentries/workshops/06-08-20-caes"
##Exercise: execute getwd() from the console
##paste your output into zoom
#Exercise: Try the following functions by writing them in your script. See if you can guess what they do, and make sure to add comments to your script about your assumed purpose.
#eg: sessionInfo() #tells you system info
Sys.time()
## [1] "2020-06-09 11:18:28 PDT"
date()
## [1] "Tue Jun 9 11:18:28 2020"
dir()
## [1] "C3-BootCamp-RunOfShow.html"
## [2] "Copy of report-2020-06-07.xlsx"
## [3] "Idaho National Laboratory_ Jun 8-11, 2020.pdf"
## [4] "JT_060820_caes_Rintro.pdf"
## [5] "notes.txt"
## [6] "R Carpentry Schedule Draft.odt"
## [7] "R_intro_caes"
## [8] "R_workshop_runofshow_jt.pdf"
## [9] "R_workshop_walkthrough_v2.pdf"
## [10] "R_workshop_walkthrough_v2.Rmd"
## [11] "R_workshop_walkthrough.pdf"
## [12] "R_workshop_walkthrough.Rmd"
## [13] "SWC-Registrations.xlsx"
## [14] "teaching_materials"
#what are the parts of a command??
##a name: "getwd"
##pair of "()"
##arguments inside of the "()"
#input can be to get an answer or provide instructions for modifying command
round(3.14)
## [1] 3
round(3.14,digits=1)
## [1] 3.1
####Getting help
#how to learn about functions
?round() ###use help menu - lots of info
#use tab
#practice pressing tab after "round("
#you can use "positional arguments" or specify arguments
round(3.14, 1)
## [1] 3.1
round(x=3.14, digits=1) ##sometimes this is better for "readability"
## [1] 3.1
###there are many functions beyond what comes with "base" R
##what happens here:
#geom_point()
#?geom_point() ##searches for the package name
#??geom_point() ##uses keyword search
#Exercise
#look for some commands that you're interested in
#can use ?? or help.search(). What are the differences?
help.search("dot plot")
#wont help you unless packages are installed.
#to install packages:
#install.packages('ggplot2')
??geom_point()
#objects and assignment
a <- 1 ###no output?? why not??
a
## [1] 1
#check out the "Environment" panel
##objects allow you to store things for later. A key feature for any programming language
#objects can be named anything
yo <- 1/40
yo ###output is a "floating point number" type.
## [1] 0.025
#Exercise: Make some objects. Assign some as numbers and words.
#Did it all work? Paste something into the Zoom chat that didn't work.
#yo 2 <- 1/20
#yo.2 <- 1/20
#yo_2 <- 1/20
#note: don't use special characters for object names. Be safe, use letters, numbers, and underscores
#reassign object
yo <- 100
yo
## [1] 100
yo <- yo + 1
yo
## [1] 101
log(yo)
## [1] 4.615121
#can also remove, this can be useful to keep "Environment" clean
rm(yo)
##can do lots of stuff on objects, but
###need to understand a little about what types of object exist
###every object has properties
#reassign yo to values of different types and inspect each one with class()
yo <- 100 #number (numeric)
class(yo)
## [1] "numeric"
yo <- "JT" #character
class(yo)
## [1] "character"
yo <- TRUE #logical
class(yo)
## [1] "logical"
#Exercise: try making some objects and see what kind they are using the function class().
#post the results of something...even if it didn't work like you thought.
#There are 6 main types: numeric, integer, complex, logical, character, and factor.
###break
### We are jumping to the "Data Structures" part of "Introduction to R for Geospatial Data" https://datacarpentry.org/r-intro-geospatial/03-data-structures-part1/index.html
###lets go get some data and explore it using what we've learned so far
## https://datacarpentry.org/r-intro-geospatial/02-project-intro/index.html
###copy this link into zoom chat for people to copy
#read the example data sets straight from the lesson repository.
#stringsAsFactors = TRUE is spelled out because read.csv() only turns text
#columns into factors by default in R < 4.0, and the factor examples further
#down this walkthrough depend on that conversion.
nordic <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/nordic-data-2.csv",
  stringsAsFactors = TRUE
)
#Exercise: Download "gapminder_data.csv" the same way
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv",
  stringsAsFactors = TRUE
)
###what does it mean to have a csv file extension? Answer in Zoom chat
##comma separated values file
nordic
## country year lifeExp
## 1 Denmark 2002 77.2
## 2 Sweden 2002 80
## 3 Norway 2002 79.0 or 83
#gapminder
View(gapminder)
#just like above, we can do stuff on information in these more complex objects
#Exercise: What kind of object is "nordic"?
class(nordic)
## [1] "data.frame"
#data.frame is a combination of objects.
#to see what sort of combination we have, look at the individual columns
str(nordic)
## 'data.frame': 3 obs. of 3 variables:
## $ country: Factor w/ 3 levels "Denmark","Norway",..: 1 3 2
## $ year : int 2002 2002 2002
## $ lifeExp: Factor w/ 3 levels "77.2","79.0 or 83",..: 1 3 2
class(nordic$year)
## [1] "integer"
#can do normal math on data.frames
nordic$year + 1
## [1] 2003 2003 2003
#did that actually change the data????
nordic$year <- nordic$year + 1
nordic$year
## [1] 2003 2003 2003
#what about
nordic$lifeExp + 1
## Warning in Ops.factor(nordic$lifeExp, 1): '+' not meaningful for factors
## [1] NA NA NA
#what happened?
#why aren't lifeExp values of data type "numeric"??
#one entry ("79.0 or 83") is not a number, so the whole lifeExp column was read as text and stored as a factor. A set of values of a single type is called a vector
#this strictness safeguards against mistakes of doing things to different classes of data
#mixing numbers and text inside c() gives a vector where every element is text
quiz_scores <- c(95, 65, 'absent') #using "c()" (combine) to make a vector
#quiz_scores + 5
#we can coerce data in R.
quiz_scores_fixed <- as.numeric(quiz_scores)
## Warning: NAs introduced by coercion
str(quiz_scores_fixed)
## num [1:3] 95 65 NA
quiz_scores_fixed + 5
## [1] 100 70 NA
##be careful with coercion and check your data types
as.numeric(nordic$lifeExp)
## [1] 1 3 2
as.numeric(as.character(nordic$lifeExp))
## Warning: NAs introduced by coercion
## [1] 77.2 80.0 NA
#wise to use "stringsAsFactors = FALSE", then convert to factors if needed
#(write FALSE in full: F is an ordinary variable that can be reassigned)
nordic <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/nordic-data-2.csv",
  stringsAsFactors = FALSE
)
as.numeric(nordic$lifeExp)
## Warning: NAs introduced by coercion
## [1] 77.2 80.0 NA
#can use "stringsAsFactors" when explicitly making data frames as well
#(write FALSE in full: F is an ordinary variable that can be reassigned)
quiz_and_test_scores <- data.frame(
  quiz = c(95, 65, 'absent'),
  test = c(100, 85, 75),
  stringsAsFactors = FALSE
)
str(quiz_and_test_scores)
## 'data.frame': 3 obs. of 2 variables:
## $ quiz: chr "95" "65" "absent"
## $ test: num 100 85 75
##notice that we named the vectors in this data frame.
##we can rename them if needed
names(quiz_and_test_scores) <- c("quiz1", "test1")
quiz_and_test_scores
## quiz1 test1
## 1 95 100
## 2 65 85
## 3 absent 75
#or
names(quiz_and_test_scores)[1] <- "prequiz"
quiz_and_test_scores
## prequiz test1
## 1 95 100
## 2 65 85
## 3 absent 75
#Challenge: fill in a prequiz score for student 3 and then curve the quiz
quiz_and_test_scores$prequiz <- c(95,65,0)
quiz_and_test_scores$prequiz <- quiz_and_test_scores$prequiz+5
quiz_and_test_scores
## prequiz test1
## 1 100 100
## 2 70 85
## 3 5 75
###Factors can be useful sometimes
str(gapminder) ###remember that we did not use "stringsAsFactors = FALSE"
## 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
#notice that using factors tells us how many categories there are in a vector
#e.g., 142 different countries in "gapminder$country"
levels(gapminder$country)
## [1] "Afghanistan" "Albania"
## [3] "Algeria" "Angola"
## [5] "Argentina" "Australia"
## [7] "Austria" "Bahrain"
## [9] "Bangladesh" "Belgium"
## [11] "Benin" "Bolivia"
## [13] "Bosnia and Herzegovina" "Botswana"
## [15] "Brazil" "Bulgaria"
## [17] "Burkina Faso" "Burundi"
## [19] "Cambodia" "Cameroon"
## [21] "Canada" "Central African Republic"
## [23] "Chad" "Chile"
## [25] "China" "Colombia"
## [27] "Comoros" "Congo Dem. Rep."
## [29] "Congo Rep." "Costa Rica"
## [31] "Cote d'Ivoire" "Croatia"
## [33] "Cuba" "Czech Republic"
## [35] "Denmark" "Djibouti"
## [37] "Dominican Republic" "Ecuador"
## [39] "Egypt" "El Salvador"
## [41] "Equatorial Guinea" "Eritrea"
## [43] "Ethiopia" "Finland"
## [45] "France" "Gabon"
## [47] "Gambia" "Germany"
## [49] "Ghana" "Greece"
## [51] "Guatemala" "Guinea"
## [53] "Guinea-Bissau" "Haiti"
## [55] "Honduras" "Hong Kong China"
## [57] "Hungary" "Iceland"
## [59] "India" "Indonesia"
## [61] "Iran" "Iraq"
## [63] "Ireland" "Israel"
## [65] "Italy" "Jamaica"
## [67] "Japan" "Jordan"
## [69] "Kenya" "Korea Dem. Rep."
## [71] "Korea Rep." "Kuwait"
## [73] "Lebanon" "Lesotho"
## [75] "Liberia" "Libya"
## [77] "Madagascar" "Malawi"
## [79] "Malaysia" "Mali"
## [81] "Mauritania" "Mauritius"
## [83] "Mexico" "Mongolia"
## [85] "Montenegro" "Morocco"
## [87] "Mozambique" "Myanmar"
## [89] "Namibia" "Nepal"
## [91] "Netherlands" "New Zealand"
## [93] "Nicaragua" "Niger"
## [95] "Nigeria" "Norway"
## [97] "Oman" "Pakistan"
## [99] "Panama" "Paraguay"
## [101] "Peru" "Philippines"
## [103] "Poland" "Portugal"
## [105] "Puerto Rico" "Reunion"
## [107] "Romania" "Rwanda"
## [109] "Sao Tome and Principe" "Saudi Arabia"
## [111] "Senegal" "Serbia"
## [113] "Sierra Leone" "Singapore"
## [115] "Slovak Republic" "Slovenia"
## [117] "Somalia" "South Africa"
## [119] "Spain" "Sri Lanka"
## [121] "Sudan" "Swaziland"
## [123] "Sweden" "Switzerland"
## [125] "Syria" "Taiwan"
## [127] "Tanzania" "Thailand"
## [129] "Togo" "Trinidad and Tobago"
## [131] "Tunisia" "Turkey"
## [133] "Uganda" "United Kingdom"
## [135] "United States" "Uruguay"
## [137] "Venezuela" "Vietnam"
## [139] "West Bank and Gaza" "Yemen Rep."
## [141] "Zambia" "Zimbabwe"
#another benefit = size of data.frame
object.size(gapminder)
## 73496 bytes
###switch to character vectors (FALSE written in full: F can be reassigned)
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv",
  stringsAsFactors = FALSE
)
object.size(gapminder)
## 85176 bytes
##put gapminder back to how it was: text columns as factors.
##stringsAsFactors = TRUE is explicit because it is only the default in R < 4.0
gapminder <- read.csv(
  "https://raw.githubusercontent.com/datacarpentry/r-intro-geospatial/master/_episodes_rmd/data/gapminder_data.csv",
  stringsAsFactors = TRUE
)
#last data structure: lists
##lists are just piles of stuff.
##can be very useful if you have mixed data types or have nested sets of data
list_example <- list(1, "a", TRUE, c(2, 6, 7))
list_example
## [[1]]
## [1] 1
##
## [[2]]
## [1] "a"
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] 2 6 7
another_list_example <- list(number = 1, character="a", logical=TRUE, vector=c(2, 6, 7))
another_list_example
## $number
## [1] 1
##
## $character
## [1] "a"
##
## $logical
## [1] TRUE
##
## $vector
## [1] 2 6 7
#Challenge: what are the structural differences between lists and data.frames?? Look at both
str(another_list_example)
## List of 4
## $ number : num 1
## $ character: chr "a"
## $ logical : logi TRUE
## $ vector : num [1:3] 2 6 7
str(quiz_and_test_scores)
## 'data.frame': 3 obs. of 2 variables:
## $ prequiz: num 100 70 5
## $ test1 : num 100 85 75
#list elements don't have to be the same length
#data.frame rows can be accessed.
#quiz_and_test_scores$prequiz <- c(100,70)
quiz_and_test_scores[1,] ###we haven't done this yet. the number "1" allows you to access the 1st row. Format is data.frame[row,column]
## prequiz test1
## 1 100 100
#another_list_example[1,] ##no rows for lists.
another_list_example[1]
## $number
## [1] 1
#lists can hold anything, including whole data frames
#(FALSE written in full: F is an ordinary variable that can be reassigned)
stats404 <- list(
  yr2019 = quiz_and_test_scores,
  yr2020 = data.frame(
    quiz = c(95, 65, 89),
    test = c(100, 85, 75),
    stringsAsFactors = FALSE
  )
)
stats404
## $yr2019
## prequiz test1
## 1 100 100
## 2 70 85
## 3 5 75
##
## $yr2020
## quiz test
## 1 95 100
## 2 65 85
## 3 89 75
#Challenge: There are several subtly different ways to call variables, observations and elements from data frames:
nordic[1]
## country
## 1 Denmark
## 2 Sweden
## 3 Norway
nordic[[1]]
## [1] "Denmark" "Sweden" "Norway"
nordic$country
## [1] "Denmark" "Sweden" "Norway"
nordic["country"]
## country
## 1 Denmark
## 2 Sweden
## 3 Norway
nordic[1, 1]
## [1] "Denmark"
nordic[, 1]
## [1] "Denmark" "Sweden" "Norway"
nordic[1, ]
## country year lifeExp
## 1 Denmark 2002 77.2
#Try out these examples and explain what is returned by each one.
#Hint: Use the function class() or str() to examine what is returned in each case.
Part 3: R Exploring Data Frames
normal analysis pipeline:
raw data (don’t modify) -> clean and organize data -> explore data -> analyze data -> plot
#ways to look at data
str(gapminder) ##we learned this already
## 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ pop : num 8425333 9240934 10267083 11537966 13079460 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ gdpPercap: num 779 821 853 836 740 ...
nrow(gapminder)
## [1] 1704
ncol(gapminder)
## [1] 6
dim(gapminder)
## [1] 1704 6
colnames(gapminder)
## [1] "country" "year" "pop" "continent" "lifeExp" "gdpPercap"
head(gapminder)
## country year pop continent lifeExp gdpPercap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
###try tail(). What does this give you??
##answer in zoom
tail(gapminder)
## country year pop continent lifeExp gdpPercap
## 1699 Zimbabwe 1982 7636524 Africa 60.363 788.8550
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093
View(gapminder)
#Challenge: Show me a command that gives lines in the middle of gapminder
tail(head(gapminder, 500))
## country year pop continent lifeExp gdpPercap
## 495 Eritrea 1962 1666618 Africa 40.158 380.9958
## 496 Eritrea 1967 1820319 Africa 42.189 468.7950
## 497 Eritrea 1972 2260187 Africa 44.142 514.3242
## 498 Eritrea 1977 2512642 Africa 44.535 505.7538
## 499 Eritrea 1982 2637297 Africa 43.890 524.8758
## 500 Eritrea 1987 2915959 Africa 46.453 521.1341
gapminder[495:500,]
## country year pop continent lifeExp gdpPercap
## 495 Eritrea 1962 1666618 Africa 40.158 380.9958
## 496 Eritrea 1967 1820319 Africa 42.189 468.7950
## 497 Eritrea 1972 2260187 Africa 44.142 514.3242
## 498 Eritrea 1977 2512642 Africa 44.535 505.7538
## 499 Eritrea 1982 2637297 Africa 43.890 524.8758
## 500 Eritrea 1987 2915959 Africa 46.453 521.1341
#Challenge: Show me a command that gives the first 10 categories of countries in gapminder. Hint: What does the command levels() tell you about factors?
head(levels(gapminder$country),10)
## [1] "Afghanistan" "Albania" "Algeria" "Angola" "Argentina"
## [6] "Australia" "Austria" "Bahrain" "Bangladesh" "Belgium"
##adding columns and rows to data frames
#We would like to create a new column to hold information on whether the life expectancy is below the world average life expectancy (70.5) or above:
below_average <- gapminder$lifeExp < 70.5 ##do some basic evaluation
str(below_average) #output is a vector of logical values
## logi [1:1704] TRUE TRUE TRUE TRUE TRUE TRUE ...
nrow(gapminder) ##do the number of rows match?
## [1] 1704
table(below_average) ###quite useful for counting stuff
## below_average
## FALSE TRUE
## 461 1243
#cbind(gapminder, below_average)
head(cbind(gapminder, below_average))
## country year pop continent lifeExp gdpPercap below_average
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453 TRUE
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530 TRUE
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007 TRUE
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971 TRUE
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811 TRUE
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134 TRUE
head(cbind(gapminder, gapminder$lifeExp < 70.5)) ##same output, but name is different
## country year pop continent lifeExp gdpPercap
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134
## gapminder$lifeExp < 70.5
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
gapminder2 <- cbind(gapminder, below_average) ##make new data with appended column
head(gapminder2)
## country year pop continent lifeExp gdpPercap below_average
## 1 Afghanistan 1952 8425333 Asia 28.801 779.4453 TRUE
## 2 Afghanistan 1957 9240934 Asia 30.332 820.8530 TRUE
## 3 Afghanistan 1962 10267083 Asia 31.997 853.1007 TRUE
## 4 Afghanistan 1967 11537966 Asia 34.020 836.1971 TRUE
## 5 Afghanistan 1972 13079460 Asia 36.088 739.9811 TRUE
## 6 Afghanistan 1977 14880372 Asia 38.438 786.1134 TRUE
###with data frames, this may be easier
gapminder$below_average <- gapminder$lifeExp < 70.5
##add some rows
##this is trickier because of all the different types of vectors that each row has
str(gapminder[1,])
## 'data.frame': 1 obs. of 7 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1
## $ year : int 1952
## $ pop : num 8425333
## $ continent : Factor w/ 5 levels "Africa","Americas",..: 3
## $ lifeExp : num 28.8
## $ gdpPercap : num 779
## $ below_average: logi TRUE
new_row <- list('Norway', 2016, 5000000, 'Nordic', 80.3, 49400.0, FALSE)
#Challenge: write a line of code that checks if the dimensions of new_row will work with the gapminder data frame.
ncol(gapminder) == length(new_row)
## [1] TRUE
###adding "new_row" is especially a pain because of the factors that we have in gapminder.
gapminder_norway <- rbind(gapminder, new_row)
## Warning in `[<-.factor`(`*tmp*`, ri, value = "Nordic"): invalid factor level, NA
## generated
tail(gapminder_norway)
## country year pop continent lifeExp gdpPercap below_average
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573 TRUE
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208 TRUE
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500 TRUE
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386 TRUE
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093 TRUE
## 1705 Norway 2016 5000000 <NA> 80.300 49400.0000 FALSE
##why are we getting an "NA" in the continent column?
###2 options
####1) change factor columns to character, then add
####2) add a level to the factors
###keep a copy of gapminder
###(called gapminder_backup rather than "save" so it does not mask base::save())
gapminder_backup <- gapminder
###option 1: turn the factor column into character, then add the row
gapminder$continent <- as.character(gapminder$continent)
gapminder_norway <- rbind(gapminder, new_row)
tail(gapminder_norway)
## country year pop continent lifeExp gdpPercap below_average
## 1700 Zimbabwe 1987 9216418 Africa 62.351 706.1573 TRUE
## 1701 Zimbabwe 1992 10704340 Africa 60.377 693.4208 TRUE
## 1702 Zimbabwe 1997 11404948 Africa 46.809 792.4500 TRUE
## 1703 Zimbabwe 2002 11926563 Africa 39.989 672.0386 TRUE
## 1704 Zimbabwe 2007 12311143 Africa 43.487 469.7093 TRUE
## 1705 Norway 2016 5000000 Nordic 80.300 49400.0000 FALSE
gapminder <- gapminder_backup #put gapminder back to how it was
rm(gapminder_backup)
#or
####3) figure out what is weird with our data....like why is "nordic" not in the gapminder data already
#gapminder$country == 'Norway' ##what type of vector will this output??
gapminder[gapminder$country == 'Norway', ] ##vector of logical values will select 'TRUE' rows. Make sure you put the comma there
## country year pop continent lifeExp gdpPercap below_average
## 1141 Norway 1952 3327728 Europe 72.670 10095.42 FALSE
## 1142 Norway 1957 3491938 Europe 73.440 11653.97 FALSE
## 1143 Norway 1962 3638919 Europe 73.470 13450.40 FALSE
## 1144 Norway 1967 3786019 Europe 74.080 16361.88 FALSE
## 1145 Norway 1972 3933004 Europe 74.340 18965.06 FALSE
## 1146 Norway 1977 4043205 Europe 75.370 23311.35 FALSE
## 1147 Norway 1982 4114787 Europe 75.970 26298.64 FALSE
## 1148 Norway 1987 4186147 Europe 75.890 31540.97 FALSE
## 1149 Norway 1992 4286357 Europe 77.320 33965.66 FALSE
## 1150 Norway 1997 4405672 Europe 78.320 41283.16 FALSE
## 1151 Norway 2002 4535591 Europe 79.050 44683.98 FALSE
## 1152 Norway 2007 4627926 Europe 80.196 49357.19 FALSE
gapminder[which(gapminder$country == 'Norway'), ] ##another useful way to do it
## country year pop continent lifeExp gdpPercap below_average
## 1141 Norway 1952 3327728 Europe 72.670 10095.42 FALSE
## 1142 Norway 1957 3491938 Europe 73.440 11653.97 FALSE
## 1143 Norway 1962 3638919 Europe 73.470 13450.40 FALSE
## 1144 Norway 1967 3786019 Europe 74.080 16361.88 FALSE
## 1145 Norway 1972 3933004 Europe 74.340 18965.06 FALSE
## 1146 Norway 1977 4043205 Europe 75.370 23311.35 FALSE
## 1147 Norway 1982 4114787 Europe 75.970 26298.64 FALSE
## 1148 Norway 1987 4186147 Europe 75.890 31540.97 FALSE
## 1149 Norway 1992 4286357 Europe 77.320 33965.66 FALSE
## 1150 Norway 1997 4405672 Europe 78.320 41283.16 FALSE
## 1151 Norway 2002 4535591 Europe 79.050 44683.98 FALSE
## 1152 Norway 2007 4627926 Europe 80.196 49357.19 FALSE
str(new_row)
## List of 7
## $ : chr "Norway"
## $ : num 2016
## $ : num 5e+06
## $ : chr "Nordic"
## $ : num 80.3
## $ : num 49400
## $ : logi FALSE
##change "Nordic" to "Europe" and it should work fine without fiddling with the factors
##Challenge: Make a data frame containing only Norway data and test out our idea.
gapminder_norway <- gapminder[gapminder$country == 'Norway', ]
new_row_fix <- list('Norway', 2016, 5000000, 'Europe', 80.3, 49400.0, FALSE)
gapminder_norway <- rbind(gapminder_norway, new_row_fix)
gapminder_norway
## country year pop continent lifeExp gdpPercap below_average
## 1141 Norway 1952 3327728 Europe 72.670 10095.42 FALSE
## 1142 Norway 1957 3491938 Europe 73.440 11653.97 FALSE
## 1143 Norway 1962 3638919 Europe 73.470 13450.40 FALSE
## 1144 Norway 1967 3786019 Europe 74.080 16361.88 FALSE
## 1145 Norway 1972 3933004 Europe 74.340 18965.06 FALSE
## 1146 Norway 1977 4043205 Europe 75.370 23311.35 FALSE
## 1147 Norway 1982 4114787 Europe 75.970 26298.64 FALSE
## 1148 Norway 1987 4186147 Europe 75.890 31540.97 FALSE
## 1149 Norway 1992 4286357 Europe 77.320 33965.66 FALSE
## 1150 Norway 1997 4405672 Europe 78.320 41283.16 FALSE
## 1151 Norway 2002 4535591 Europe 79.050 44683.98 FALSE
## 1152 Norway 2007 4627926 Europe 80.196 49357.19 FALSE
## 13 Norway 2016 5000000 Europe 80.300 49400.00 FALSE
##plotting data is a fairly strong suit of R.
##especially using packages
##ggplot is one of the best
##base graphics are great for quick plots, but aren't as pretty
##load ggplot
library(ggplot2)
ggplot(gapminder) ###empty canvas

ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) ##empty canvas with correct dimensions

ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) + ###these are global options/values
geom_point() ####options can be put here. they will only be applied to this ("geom") layer

##don't have to supply all that info
##this works too
ggplot(gapminder, aes(gdpPercap,lifeExp)) +
geom_point()

# `geom_point()` for scatter plots, dot plots, etc.
# `geom_boxplot()` for, well, boxplots!
# `geom_line()` for trend lines, time series, etc.
# 'geom_bar()'
# 'geom_histogram()'
#Challenge: Make a plot to see if life expectancy has increased over the years.
ggplot(gapminder, aes(year, lifeExp)) +
geom_point()

##add some colors
ggplot(gapminder, aes(year, lifeExp, color=continent)) +
geom_point() +
scale_color_brewer(palette="Set1") ###color brewer has nice color palettes and is built into ggplot

##think of geom layers as being additive
##you can just keep plotting more geom layers
##group = country draws one line per country; color still comes from continent
ggplot(gapminder, aes(year, lifeExp, color = continent, group = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1")

##same plot, split into one panel per continent, each with its own axis scales
##group = country draws one line per country
ggplot(gapminder, aes(year, lifeExp, color = continent, group = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(~ continent, scales = "free")

##ggplot can be objects
##store the faceted plot in an object instead of drawing it straight away
##group = country draws one line per country
p <- ggplot(gapminder, aes(year, lifeExp, color = continent, group = country)) +
  geom_point() +
  geom_line() +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(~ continent, scales = "free")
#these objects can be modified. exactly like above
#here the legend is anchored at the bottom-right corner of the plot
#NOTE(review): newer ggplot2 releases (3.5.0+, to be confirmed) deprecate a numeric legend.position in favour of legend.position = "inside" plus legend.position.inside - check against the installed version
p + theme(legend.position = c(1, 0),
legend.justification = c(1, 0))

List of useful resources: