January 10, 2017
install.packages('devtools')
install.packages('tidyverse')
install.packages(c('readxl', 'rmarkdown'))
library(tidyverse)
# Build a small numeric vector, then pipe it into four summary functions.
# Each statement sits on its own line; fused on one line they do not parse.
foo <- c(1, 2, 4)
foo %>% min()
foo %>% mean()
foo %>% max()
foo %>% sd()
lm function
help(lm)
#' Print a message about cats
#'
#' @param love Value compared against `TRUE`. When the comparison holds
#'   (the default), the cat-loving message is printed; otherwise the
#'   alternative message is printed.
#' @return The printed string, invisibly (the value returned by `print()`).
cat_function <- function(love = TRUE) {
  if (love == TRUE) {
    print('I love cats!')
  } else {
    print('I am not a cool person.')
  }
}
min <- function(vector_of_values, na.rm=TRUE)
lm functionmin <- function(vector_of_values, na.rm=TRUE)
# Same summaries as before, but the vector now contains a missing value.
# Each statement sits on its own line; fused on one line they do not parse.
foo <- c(1, 2, NA, 4)
foo %>% min()
foo %>% mean()
foo %>% max()
foo %>% sd()
lm functionmin <- function(vector_of_values, na.rm=TRUE)
# With an NA in the vector and no na.rm argument, every summary returns NA.
# Statements and their recorded output are split onto separate lines so the
# code after the first `##` is no longer commented out.
foo <- c(1, 2, NA, 4)
foo %>% min()
## [1] NA
foo %>% mean()
## [1] NA
foo %>% max()
## [1] NA
foo %>% sd()
## [1] NA
lm functionmin <- function(vector_of_values, na.rm=TRUE)
# Passing na.rm = TRUE drops the missing value before each summary is computed.
# Statements and their recorded output are split onto separate lines so the
# code after the first `##` is no longer commented out.
foo <- c(1, 2, NA, 4)
min(foo, na.rm = TRUE)
## [1] 1
mean(foo, na.rm = TRUE)
## [1] 2.333333
max(foo, na.rm = TRUE)
## [1] 4
sd(foo, na.rm = TRUE)
## [1] 1.527525
# Inspect a character vector's type, then convert it to a factor.
# Report the class of the vector
c('foo', 'moo', 'boo') %>% class()
# Test whether the vector is a character vector
c('foo', 'moo', 'boo') %>% is.character()
# Test whether the vector is a factor
c('foo', 'moo', 'boo') %>% is.factor()
# Convert the vector to a factor
c('foo', 'moo', 'boo') %>% as.factor()
# Convert to a factor, then report the class of the result
c('foo', 'moo', 'boo') %>% as.factor() %>% class()
# Inspect a character vector's type, then convert it to a factor.
# Recorded output is shown for the first three calls only.
# Report the class of the vector
c('foo', 'moo', 'boo') %>% class()
## [1] "character"
# Test whether the vector is a character vector
c('foo', 'moo', 'boo') %>% is.character()
## [1] TRUE
# Test whether the vector is a factor
c('foo', 'moo', 'boo') %>% is.factor()
## [1] FALSE
# Convert the vector to a factor
c('foo', 'moo', 'boo') %>% as.factor()
# Convert to a factor, then report the class of the result
c('foo', 'moo', 'boo') %>% as.factor() %>% class()
# Inspect a character vector's type, then convert it to a factor.
# Report the class of the vector
c('foo', 'moo', 'boo') %>% class()
## [1] "character"
# Test whether the vector is a character vector
c('foo', 'moo', 'boo') %>% is.character()
## [1] TRUE
# Test whether the vector is a factor
c('foo', 'moo', 'boo') %>% is.factor()
## [1] FALSE
# Convert the vector to a factor; the levels print in sorted order
c('foo', 'moo', 'boo') %>% as.factor()
## [1] foo moo boo
## Levels: boo foo moo
# Convert to a factor, then report the class of the result
c('foo', 'moo', 'boo') %>% as.factor() %>% class()
## [1] "factor"
# Build a three-column tibble: two integer columns and one character column.
# tibble() is used because data_frame() is deprecated in the tibble package.
tibble(
  x = 1:3,
  y = 4:6,
  z = c('foo', 'boo', 'moo')
)
## # A tibble: 3 x 3
##       x     y z
##   <int> <int> <chr>
## 1     1     4 foo
## 2     2     5 boo
## 3     3     6 moo
Use $ between the data frame name and the variable name
cars$speed
## [1] 4 4 7 7 8 9 10
cars[,1]
## [1] 4 4 7 7 8 9 10
# Fill a 3 x 2 matrix with 1:6; values are placed column by column,
# as the printed result shows.
matrix(data = 1:6, nrow = 3, ncol = 2)
##      [,1] [,2]
## [1,]    1    4
## [2,]    2    5
## [3,]    3    6
# Different ways to create vectors.
# c() combines individual values into a vector
c('foo', 'moo', 'boo')
## [1] "foo" "moo" "boo"
# The colon operator builds a run of consecutive integers
1:10
## [1] 1 2 3 4 5 6 7 8 9 10
# rep() repeats its input; here the whole sequence is repeated twice
rep(1:2, times = 2)
## [1] 1 2 1 2
# Same result when the input is built with c()
rep(c(1,2), times = 2)
## [1] 1 2 1 2
# seq() steps from `from` to `to` in increments of `by`
seq(from = 0, to = 100, by = 10)
## [1] 0 10 20 30 40 50 60 70 80 90 100
# The same call with the arguments matched by position instead of by name
seq(0, 100, 10)
## [1] 0 10 20 30 40 50 60 70 80 90 100
# Extract the speed column of the cars data frame as a vector
cars$speed
## [1] 4 4 7 7 8 9 10 10 10 11 11 12 12 12 12 13 13 13 13 14 14 14 14
## [24] 15 15 15 16 16 17 17 17 18 18 18 18 19 19 19 20 20 20 20 20 22 23 24
## [47] 24 24 24 25
# Square brackets after a vector select elements by position (1-based)
c('foo', 'moo', 'boo')[2]
seq(from = 0, to = 100, by = 10)[6]
# The same two calls, now with their recorded output
c('foo', 'moo', 'boo')[2]
## [1] "moo"
seq(from = 0, to = 100, by = 10)[6]
## [1] 50
Questions
- What type of data object is donor?
- What are the min() and mean() for amount?
- What is the max() for legislative_district?
To answer the questions, import the small donations dataset
# Read the small donations dataset from a remote CSV into `donor`.
# NOTE(review): the source is a shortened URL; confirm it still resolves.
donor <- read.csv('https://goo.gl/tm9JQ5')
Questions
- What type of data object is donor?
- What are the min() and mean() for amount?
- What is the max() for legislative_district?
# What type of data object is donor?
# The call and its recorded output are placed on separate lines.
donor %>% class()
## [1] "data.frame"
Questions
- What type of data object is donor?
- What are the min() and mean() for amount?
- What is the max() for legislative_district?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# What type of data object is donor?
donor %>% class()
## [1] "data.frame"
# What are the min() and mean() for amount?
donor$amount %>% min()
## [1] 0
donor$amount %>% mean()
## [1] 255.7491
Questions
- What type of data object is donor?
- What are the min() and mean() for amount?
- What is the max() for legislative_district?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# What type of data object is donor?
donor %>% class()
## [1] "data.frame"
# What are the min() and mean() for amount?
donor$amount %>% min()
## [1] 0
donor$amount %>% mean()
## [1] 255.7491
# What is the max() for legislative_district? na.rm = TRUE skips missing values.
donor$legislative_district %>% max(na.rm = TRUE)
## [1] 49
head() shows you the top subset of the data in a data frame
The n argument of head() sets the number of rows shown and defaults to 6
tail() shows the bottom subset of data in a data frame
summary() shows summary statistics on all variables in a dataset
ls() shows all variables in a data frame
Use ls() without an object between the parentheses to see all objects in your workspace
str() tells the variable type and selected variable values in a data frame for all variables
dim() tells you the dimensions of your dataset
nrow() reports the number of rows only
ncol() reports the number of columns only
Questions
- How many rows are in donor?
- What is the median value for amount?
- How many variables are in donor?
Hint: There are multiple ways to answer these questions with the functions you know
Questions
- How many rows are in donor?
- What is the median value for amount?
- How many variables are in donor?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many rows are in donor? dim() reports rows first, then columns.
donor %>% dim()
## [1] 9491   38
# What is the median value for amount?
donor$amount %>% median(na.rm = TRUE)
## [1] 35
# How many variables are in donor?
donor %>% ncol()
## [1] 38
Other methods to answer questions
# Alternative ways to explore donor: per-variable summaries and variable names.
# Each statement sits on its own line; fused on one line they do not parse.
donor %>% summary()
donor %>% ls()
table() shows you the distribution of values in a vector
length() tells you the number of elements in a vector
unique() shows you the unique values in a vector
summary() shows you descriptive statistics for a vector
summary() can run on a vector or data frame
Questions
- How many ‘DEMOCRAT’ values are there in party?
- Which value in contributor_employer_state is most frequent?
- Is ‘Mayoral Race’ a type value?
- How many distinct first_name values are there?
Hint: There are multiple ways to answer these questions with the functions you know
Questions
- How many ‘DEMOCRAT’ values are there in party?
- Which value in contributor_employer_state is most frequent?
- Is ‘Mayoral Race’ a type value?
- How many distinct first_name values are there?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many 'DEMOCRAT' values are there in party?
# c() turns the table into a named vector; position 1 is DEMOCRAT (see output).
table(donor$party) %>% c() %>% .[1]
## DEMOCRAT
##     1366
# Which value in contributor_employer_state is most frequent?
# tail() shows the last six counts of the table.
table(donor$contributor_employer_state) %>% c() %>% tail()
##   PA   SE   TX   UT   VA   WA
##    8    1   14    2    2 2741
# Is 'Mayoral Race' a type value? List the distinct values to check.
donor$type %>% unique()
## [1] Candidate           Political Committee
## Levels: Candidate Political Committee
# How many distinct first_name values are there?
donor$first_name %>% unique() %>% length()
## [1] 486
Make sure…
min() on a character string, but probably not<-
%in%
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
Import the small police dataset in R Studio
# Read the small police dataset from a remote CSV into `police`.
# NOTE(review): the source is a shortened URL; confirm it still resolves.
police <- read.csv('https://goo.gl/T42fHz')
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
# How many rows and columns are there in police?
# The call and its recorded output are placed on separate lines.
police %>% dim()
## [1] 10000    25
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many rows and columns are there in police?
police %>% dim()
## [1] 10000    25
# What is the most recent event_clearance_date?
# NOTE(review): as.Date() is called without a format; confirm the column
# parses as intended.
police$event_clearance_date %>% as.Date() %>% max(na.rm = TRUE)
## [1] "2017-11-10"
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many rows and columns are there in police?
police %>% dim()
## [1] 10000    25
# What is the most recent event_clearance_date?
# NOTE(review): as.Date() is called without a format; confirm the column
# parses as intended.
police$event_clearance_date %>% as.Date() %>% max(na.rm = TRUE)
## [1] "2017-11-10"
# Which district_sector sees the most incident activity?
# Positions 5:12 of the table are sectors E through N (see output).
table(police$district_sector) %>% c() %>% .[5:12]
##   E   F   G   J   K   L   M   N
## 771 471 412 522 901 539 876 622
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many rows and columns are there in police?
police %>% dim()
## [1] 10000    25
# What is the most recent event_clearance_date?
# NOTE(review): as.Date() is called without a format; confirm the column
# parses as intended.
police$event_clearance_date %>% as.Date() %>% max(na.rm = TRUE)
## [1] "2017-11-10"
# Which district_sector sees the most incident activity?
# Positions 5:12 of the table are sectors E through N (see output).
table(police$district_sector) %>% c() %>% .[5:12]
##   E   F   G   J   K   L   M   N
## 771 471 412 522 901 539 876 622
# How many 'RECKLESS BURNING' incidences are there in event_clearance_subgroup?
# Positions 32:33 hold PUBLIC GATHERINGS and RECKLESS BURNING (see output).
police$event_clearance_subgroup %>% table() %>% c() %>% .[32:33]
## PUBLIC GATHERINGS  RECKLESS BURNING
##                 4                 3
Questions
- How many rows and columns are there in police?
- What is the most recent event_clearance_date?
- Which district_sector sees the most incident activity?
- How many ‘RECKLESS BURNING’ incidences are there in event_clearance_subgroup?
- What is the smallest census_tract value?
# Statements and recorded output are split onto separate lines so the code
# after the first `##` is no longer commented out.
# How many rows and columns are there in police?
police %>% dim()
## [1] 10000    25
# What is the most recent event_clearance_date?
# NOTE(review): as.Date() is called without a format; confirm the column
# parses as intended.
police$event_clearance_date %>% as.Date() %>% max(na.rm = TRUE)
## [1] "2017-11-10"
# Which district_sector sees the most incident activity?
# Positions 5:12 of the table are sectors E through N (see output).
table(police$district_sector) %>% c() %>% .[5:12]
##   E   F   G   J   K   L   M   N
## 771 471 412 522 901 539 876 622
# How many 'RECKLESS BURNING' incidences are there in event_clearance_subgroup?
# Positions 32:33 hold PUBLIC GATHERINGS and RECKLESS BURNING (see output).
police$event_clearance_subgroup %>% table() %>% c() %>% .[32:33]
## PUBLIC GATHERINGS  RECKLESS BURNING
##                 4                 3
# What is the smallest census_tract value?
# Convert through as.character() first: as.integer() applied directly to a
# factor returns the internal level codes rather than the recorded values.
# NOTE(review): presumably census_tract was read as a factor; the slide's
# recorded result for the original as.integer() call was 1. Re-run to record
# the value produced by this conversion.
police$census_tract %>% as.character() %>% as.numeric() %>% min(na.rm = TRUE)