# Load the haven package
library(haven)
## Warning: package 'haven' was built under R version 3.2.5
# Read the SAS data set sales.sas7bdat into `sales`
sales <- haven::read_sas("sales.sas7bdat")
# Inspect the column types of the imported data
str(sales)
## Classes 'tbl_df', 'tbl' and 'data.frame': 431 obs. of 4 variables:
## $ purchase: num 0 0 1 1 0 0 0 0 0 0 ...
## $ age : num 41 47 41 39 32 32 33 45 43 40 ...
## $ gender : chr "Female" "Female" "Female" "Female" ...
## $ income : chr "Low" "Low" "Low" "Low" ...
# Read the Stata trade data straight from its URL into `sugar`
sugar <- haven::read_dta(
  "http://assets.datacamp.com/course/importing_data_into_r/trade.dta"
)
# Inspect the imported columns and their label attributes
str(sugar)
## Classes 'tbl_df', 'tbl' and 'data.frame': 10 obs. of 5 variables:
## $ Date :Class 'labelled' atomic [1:10] 10 9 8 7 6 5 4 3 2 1
## .. ..- attr(*, "label")= chr "Date"
## .. ..- attr(*, "labels")= Named int [1:10] 1 2 3 4 5 6 7 8 9 10
## .. .. ..- attr(*, "names")= chr [1:10] "2004-12-31" "2005-12-31" "2006-12-31" "2007-12-31" ...
## $ Import : atomic 37664782 16316512 11082246 35677943 9879878 1539992 28021 2652 7067402 1033672
## ..- attr(*, "label")= chr "Import"
## $ Weight_I: atomic 54029106 21584365 14526089 55034932 14806865 1749318 54567 3821 23722957 1964980
## ..- attr(*, "label")= chr "Weight_I"
## $ Export : atomic 54505513 102700010 37935000 48515008 71486545 12311696 16489813 29273920 46497438 27131638
## ..- attr(*, "label")= chr "Export"
## $ Weight_E: atomic 93350013 158000010 88000000 112000005 131800000 18500014 39599944 102072480 147583380 78268792
## ..- attr(*, "label")= chr "Weight_E"
# Turn the labelled Date column into its label text (via a factor),
# then parse that text as Date values
sugar[["Date"]] <- as.Date(haven::as_factor(sugar[["Date"]]))
# Inspect sugar again to confirm the new column type
str(sugar)
## Classes 'tbl_df', 'tbl' and 'data.frame': 10 obs. of 5 variables:
## $ Date : Date, format: "2013-12-31" "2012-12-31" ...
## $ Import : atomic 37664782 16316512 11082246 35677943 9879878 1539992 28021 2652 7067402 1033672
## ..- attr(*, "label")= chr "Import"
## $ Weight_I: atomic 54029106 21584365 14526089 55034932 14806865 1749318 54567 3821 23722957 1964980
## ..- attr(*, "label")= chr "Weight_I"
## $ Export : atomic 54505513 102700010 37935000 48515008 71486545 12311696 16489813 29273920 46497438 27131638
## ..- attr(*, "label")= chr "Export"
## $ Weight_E: atomic 93350013 158000010 88000000 112000005 131800000 18500014 39599944 102072480 147583380 78268792
## ..- attr(*, "label")= chr "Weight_E"
# Scatter plot of import value against import weight.
# Column names are case-sensitive: the str() output above lists the weight
# column as `Weight_I`, so `weight_I` would not refer to any column.
plot(sugar$Import, sugar$Weight_I)
# Observation: the import figures in USD and the import figures in weight are positively correlated.
# Import personality.sav: traits
#traits <- read_sav("personality.sav")
# Summarize traits
#summary(traits)
# Print out a subset
#subset(traits, Extroversion >40 & Agreeableness > 40)
# Read the SPSS employee data straight from its URL into `work`
work <- haven::read_sav(
  "http://assets.datacamp.com/course/importing_data_into_r/employee.sav"
)
# Summarise the GENDER column as imported
summary(work[["GENDER"]])
## Length Class Mode
## 474 labelled character
# Replace the GENDER column with a factor built from its value labels
work[["GENDER"]] <- haven::as_factor(work[["GENDER"]])
# Summarise GENDER again, now that it is a factor
summary(work[["GENDER"]])
## Female Male
## 216 258
# Load the foreign package
library(foreign)
## Warning: package 'foreign' was built under R version 3.2.3
# Read the Stata file florida.dta into the data frame `florida`
florida <- foreign::read.dta("florida.dta")
# Show the last six rows of florida
tail(florida, n = 6L)
## gore bush buchanan nader total
## 62 2647 4051 27 59 6784
## 63 1399 2326 26 29 3780
## 64 97063 82214 396 2436 182109
## 65 3835 4511 46 149 8541
## 66 5637 12176 120 265 18198
## 67 2796 4983 88 93 7960
# Build the path to the World Bank file in a platform-independent way
path <- file.path("worldbank", "edequality.dta")
# Read it with read.dta()'s default settings and inspect the structure
edu_equal_1 <- foreign::read.dta(file = path)
str(edu_equal_1)
## 'data.frame': 12214 obs. of 27 variables:
## $ hhid : num 1 1 1 2 2 3 4 4 5 6 ...
## $ hhweight : num 627 627 627 627 627 ...
## $ location : Factor w/ 2 levels "urban location",..: 1 1 1 1 1 2 2 2 1 1 ...
## $ region : Factor w/ 9 levels "Sofia city","Bourgass",..: 8 8 8 9 9 4 4 4 8 8 ...
## $ ethnicity_head : Factor w/ 4 levels "Bulgaria","Turks",..: 2 2 2 1 1 1 1 1 1 1 ...
## $ age : num 37 11 8 73 70 75 79 80 82 83 ...
## $ gender : Factor w/ 2 levels "male","female": 2 2 1 1 2 1 1 2 2 2 ...
## $ relation : Factor w/ 9 levels "head ",..: 1 3 3 1 2 1 1 2 1 1 ...
## $ literate : Factor w/ 2 levels "no","yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ income_mnt : num 13.3 13.3 13.3 142.5 142.5 ...
## $ income : num 160 160 160 1710 1710 ...
## $ aggregate : num 1042 1042 1042 3271 3271 ...
## $ aggr_ind_annual : num 347 347 347 1635 1635 ...
## $ educ_completed : int 2 4 4 4 3 3 3 3 4 4 ...
## $ grade_complete : num 4 3 0 3 4 4 4 4 5 5 ...
## $ grade_all : num 4 11 8 11 8 8 8 8 13 13 ...
## $ unemployed : int 2 1 1 1 1 1 1 1 1 1 ...
## $ reason_OLF : int NA NA NA 3 3 3 9 9 3 3 ...
## $ sector : int NA NA NA NA NA NA 1 1 NA NA ...
## $ occupation : int NA NA NA NA NA NA 5 5 NA NA ...
## $ earn_mont : num 0 0 0 0 0 0 20 20 0 0 ...
## $ earn_ann : num 0 0 0 0 0 0 240 240 0 0 ...
## $ hours_week : num NA NA NA NA NA NA 30 35 NA NA ...
## $ hours_mnt : num NA NA NA NA NA ...
## $ fulltime : int NA NA NA NA NA NA 1 1 NA NA ...
## $ hhexp : num 100 100 100 343 343 ...
## $ legacy_pension_amt: num NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr ""
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 100 100 108 108 108 100 108 108 108 100 ...
## - attr(*, "val.labels")= chr "" "" "location" "region" ...
## - attr(*, "var.labels")= chr "hhid" "hhweight" "location" "region" ...
## - attr(*, "expansion.fields")=List of 12
## ..$ : chr "_dta" "_svy_su1" "cluster"
## ..$ : chr "_dta" "_svy_strata1" "strata"
## ..$ : chr "_dta" "_svy_stages" "1"
## ..$ : chr "_dta" "_svy_version" "2"
## ..$ : chr "_dta" "__XijVarLabcons" "(sum) cons"
## ..$ : chr "_dta" "ReS_Xij" "cons"
## ..$ : chr "_dta" "ReS_str" "0"
## ..$ : chr "_dta" "ReS_j" "group"
## ..$ : chr "_dta" "ReS_ver" "v.2"
## ..$ : chr "_dta" "ReS_i" "hhid dur"
## ..$ : chr "_dta" "note1" "variables g1pc, g2pc, g3pc, g4pc, g5pc, g7pc, g8pc, g9pc, g10pc, g11pc, g12pc, gall, health, rent, durables were adjusted by r"| __truncated__
## ..$ : chr "_dta" "note0" "1"
## - attr(*, "version")= int 7
## - attr(*, "label.table")=List of 12
## ..$ location: Named int 1 2
## .. ..- attr(*, "names")= chr "urban location" "rural location"
## ..$ region : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "Sofia city" "Bourgass" "Varna" "Lovetch" ...
## ..$ ethnic : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "Bulgaria" "Turks" "Roma" "Other"
## ..$ s2_q2 : Named int 1 2
## .. ..- attr(*, "names")= chr "male" "female"
## ..$ s2_q3 : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "head " "spouse/partner " "child " "son/daughter-in-law " ...
## ..$ lit : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
## ..$ : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "never attanded" "primary" "secondary" "postsecondary"
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "Not unemployed" "Unemployed"
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "student" "housewife/childcare" "in retirement" "illness, disability" ...
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "agriculture" "mining" "manufacturing" "utilities" ...
## ..$ : Named int 1 2 3 4 5
## .. ..- attr(*, "names")= chr "private company" "public works program" "government,public sector, army" "private individual" ...
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
# Read the same file again, this time without converting labelled
# values to factors, and inspect the structure
edu_equal_2 <- foreign::read.dta(
  file = path,
  convert.factors = FALSE
)
str(edu_equal_2)
## 'data.frame': 12214 obs. of 27 variables:
## $ hhid : num 1 1 1 2 2 3 4 4 5 6 ...
## $ hhweight : num 627 627 627 627 627 ...
## $ location : int 1 1 1 1 1 2 2 2 1 1 ...
## $ region : int 8 8 8 9 9 4 4 4 8 8 ...
## $ ethnicity_head : int 2 2 2 1 1 1 1 1 1 1 ...
## $ age : num 37 11 8 73 70 75 79 80 82 83 ...
## $ gender : int 2 2 1 1 2 1 1 2 2 2 ...
## $ relation : int 1 3 3 1 2 1 1 2 1 1 ...
## $ literate : int 1 2 2 2 2 2 2 2 2 2 ...
## $ income_mnt : num 13.3 13.3 13.3 142.5 142.5 ...
## $ income : num 160 160 160 1710 1710 ...
## $ aggregate : num 1042 1042 1042 3271 3271 ...
## $ aggr_ind_annual : num 347 347 347 1635 1635 ...
## $ educ_completed : int 2 4 4 4 3 3 3 3 4 4 ...
## $ grade_complete : num 4 3 0 3 4 4 4 4 5 5 ...
## $ grade_all : num 4 11 8 11 8 8 8 8 13 13 ...
## $ unemployed : int 2 1 1 1 1 1 1 1 1 1 ...
## $ reason_OLF : int NA NA NA 3 3 3 9 9 3 3 ...
## $ sector : int NA NA NA NA NA NA 1 1 NA NA ...
## $ occupation : int NA NA NA NA NA NA 5 5 NA NA ...
## $ earn_mont : num 0 0 0 0 0 0 20 20 0 0 ...
## $ earn_ann : num 0 0 0 0 0 0 240 240 0 0 ...
## $ hours_week : num NA NA NA NA NA NA 30 35 NA NA ...
## $ hours_mnt : num NA NA NA NA NA ...
## $ fulltime : int NA NA NA NA NA NA 1 1 NA NA ...
## $ hhexp : num 100 100 100 343 343 ...
## $ legacy_pension_amt: num NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr ""
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 100 100 108 108 108 100 108 108 108 100 ...
## - attr(*, "val.labels")= chr "" "" "location" "region" ...
## - attr(*, "var.labels")= chr "hhid" "hhweight" "location" "region" ...
## - attr(*, "expansion.fields")=List of 12
## ..$ : chr "_dta" "_svy_su1" "cluster"
## ..$ : chr "_dta" "_svy_strata1" "strata"
## ..$ : chr "_dta" "_svy_stages" "1"
## ..$ : chr "_dta" "_svy_version" "2"
## ..$ : chr "_dta" "__XijVarLabcons" "(sum) cons"
## ..$ : chr "_dta" "ReS_Xij" "cons"
## ..$ : chr "_dta" "ReS_str" "0"
## ..$ : chr "_dta" "ReS_j" "group"
## ..$ : chr "_dta" "ReS_ver" "v.2"
## ..$ : chr "_dta" "ReS_i" "hhid dur"
## ..$ : chr "_dta" "note1" "variables g1pc, g2pc, g3pc, g4pc, g5pc, g7pc, g8pc, g9pc, g10pc, g11pc, g12pc, gall, health, rent, durables were adjusted by r"| __truncated__
## ..$ : chr "_dta" "note0" "1"
## - attr(*, "version")= int 7
## - attr(*, "label.table")=List of 12
## ..$ location: Named int 1 2
## .. ..- attr(*, "names")= chr "urban location" "rural location"
## ..$ region : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "Sofia city" "Bourgass" "Varna" "Lovetch" ...
## ..$ ethnic : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "Bulgaria" "Turks" "Roma" "Other"
## ..$ s2_q2 : Named int 1 2
## .. ..- attr(*, "names")= chr "male" "female"
## ..$ s2_q3 : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "head " "spouse/partner " "child " "son/daughter-in-law " ...
## ..$ lit : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
## ..$ : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "never attanded" "primary" "secondary" "postsecondary"
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "Not unemployed" "Unemployed"
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "student" "housewife/childcare" "in retirement" "illness, disability" ...
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "agriculture" "mining" "manufacturing" "utilities" ...
## ..$ : Named int 1 2 3 4 5
## .. ..- attr(*, "names")= chr "private company" "public works program" "government,public sector, army" "private individual" ...
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
# Read the same file once more, converting underscores in variable
# names to periods, and inspect the structure
edu_equal_3 <- foreign::read.dta(
  file = path,
  convert.underscore = TRUE
)
str(edu_equal_3)
## 'data.frame': 12214 obs. of 27 variables:
## $ hhid : num 1 1 1 2 2 3 4 4 5 6 ...
## $ hhweight : num 627 627 627 627 627 ...
## $ location : Factor w/ 2 levels "urban location",..: 1 1 1 1 1 2 2 2 1 1 ...
## $ region : Factor w/ 9 levels "Sofia city","Bourgass",..: 8 8 8 9 9 4 4 4 8 8 ...
## $ ethnicity.head : Factor w/ 4 levels "Bulgaria","Turks",..: 2 2 2 1 1 1 1 1 1 1 ...
## $ age : num 37 11 8 73 70 75 79 80 82 83 ...
## $ gender : Factor w/ 2 levels "male","female": 2 2 1 1 2 1 1 2 2 2 ...
## $ relation : Factor w/ 9 levels "head ",..: 1 3 3 1 2 1 1 2 1 1 ...
## $ literate : Factor w/ 2 levels "no","yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ income.mnt : num 13.3 13.3 13.3 142.5 142.5 ...
## $ income : num 160 160 160 1710 1710 ...
## $ aggregate : num 1042 1042 1042 3271 3271 ...
## $ aggr.ind.annual : num 347 347 347 1635 1635 ...
## $ educ.completed : int 2 4 4 4 3 3 3 3 4 4 ...
## $ grade.complete : num 4 3 0 3 4 4 4 4 5 5 ...
## $ grade.all : num 4 11 8 11 8 8 8 8 13 13 ...
## $ unemployed : int 2 1 1 1 1 1 1 1 1 1 ...
## $ reason.OLF : int NA NA NA 3 3 3 9 9 3 3 ...
## $ sector : int NA NA NA NA NA NA 1 1 NA NA ...
## $ occupation : int NA NA NA NA NA NA 5 5 NA NA ...
## $ earn.mont : num 0 0 0 0 0 0 20 20 0 0 ...
## $ earn.ann : num 0 0 0 0 0 0 240 240 0 0 ...
## $ hours.week : num NA NA NA NA NA NA 30 35 NA NA ...
## $ hours.mnt : num NA NA NA NA NA ...
## $ fulltime : int NA NA NA NA NA NA 1 1 NA NA ...
## $ hhexp : num 100 100 100 343 343 ...
## $ legacy.pension.amt: num NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, "datalabel")= chr ""
## - attr(*, "time.stamp")= chr ""
## - attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
## - attr(*, "types")= int 100 100 108 108 108 100 108 108 108 100 ...
## - attr(*, "val.labels")= chr "" "" "location" "region" ...
## - attr(*, "var.labels")= chr "hhid" "hhweight" "location" "region" ...
## - attr(*, "expansion.fields")=List of 12
## ..$ : chr "_dta" "_svy_su1" "cluster"
## ..$ : chr "_dta" "_svy_strata1" "strata"
## ..$ : chr "_dta" "_svy_stages" "1"
## ..$ : chr "_dta" "_svy_version" "2"
## ..$ : chr "_dta" "__XijVarLabcons" "(sum) cons"
## ..$ : chr "_dta" "ReS_Xij" "cons"
## ..$ : chr "_dta" "ReS_str" "0"
## ..$ : chr "_dta" "ReS_j" "group"
## ..$ : chr "_dta" "ReS_ver" "v.2"
## ..$ : chr "_dta" "ReS_i" "hhid dur"
## ..$ : chr "_dta" "note1" "variables g1pc, g2pc, g3pc, g4pc, g5pc, g7pc, g8pc, g9pc, g10pc, g11pc, g12pc, gall, health, rent, durables were adjusted by r"| __truncated__
## ..$ : chr "_dta" "note0" "1"
## - attr(*, "version")= int 7
## - attr(*, "label.table")=List of 12
## ..$ location: Named int 1 2
## .. ..- attr(*, "names")= chr "urban location" "rural location"
## ..$ region : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "Sofia city" "Bourgass" "Varna" "Lovetch" ...
## ..$ ethnic : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "Bulgaria" "Turks" "Roma" "Other"
## ..$ s2_q2 : Named int 1 2
## .. ..- attr(*, "names")= chr "male" "female"
## ..$ s2_q3 : Named int 1 2 3 4 5 6 7 8 9
## .. ..- attr(*, "names")= chr "head " "spouse/partner " "child " "son/daughter-in-law " ...
## ..$ lit : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
## ..$ : Named int 1 2 3 4
## .. ..- attr(*, "names")= chr "never attanded" "primary" "secondary" "postsecondary"
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "Not unemployed" "Unemployed"
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "student" "housewife/childcare" "in retirement" "illness, disability" ...
## ..$ : Named int 1 2 3 4 5 6 7 8 9 10
## .. ..- attr(*, "names")= chr "agriculture" "mining" "manufacturing" "utilities" ...
## ..$ : Named int 1 2 3 4 5
## .. ..- attr(*, "names")= chr "private company" "public works program" "government,public sector, army" "private individual" ...
## ..$ : Named int 1 2
## .. ..- attr(*, "names")= chr "no" "yes"
# Read the SPSS file international.sav into the data frame `demo`
demo <- foreign::read.spss("international.sav", to.data.frame = TRUE)
# Draw a boxplot of the gdp column
boxplot(demo[["gdp"]])
# Correlation between gdp and f_illit
cor(demo[["gdp"]], demo[["f_illit"]])
## [1] -0.4476856
# Read international.sav into `demo_1`, keeping the default handling of
# value labels
demo_1 <- foreign::read.spss("international.sav", to.data.frame = TRUE)
# Show the first two rows of demo_1
head(demo_1, n = 2L)
## id country contint m_illit f_illit lifeexpt gdp
## 1 1 Argentina Americas 3.0 3.0 16 3375
## 2 2 Benin Africa 45.2 74.5 7 521
# Read international.sav into `demo_2` with value labels switched off,
# so labelled variables keep their raw codes
demo_2 <- foreign::read.spss(
  "international.sav",
  to.data.frame = TRUE,
  use.value.labels = FALSE
)
# Show the first two rows of demo_2
head(demo_2, n = 2L)
## id country contint m_illit f_illit lifeexpt gdp
## 1 1 Argentina 2 3.0 3.0 16 3375
## 2 2 Benin 1 45.2 74.5 7 521