First, load the haven package and import a dataset
# haven reads data files written by popular statistical software packages
# such as SPSS, SAS and Stata
library(haven)
# Import the SPSS (.sav) file straight from its URL; the result is a tibble
# whose labelled columns keep their value labels
data <- read_sav(file = "https://mmi.psycho.unibas.ch/r-toolbox/data/Cars.sav")
Presenting the imported data (Data about cars)
data # auto-print the imported tibble (first rows plus column types)
## # A tibble: 406 × 9
## MPG ENGINE HORSE WEIGHT ACCEL YEAR ORIGIN CYLINDER `FILTER_$`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl>
## 1 18 307 130 3504 12 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 2 15 350 165 3693 11.5 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 3 18 318 150 3436 11 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 4 16 304 150 3433 12 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 5 17 302 140 3449 10.5 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 6 15 429 198 4341 10 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 7 14 454 220 4354 9 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 8 14 440 215 4312 8.5 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 9 14 455 225 4425 10 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 10 15 390 190 3850 8.5 70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## # … with 396 more rows
Showing the variable labels and value labels using get_label() and get_labels()
library(sjlabelled) # package to read and write item labels and values
##
## Attaching package: 'sjlabelled'
## The following objects are masked from 'package:haven':
##
## as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
get_label(data) # show the variable labels (a description of what each variable measures)
## MPG
## "Miles per Gallon"
## ENGINE
## "Engine Displacement (cu. inches)"
## HORSE
## "Horsepower"
## WEIGHT
## "Vehicle Weight (lbs.)"
## ACCEL
## "Time to Accelerate from 0 to 60 mph (sec)"
## YEAR
## "Model Year (modulo 100)"
## ORIGIN
## "Country of Origin"
## CYLINDER
## "Number of Cylinders"
## FILTER_$
## "cylrec = 1 | cylrec = 2 (FILTER)"
# The get_label() call above returns a named character vector, for example:
# MPG
# "Miles per Gallon"
get_labels(data) # show the value labels (what the different answer options mean)
## $MPG
## NULL
##
## $ENGINE
## NULL
##
## $HORSE
## NULL
##
## $WEIGHT
## NULL
##
## $ACCEL
## NULL
##
## $YEAR
## [1] "0 (Missing)" "70" "71" "72" "73"
## [6] "74" "75" "76" "77" "78"
## [11] "79" "80" "81" "82"
##
## $ORIGIN
## [1] "American" "European" "Japanese"
##
## $CYLINDER
## [1] "3 Cylinders" "4 Cylinders" "5 Cylinders" "6 Cylinders" "8 Cylinders"
##
## $`FILTER_$`
## [1] "Not Selected" "Selected"
# get_labels() returns one element per variable, for example:
# $CYLINDER
# [1] "3 Cylinders" "4 Cylinders" "5 Cylinders" "6 Cylinders" "8 Cylinders"
# Note: the value labels are not used for this very simple codebook.
Create a simple codebook based on the labels of the dataset
# Build the codebook skeleton from the variable labels: enframe() converts the
# named vector returned by get_label() into a two-column tibble, and the
# columns are given informative names right away
# (variable name -> variable_id, label text -> item_text).
library(tibble)
simple_codebook <- enframe(
  get_label(data),
  name = "variable_id",
  value = "item_text"
)
# Print the resulting codebook
simple_codebook
## # A tibble: 9 × 2
## variable_id item_text
## <chr> <chr>
## 1 MPG Miles per Gallon
## 2 ENGINE Engine Displacement (cu. inches)
## 3 HORSE Horsepower
## 4 WEIGHT Vehicle Weight (lbs.)
## 5 ACCEL Time to Accelerate from 0 to 60 mph (sec)
## 6 YEAR Model Year (modulo 100)
## 7 ORIGIN Country of Origin
## 8 CYLINDER Number of Cylinders
## 9 FILTER_$ cylrec = 1 | cylrec = 2 (FILTER)
Generating descriptive statistics.
# get descriptive statistics and select those of interest
library(psych)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:sjlabelled':
##
## as_label
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compute summary statistics with describe() and keep only the columns that
# go into the codebook.
descriptives <- select(
  as_tibble(describe(data)),
  "n", "min", "max", "mean"
)
# Add the statistics to the codebook. cbind() matches rows purely by position:
# this assumes describe() returns one row per variable, in the same order as
# the columns of `data` -- TODO confirm if the input data changes.
simple_codebook <- cbind(simple_codebook, descriptives)