This work is based on [Generate a simple codebook in R](https://www.adrianbruegger.com/post/simple-codebook/), accessed 21/11/2021.

First, load the haven library and import a dataset.

# haven reads data files written by SPSS, SAS and Stata
library(haven)
# import the Cars example dataset (an SPSS .sav file) directly from its URL
data <- read_sav(
  file = "https://mmi.psycho.unibas.ch/r-toolbox/data/Cars.sav"
)

Presenting the imported data (Data about cars)

data
## # A tibble: 406 × 9
##      MPG ENGINE HORSE WEIGHT ACCEL      YEAR       ORIGIN   CYLINDER  `FILTER_$`
##    <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl+lbl>    <dbl+lbl>  <dbl+lbl>   <dbl+lbl>
##  1    18    307   130   3504  12     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  2    15    350   165   3693  11.5   70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  3    18    318   150   3436  11     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  4    16    304   150   3433  12     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  5    17    302   140   3449  10.5   70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  6    15    429   198   4341  10     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  7    14    454   220   4354   9     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  8    14    440   215   4312   8.5   70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
##  9    14    455   225   4425  10     70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## 10    15    390   190   3850   8.5   70 [70] 1 [American] 8 [8 Cyli… 0 [Not Sel…
## # … with 396 more rows

Showing the labels of variables using get_label and get_labels

library(sjlabelled) # package to read and write item labels and values
## 
## Attaching package: 'sjlabelled'
## The following objects are masked from 'package:haven':
## 
##     as_factor, read_sas, read_spss, read_stata, write_sas, zap_labels
get_label(data) # show the variable labels (what each variable measures)
##                                         MPG 
##                          "Miles per Gallon" 
##                                      ENGINE 
##          "Engine Displacement (cu. inches)" 
##                                       HORSE 
##                                "Horsepower" 
##                                      WEIGHT 
##                     "Vehicle Weight (lbs.)" 
##                                       ACCEL 
## "Time to Accelerate from 0 to 60 mph (sec)" 
##                                        YEAR 
##                   "Model Year (modulo 100)" 
##                                      ORIGIN 
##                         "Country of Origin" 
##                                    CYLINDER 
##                       "Number of Cylinders" 
##                                    FILTER_$ 
##          "cylrec = 1 | cylrec = 2 (FILTER)"
# which returns for example:
# MPG               
# "Miles per Gallon"

get_labels(data) # show the value labels (what each coded answer option means)
## $MPG
## NULL
## 
## $ENGINE
## NULL
## 
## $HORSE
## NULL
## 
## $WEIGHT
## NULL
## 
## $ACCEL
## NULL
## 
## $YEAR
##  [1] "0 (Missing)" "70"          "71"          "72"          "73"         
##  [6] "74"          "75"          "76"          "77"          "78"         
## [11] "79"          "80"          "81"          "82"         
## 
## $ORIGIN
## [1] "American" "European" "Japanese"
## 
## $CYLINDER
## [1] "3 Cylinders" "4 Cylinders" "5 Cylinders" "6 Cylinders" "8 Cylinders"
## 
## $`FILTER_$`
## [1] "Not Selected" "Selected"
# note: the value labels are not used for this very simple codebook.

# which returns for example:
# $CYLINDER
# [1] "3 Cylinders" "4 Cylinders" "5 Cylinders" "6 Cylinders" "8 Cylinders"

Create a simple codebook based on the labels of the dataset

# turn the named vector of variable labels into a two-column table,
# giving the columns informative names in the same step
library(tibble)
simple_codebook <- enframe(
  get_label(data),
  name = "variable_id",
  value = "item_text"
)

# Show the new data frame
simple_codebook
## # A tibble: 9 × 2
##   variable_id item_text                                
##   <chr>       <chr>                                    
## 1 MPG         Miles per Gallon                         
## 2 ENGINE      Engine Displacement (cu. inches)         
## 3 HORSE       Horsepower                               
## 4 WEIGHT      Vehicle Weight (lbs.)                    
## 5 ACCEL       Time to Accelerate from 0 to 60 mph (sec)
## 6 YEAR        Model Year (modulo 100)                  
## 7 ORIGIN      Country of Origin                        
## 8 CYLINDER    Number of Cylinders                      
## 9 FILTER_$    cylrec = 1 | cylrec = 2 (FILTER)

Generating descriptive statistics.

# get descriptive statistics and select those of interest
library(psych)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:sjlabelled':
## 
##     as_label
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# compute descriptive statistics for every variable, then keep only the
# columns wanted in the codebook
descriptives <- describe(data)
descriptives <- as_tibble(descriptives)
descriptives <- select(descriptives, "n", "min", "max", "mean")
# append the statistics column-wise; cbind() pairs rows by position, so this
# relies on both tables listing the variables in the same order
simple_codebook <- cbind(simple_codebook, descriptives)

Saving the codebook in csv and xlsx file format.

# export the codebook as CSV: missing values become empty cells and the
# row names are left out
write.csv(
  x = simple_codebook,
  file = "simple_codebook.csv",
  na = "",
  row.names = FALSE
)

# export the same table as an Excel workbook
library(openxlsx)
write.xlsx(x = simple_codebook, file = "simple_codebook.xlsx")