CodeBook

A codebook is an important document for research or data analysis.
It describes how the variables are coded and how the database was generated.
It gives details of the data and where it was sourced.
It provides a reference for the research or analysis many years later.

I use the Titanic dataset that ships with R to create the codebook.

library(lattice)
library(MASS)
library(memisc)
## 
## Attaching package: 'memisc'
## The following objects are masked from 'package:stats':
## 
##     contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
## 
##     as.array
x <- data.set(Titanic) # convert the Titanic table into a memisc data.set
typeof(x)     # internal storage type of x (this is not its class)
## [1] "list"
x # print the data set
## 
## Data set with 32 observations and 5 variables
## 
##    Titanic.Class Titanic.Sex Titanic.Age Titanic.Survived Titanic.Freq
##  1           1st        Male       Child               No            0
##  2           2nd        Male       Child               No            0
##  3           3rd        Male       Child               No           35
##  4          Crew        Male       Child               No            0
##  5           1st      Female       Child               No            0
##  6           2nd      Female       Child               No            0
##  7           3rd      Female       Child               No           17
##  8          Crew      Female       Child               No            0
##  9           1st        Male       Adult               No          118
## 10           2nd        Male       Adult               No          154
## 11           3rd        Male       Adult               No          387
## 12          Crew        Male       Adult               No          670
## 13           1st      Female       Adult               No            4
## 14           2nd      Female       Adult               No           13
## 15           3rd      Female       Adult               No           89
## 16          Crew      Female       Adult               No            3
## 17           1st        Male       Child              Yes            5
## 18           2nd        Male       Child              Yes           11
## 19           3rd        Male       Child              Yes           13
## 20          Crew        Male       Child              Yes            0
## 21           1st      Female       Child              Yes            1
## 22           2nd      Female       Child              Yes           13
## 23           3rd      Female       Child              Yes           14
## 24          Crew      Female       Child              Yes            0
## 25           1st        Male       Adult              Yes           57
## .. ............. ........... ........... ................ ............
## (25 of 32 observations shown)
codebook(x)   # per-variable codebook: storage mode, measurement level, value summary
## ================================================================================
## 
##    Titanic.Class
## 
## --------------------------------------------------------------------------------
## 
##    Storage mode: integer
##    Measurement: nominal
## 
##    Values and labels       N Percent
##                                     
##    1 '1st'                 8    25.0
##    2 '2nd'                 8    25.0
##    3 '3rd'                 8    25.0
##    4 'Crew'                8    25.0
## 
## ================================================================================
## 
##    Titanic.Sex
## 
## --------------------------------------------------------------------------------
## 
##    Storage mode: integer
##    Measurement: nominal
## 
##    Values and labels       N Percent
##                                     
##    1 'Male'               16    50.0
##    2 'Female'             16    50.0
## 
## ================================================================================
## 
##    Titanic.Age
## 
## --------------------------------------------------------------------------------
## 
##    Storage mode: integer
##    Measurement: nominal
## 
##    Values and labels       N Percent
##                                     
##    1 'Child'              16    50.0
##    2 'Adult'              16    50.0
## 
## ================================================================================
## 
##    Titanic.Survived
## 
## --------------------------------------------------------------------------------
## 
##    Storage mode: integer
##    Measurement: nominal
## 
##    Values and labels       N Percent
##                                     
##    1 'No'                 16    50.0
##    2 'Yes'                16    50.0
## 
## ================================================================================
## 
##    Titanic.Freq
## 
## --------------------------------------------------------------------------------
## 
##    Storage mode: double
##    Measurement: interval
## 
##         Min:   0.000
##         Max: 670.000
##        Mean:  68.781
##    Std.Dev.: 133.854

To check the class of the object, we can use the class() function.

# Rebuild the data set, then query its class (as opposed to its storage type).
x <- data.set(Titanic)
class(x)
## [1] "data.set"
## attr(,"package")
## [1] "memisc"

To extract more details, we can use the sapply() function.

# `Titanic` is a contingency table, so sapply() over it visits each of the 32
# cell counts one at a time: every class is "numeric" and every min, max and
# range is just the cell value itself. Convert the table to a data frame first
# so that the summaries are computed per variable instead.
titanic_df <- as.data.frame(Titanic)
sapply(titanic_df, class)
##     Class       Sex       Age  Survived      Freq 
##  "factor"  "factor"  "factor"  "factor" "numeric" 
# min/max/range are only meaningful for numeric columns (here the cell counts);
# the classification variables are factors.
numeric_cols <- Filter(is.numeric, titanic_df)
sapply(numeric_cols, min)
## Freq 
##    0 
sapply(numeric_cols, max)
## Freq 
##  670 
sapply(numeric_cols, range)
##      Freq
## [1,]    0
## [2,]  670

The summary of the dataset is shown below.

# For a contingency table, summary() reports the number of cases, the number
# of factors, and a chi-squared test of independence of all factors.
summary(Titanic)
## Number of cases in table: 2201 
## Number of factors: 4 
## Test for independence of all factors:
##  Chisq = 1637.4, df = 25, p-value = 0
##  Chi-squared approximation may be incorrect