A codebook is an important document for research or data analysis.
It describes how the variables are coded and how the database was generated.
It provides details of the data and where it was sourced.
It provides a reference for the research or analysis many years later.
I am using the Titanic dataset that ships with R (in the `datasets` package) to create the codebook.
library(lattice)
library(MASS)
library(memisc)
##
## Attaching package: 'memisc'
## The following objects are masked from 'package:stats':
##
## contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
##
## as.array
# Build a memisc data.set from the built-in Titanic contingency table.
x <- memisc::data.set(Titanic)
# Report the low-level storage type of the resulting object.
typeof(x)
## [1] "list"
# Auto-print the data set.
x
##
## Data set with 32 observations and 5 variables
##
## Titanic.Class Titanic.Sex Titanic.Age Titanic.Survived Titanic.Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
## 7 3rd Female Child No 17
## 8 Crew Female Child No 0
## 9 1st Male Adult No 118
## 10 2nd Male Adult No 154
## 11 3rd Male Adult No 387
## 12 Crew Male Adult No 670
## 13 1st Female Adult No 4
## 14 2nd Female Adult No 13
## 15 3rd Female Adult No 89
## 16 Crew Female Adult No 3
## 17 1st Male Child Yes 5
## 18 2nd Male Child Yes 11
## 19 3rd Male Child Yes 13
## 20 Crew Male Child Yes 0
## 21 1st Female Child Yes 1
## 22 2nd Female Child Yes 13
## 23 3rd Female Child Yes 14
## 24 Crew Female Child Yes 0
## 25 1st Male Adult Yes 57
## .. ............. ........... ........... ................ ............
## (25 of 32 observations shown)
# Print the codebook for the data set, one section per variable.
memisc::codebook(x)
## ================================================================================
##
## Titanic.Class
##
## --------------------------------------------------------------------------------
##
## Storage mode: integer
## Measurement: nominal
##
## Values and labels N Percent
##
## 1 '1st' 8 25.0
## 2 '2nd' 8 25.0
## 3 '3rd' 8 25.0
## 4 'Crew' 8 25.0
##
## ================================================================================
##
## Titanic.Sex
##
## --------------------------------------------------------------------------------
##
## Storage mode: integer
## Measurement: nominal
##
## Values and labels N Percent
##
## 1 'Male' 16 50.0
## 2 'Female' 16 50.0
##
## ================================================================================
##
## Titanic.Age
##
## --------------------------------------------------------------------------------
##
## Storage mode: integer
## Measurement: nominal
##
## Values and labels N Percent
##
## 1 'Child' 16 50.0
## 2 'Adult' 16 50.0
##
## ================================================================================
##
## Titanic.Survived
##
## --------------------------------------------------------------------------------
##
## Storage mode: integer
## Measurement: nominal
##
## Values and labels N Percent
##
## 1 'No' 16 50.0
## 2 'Yes' 16 50.0
##
## ================================================================================
##
## Titanic.Freq
##
## --------------------------------------------------------------------------------
##
## Storage mode: double
## Measurement: interval
##
## Min: 0.000
## Max: 670.000
## Mean: 68.781
## Std.Dev.: 133.854
# Recreate the data set from the Titanic table and report its class.
x <- memisc::data.set(Titanic)
class(x)
## [1] "data.set"
## attr(,"package")
## [1] "memisc"
# Titanic is a 4-way contingency table, so sapply() over it visits each of
# its 32 cells one at a time: every cell is "numeric" and each per-cell
# min/max/range is just the cell value itself. Inspect the variables through
# the long-format data frame instead, and summarise the counts as a whole.

# Class of each variable (one entry per column of the long-format data).
vapply(as.data.frame(Titanic), function(col) class(col)[1L], character(1))
## Class Sex Age Survived Freq
## "factor" "factor" "factor" "factor" "numeric"

# Smallest cell count in the table.
min(Titanic)
## [1] 0

# Largest cell count in the table.
max(Titanic)
## [1] 670

# Smallest and largest cell counts together.
range(Titanic)
## [1] 0 670
# Summarise the contingency table: total number of cases, number of factors,
# and a chi-squared test for independence of all factors.
summary(Titanic)
## Number of cases in table: 2201
## Number of factors: 4
## Test for independence of all factors:
## Chisq = 1637.4, df = 25, p-value = 0
## Chi-squared approximation may be incorrect