BE 7024 & PH 7024 Computational Statistics Fall, 2017 Homework No. 1 Umesh Singh

R Markdown Presentation for Homework 1

This is an R Markdown presentation for Homework 1

Installing package ‘LearnBayes’ and activating library ‘LearnBayes’

# Load the LearnBayes package (it provides the studentdata data set).
# First-time setup only: install.packages("LearnBayes")
library(LearnBayes)

Checking data size (data dimensions)

#1. What is the size of the data? (data dimension)
# dim() reports the number of rows followed by the number of columns.
dim(studentdata)
## [1] 657  11

Loading Dataset studentdata and printing first six rows

#2. Show the top six rows of the data.
# Load the data set explicitly, then print its first rows
# (head() shows six rows by default).
data(studentdata)
head(studentdata)
##   Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut  Job
## 1       1     67 female    10      5   10    -2.5    5.5      60 30.0
## 2       2     64 female    20      7    5     1.5    8.0       0 20.0
## 3       3     61 female    12      2    6    -1.5    7.5      48  0.0
## 4       4     61 female     3      6   40     2.0    8.5      10  0.0
## 5       5     70   male     4      5    6     0.0    9.0      15 17.5
## 6       6     63 female    NA      3    5     1.0    8.5      25  0.0
##   Drink
## 1 water
## 2   pop
## 3  milk
## 4 water
## 5   pop
## 6 water

Printing specific observations (Students 4, 22, 517, 533)

#3. Pull out the complete data of Students 4, 22, 517, and 533.
# Match on the Student id column rather than on row position, so the right
# students are returned even if the rows are ever reordered or filtered.
wanted_students <- c(4, 22, 517, 533)
sd1 <- studentdata[studentdata$Student %in% wanted_students, ]
sd1
##     Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut Job
## 4         4     61 female     3      6   40     2.0    8.5      10   0
## 22       22     61 female    20      5   10     1.5    7.5      20   5
## 517     517     71   male     5      7   15     1.5    8.0      15  20
## 533     533     70   male     3      5   10     0.0    7.0       9  12
##     Drink
## 4   water
## 22  water
## 517 water
## 533   pop

Printing studentdata documentation

#4. Using the documentation command, describe each variable in the data.
# Open the help page for the data set; it describes every column.
help("studentdata")

Determining class of variables

#6. What is the nature of each variable?
# vapply() pins the result to exactly one class name per column, so the result
# is always a named character vector (sapply() can silently return a list
# when a column carries more than one class).
vapply(studentdata, class, character(1))
##   Student    Height    Gender     Shoes    Number      Dvds   ToSleep 
## "integer" "numeric"  "factor" "numeric" "integer" "numeric" "numeric" 
##    WakeUp   Haircut       Job     Drink 
## "numeric" "numeric" "numeric"  "factor"

Summary of studentdata

#7. Show the summary statistics of each variable.
# summary() on a data frame works column by column: quartiles and mean for
# numeric columns, level counts for factors, plus an NA count where present.
summary(studentdata)
##     Student        Height        Gender        Shoes       
##  Min.   :  1   Min.   :54.0   female:435   Min.   :  0.00  
##  1st Qu.:165   1st Qu.:64.0   male  :222   1st Qu.:  6.00  
##  Median :329   Median :66.0                Median : 12.00  
##  Mean   :329   Mean   :66.7                Mean   : 15.42  
##  3rd Qu.:493   3rd Qu.:70.0                3rd Qu.: 20.00  
##  Max.   :657   Max.   :84.0                Max.   :164.00  
##                NA's   :10                  NA's   :22      
##      Number           Dvds            ToSleep           WakeUp      
##  Min.   : 1.00   Min.   :   0.00   Min.   :-2.500   Min.   : 1.000  
##  1st Qu.: 4.00   1st Qu.:  10.00   1st Qu.: 0.000   1st Qu.: 7.500  
##  Median : 6.00   Median :  20.00   Median : 1.000   Median : 8.500  
##  Mean   : 5.67   Mean   :  30.93   Mean   : 1.001   Mean   : 8.383  
##  3rd Qu.: 7.00   3rd Qu.:  30.00   3rd Qu.: 2.000   3rd Qu.: 9.000  
##  Max.   :10.00   Max.   :1000.00   Max.   : 6.000   Max.   :13.000  
##  NA's   :2       NA's   :16        NA's   :3        NA's   :2       
##     Haircut            Job          Drink    
##  Min.   :  0.00   Min.   : 0.00   milk :113  
##  1st Qu.: 10.00   1st Qu.: 0.00   pop  :178  
##  Median : 16.00   Median :10.50   water:355  
##  Mean   : 25.91   Mean   :11.45   NA's : 11  
##  3rd Qu.: 30.00   3rd Qu.:17.50              
##  Max.   :180.00   Max.   :80.00              
##  NA's   :20       NA's   :32

Gender distribution

#8. What is the gender distribution?
# Count the students in each Gender level.
table(studentdata[["Gender"]])
## 
## female   male 
##    435    222

Most, Second most and Least favorite number of students

#9. What is the most favorite number of the students?
# Tabulate how many students picked each number; the largest count marks the
# most popular choice.
table(studentdata[["Number"]])
## 
##   1   2   3   4   5   6   7   8   9  10 
##   9  57  76  60  78  66 191  69  42   7
## Most favorite number is 7 (chosen by 191 students)

#10. What is the second most favorite number of the students?
## Second most favorite number is 5 (78 students, just ahead of 3 with 76)

#11. What is the least favorite number of students?
## Least favorite number is 10 (chosen by only 7 students)

Table Shoes

#12. Use the 'table' command on 'studentdata$Shoes' and show the output.
# Shoes is numeric, so every distinct value (including fractional ones such as
# 13.5) becomes its own category in the table.
table(studentdata$Shoes)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13 13.5 
##    1    2   16   34   44   51   35   22   30   13   63    5   20    8    1 
##   14   15 15.5   16   17 17.5   18   20   21   22   23   24   25   26   27 
##    5   67    1    3    4    2    7   60    4    2    4    4   30    2    2 
## 27.5   28   29   30   32   33   34   35   36   37   38   40   44   45   47 
##    2    1    1   32    2    1    1   16    1    1    1   11    1    1    1 
##   49   50   53   54   55   60   63   65   70  100  164 
##    1    8    1    1    1    1    1    1    3    1    1

Unusual (non-integer) values in Shoes

#13. What is unusual about the output in question 12?
# List every student whose Shoes value has a non-zero fractional part. Testing
# the remainder catches all non-integer values, not only a hand-typed list of
# them, and avoids `==` comparisons against individual doubles.
# subset() drops rows where the condition is NA (missing Shoes).
sd2 <- subset(studentdata, Shoes %% 1 != 0, select = c(Student, Shoes))
sd2
##     Student Shoes
## 282     282  17.5
## 296     296  13.5
## 377     377  17.5
## 397     397  27.5
## 483     483  27.5
## 618     618  15.5
##Some of the values are not integers e.g., 17.5, 15.5

Table Drink

#14. Use the 'table' command on 'studentdata$Drink.'
# Counts per drink; the 11 students with a missing Drink are not shown because
# table() omits NA by default.
table(studentdata$Drink)
## 
##  milk   pop water 
##   113   178   355

Table Gender x Drink

#15. Cross-tabulate 'Gender' and 'Drink.'
# Rows are Gender levels and columns are Drink levels; students with a missing
# Drink are left out of the counts (table() ignores NA by default).
genderdrink <- table(studentdata[["Gender"]], studentdata[["Drink"]])
genderdrink
##         
##          milk pop water
##   female   63 110   256
##   male     50  68    99

Table Proportion Gender x Drink

#16. Calculate proportions row-wise and column-wise as well, correct to two decimal places.
# margin = 1 normalises each row (drink shares within a gender);
# margin = 2 normalises each column (gender shares within a drink).
# Without a margin, prop.table() divides by the grand total instead.
round(prop.table(genderdrink, margin = 1), 2)
##         
##          milk  pop water
##   female 0.15 0.26  0.60
##   male   0.23 0.31  0.46
round(prop.table(genderdrink, margin = 2), 2)
##         
##          milk  pop water
##   female 0.56 0.62  0.72
##   male   0.44 0.38  0.28