Assignment #4
Woongbae Park
November 19, 2015
require(corrplot)
## Loading required package: corrplot
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(Ecdat)
## Loading required package: Ecdat
## Loading required package: Ecfun
## 
## Attaching package: 'Ecdat'
## 
## The following object is masked from 'package:datasets':
## 
##     Orange
require(ggvis)
## Loading required package: ggvis
# Load the Mroz data set from the attached Ecdat package and list its columns.
data("Mroz", package = "Ecdat")
colnames(Mroz)
##  [1] "work"       "hoursw"     "child6"     "child618"   "agew"      
##  [6] "educw"      "hearnw"     "wagew"      "hoursh"     "ageh"      
## [11] "educh"      "wageh"      "income"     "educwm"     "educwf"    
## [16] "unemprate"  "city"       "experience"
# Per-column overview: level counts for the factor columns (work, city) and
# min / quartiles / mean / max for the numeric columns.
summary(Mroz)
##   work         hoursw           child6          child618    
##  yes:325   Min.   :   0.0   Min.   :0.0000   Min.   :0.000  
##  no :428   1st Qu.:   0.0   1st Qu.:0.0000   1st Qu.:0.000  
##            Median : 288.0   Median :0.0000   Median :1.000  
##            Mean   : 740.6   Mean   :0.2377   Mean   :1.353  
##            3rd Qu.:1516.0   3rd Qu.:0.0000   3rd Qu.:2.000  
##            Max.   :4950.0   Max.   :3.0000   Max.   :8.000  
##       agew           educw           hearnw           wagew     
##  Min.   :30.00   Min.   : 5.00   Min.   : 0.000   Min.   :0.00  
##  1st Qu.:36.00   1st Qu.:12.00   1st Qu.: 0.000   1st Qu.:0.00  
##  Median :43.00   Median :12.00   Median : 1.625   Median :0.00  
##  Mean   :42.54   Mean   :12.29   Mean   : 2.375   Mean   :1.85  
##  3rd Qu.:49.00   3rd Qu.:13.00   3rd Qu.: 3.788   3rd Qu.:3.58  
##  Max.   :60.00   Max.   :17.00   Max.   :25.000   Max.   :9.98  
##      hoursh          ageh           educh           wageh        
##  Min.   : 175   Min.   :30.00   Min.   : 3.00   Min.   : 0.4121  
##  1st Qu.:1928   1st Qu.:38.00   1st Qu.:11.00   1st Qu.: 4.7883  
##  Median :2164   Median :46.00   Median :12.00   Median : 6.9758  
##  Mean   :2267   Mean   :45.12   Mean   :12.49   Mean   : 7.4822  
##  3rd Qu.:2553   3rd Qu.:52.00   3rd Qu.:15.00   3rd Qu.: 9.1667  
##  Max.   :5010   Max.   :60.00   Max.   :17.00   Max.   :40.5090  
##      income          educwm           educwf         unemprate     
##  Min.   : 1500   Min.   : 0.000   Min.   : 0.000   Min.   : 3.000  
##  1st Qu.:15428   1st Qu.: 7.000   1st Qu.: 7.000   1st Qu.: 7.500  
##  Median :20880   Median :10.000   Median : 7.000   Median : 7.500  
##  Mean   :23081   Mean   : 9.251   Mean   : 8.809   Mean   : 8.624  
##  3rd Qu.:28200   3rd Qu.:12.000   3rd Qu.:12.000   3rd Qu.:11.000  
##  Max.   :96000   Max.   :17.000   Max.   :17.000   Max.   :14.000  
##   city       experience   
##  no :269   Min.   : 0.00  
##  yes:484   1st Qu.: 4.00  
##            Median : 9.00  
##            Mean   :10.63  
##            3rd Qu.:15.00  
##            Max.   :45.00
1. Select several continuous variables from Mroz.
# Keep only the four continuous columns used in the correlation analysis,
# then preview the first six rows.
Mroz1 <- dplyr::select(Mroz, wagew, income, agew, ageh)
head(Mroz1, n = 6L)
##   wagew income agew ageh
## 1  2.65  16310   32   34
## 2  2.65  21800   30   30
## 3  4.04  21040   35   40
## 4  3.25   7300   34   53
## 5  3.60  27300   31   32
## 6  4.70  19495   54   57
2. Estimate Pearson Product-Moment Correlations for four pairs of variables.
# Pairwise correlation matrix ($r), p-values ($p) and symbol table ($sym) for
# every pair of columns in Mroz1.
# NOTE(review): rquery.cormat() does not appear to come from any package
# loaded above; presumably it is sourced from an external helper script.
# Confirm it is defined before this call.
rquery.cormat(Mroz1)

## $r
##          agew   ageh wagew income
## agew        1                    
## ageh     0.89      1             
## wagew  -0.058 -0.055     1       
## income  0.052  0.041  0.21      1
## 
## $p
##        agew ageh   wagew income
## agew      0                    
## ageh      0    0               
## wagew  0.11 0.13       0       
## income 0.15 0.27 7.4e-09      0
## 
## $sym
##        agew ageh wagew income
## agew   1                     
## ageh   +    1                
## wagew            1           
## income                 1     
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
3. Test null hypotheses that the population correlations = 0 for the four pairs of variables you selected.
# Two-sided Pearson test of H0: correlation = 0 between income and wagew.
cor.test(Mroz1$income, Mroz1$wagew)
## 
##  Pearson's product-moment correlation
## 
## data:  Mroz1$income and Mroz1$wagew
## t = 5.8484, df = 751, p-value = 7.404e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1393437 0.2760413
## sample estimates:
##       cor 
## 0.2087117
The correlation coefficient is 0.21 (p < 0.001). There is a weak (small) but statistically significant positive linear correlation between income and wagew. We reject the null hypothesis.
# Two-sided Pearson test of H0: correlation = 0 between income and ageh.
cor.test(Mroz1$income, Mroz1$ageh)
## 
##  Pearson's product-moment correlation
## 
## data:  Mroz1$income and Mroz1$ageh
## t = 1.1109, df = 751, p-value = 0.267
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03103299  0.11162545
## sample estimates:
##        cor 
## 0.04050263
The correlation coefficient is 0.04 (p = 0.27). There is no statistically significant linear correlation between income and ageh. We fail to reject the null hypothesis.
# Two-sided Pearson test of H0: correlation = 0 between income and agew.
cor.test(Mroz1$income, Mroz1$agew)
## 
##  Pearson's product-moment correlation
## 
## data:  Mroz1$income and Mroz1$agew
## t = 1.4391, df = 751, p-value = 0.1505
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01907725  0.12342346
## sample estimates:
##        cor 
## 0.05244006
The correlation coefficient is 0.05 (p = 0.15). There is no statistically significant linear correlation between income and agew. We fail to reject the null hypothesis.
# Two-sided Pearson test of H0: correlation = 0 between ageh and agew.
cor.test(Mroz1$ageh, Mroz1$agew)
## 
##  Pearson's product-moment correlation
## 
## data:  Mroz1$ageh and Mroz1$agew
## t = 52.959, df = 751, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8720254 0.9023277
## sample estimates:
##      cor 
## 0.888138
The correlation coefficient is 0.89 (p < 0.001). There is a strong (large), statistically significant positive linear correlation between ageh and agew. We reject the null hypothesis.
4. Using ggvis, plot scatterplots containing points and a smooth line for the four pairs of variables you selected.
# Scatterplots with a smoothed trend line for each selected pair of variables.
# Every pipeline step is an explicit call (layer_points(), not the bare
# function name) so the chain is uniform and would also work with the native
# pipe, which rejects a bare function name on its right-hand side.

# wagew against income
Mroz1 %>%
  ggvis(x = ~income, y = ~wagew) %>%
  layer_points() %>%
  layer_smooths()

# agew against income
Mroz1 %>%
  ggvis(x = ~income, y = ~agew) %>%
  layer_points() %>%
  layer_smooths()

# ageh against income
Mroz1 %>%
  ggvis(x = ~income, y = ~ageh) %>%
  layer_points() %>%
  layer_smooths()

# ageh against agew
Mroz1 %>%
  ggvis(x = ~agew, y = ~ageh) %>%
  layer_points() %>%
  layer_smooths()

5. Produce correlograms and heat maps for the four pairs of variables you selected.
require(corrgram)
## Loading required package: corrgram
# Correlogram of the Mroz1 columns with the upper panel turned off.
corrgram(x = Mroz1, upper.panel = NULL)

# Heat-map rendering of the same correlations; the returned object is kept in
# `cormat`.
cormat <- rquery.cormat(Mroz1, graphType = "heatmap")