In-Class Exercise 1: Chapter 4 of Lattice

Load data file

VADeaths

##       Rural Male Rural Female Urban Male Urban Female
## 50-54       11.7          8.7       15.4          8.4
## 55-59       18.1         11.7       24.3         13.6
## 60-64       26.9         20.3       37.0         19.3
## 65-69       41.0         30.9       54.6         35.1
## 70-74       66.0         54.3       71.1         50.0

## Show data type
class(VADeaths)

## [1] "matrix"

Show the methods of dotplot in lattice package

library(lattice)
methods("dotplot")

## [1] dotplot.array*   dotplot.default* dotplot.formula* dotplot.matrix* 
## [5] dotplot.numeric* dotplot.table*  
## see '?methods' for accessing help and source code

Plot the dotplot of death rates for the different age by people

dotplot(VADeaths, groups=FALSE)

Plot the dotplot of death rates, show one column and four rows

## One panel per column of VADeaths, arranged as 1 column x 4 rows.
## type "h" adds a line from the origin (0) to each point.
dotplot(
  VADeaths,
  groups = FALSE,
  layout = c(1, 4),
  aspect = 0.7,
  origin = 0,
  type = c("p", "h"),
  main = "Death Rates in Virginia - 1940",
  xlab = "Rate (per 1000)"
)

Plot the scatter plot of death rates for the different age by people in one figure.

## All columns of VADeaths in a single panel: points joined by lines
## (type "o"), with a legend showing lines placed to the right of the plot.
dotplot(
  VADeaths,
  type = "o",
  auto.key = list(lines = TRUE, space = "right"),
  main = "Death Rates in Virginia - 1940",
  xlab = "Rate (per 1000)"
)

Plot the barplot of death rates for the different age by people

## Bar chart of the death rates, one panel per column of VADeaths
## (1 column x 4 rows), without the reference line.
## VADeaths holds rates per 1000 population, so the axis label matches the
## dot plots of the same data.
barchart(
  VADeaths,
  groups = FALSE,
  layout = c(1, 4),
  aspect = 0.7,
  reference = FALSE,
  main = "Death Rates in Virginia - 1940",
  xlab = "Rate (per 1000)"
)

Load data file

data(postdoc, package = "latticeExtra")

## Bar chart of the row-wise proportions of postdoc
## (margin = 1 divides each row by its row total)
barchart(
  prop.table(postdoc, margin = 1),
  xlab = "Proportion",
  auto.key = list(adj = 1)
)

Plot scatter plot of the proportion of the field by training and occupations

## Dot plot of the row-wise proportions of postdoc in separate panels
## (groups = FALSE); strip labels are abbreviated (minlength = 10).
dotplot(
  prop.table(postdoc, margin = 1),
  groups = FALSE,
  xlab = "Proportion",
  par.strip.text = list(abbreviate = TRUE, minlength = 10)
)

## Plot scatter plot of the proportion of the field by training and occupations, Show in different panels.

dotplot(
  prop.table(postdoc, margin = 1),
  groups = FALSE,
  ## Order the panels by the median of the x values shown in each panel
  index.cond = function(x, y) median(x),
  xlab = "Proportion",
  layout = c(1, 5),
  aspect = 0.6,
  ## Each panel gets its own y scale so its categories can be sorted separately
  scales = list(y = list(relation = "free", rot = 0)),
  prepanel = function(x, y) {
    ## Axis limits: the y levels reordered by their x values
    sorted_y <- reorder(y, x)
    list(ylim = levels(sorted_y))
  },
  panel = function(x, y, ...) {
    ## Draw the points using the same reordered factor as the prepanel
    sorted_y <- reorder(y, x)
    panel.dotplot(x, sorted_y, ...)
  }
)

Load data file

data(Chem97, package = "mlmRev")

## Count the observations in Chem97 for each combination of gcsescore and gender
gcsescore.tab <- xtabs(~ gcsescore + gender, data = Chem97)

## Long format: one row per (gcsescore, gender) cell, with the count in Freq
gcsescore.df <- as.data.frame(gcsescore.tab)

## gcsescore is a factor after the conversion; map its level labels back to
## the numeric scores they represent
gcsescore.df$gcsescore <-
  as.numeric(levels(gcsescore.df$gcsescore))[gcsescore.df$gcsescore]

Plot a histogram of GCSE scores by different gender

## Counts drawn as vertical lines (type "h") at each score, one panel per
## gender, stacked in 1 column x 2 rows.
xyplot(
  Freq ~ gcsescore | gender,
  data = gcsescore.df,
  type = "h",
  layout = c(1, 2),
  xlab = "Average GCSE Score"
)

## Count the observations in Chem97 for each combination of score and gender
score.tab <- xtabs(~ score + gender, data = Chem97)

## Long format: one row per (score, gender) cell, with the count in Freq
score.df <- as.data.frame(score.tab)

## Bar chart of the counts by score, one panel per gender, bars starting at 0
barchart(Freq ~ score | gender, data = score.df, origin = 0)

In-Class Exercise 2: The Reading scores for the Student-Teacher ratio

Load data file

dta <- Ecdat::Caschool
head(dta)

##   distcod  county                        district grspan enrltot teachers
## 1   75119 Alameda              Sunol Glen Unified  KK-08     195    10.90
## 2   61499   Butte            Manzanita Elementary  KK-08     240    11.15
## 3   61549   Butte     Thermalito Union Elementary  KK-08    1550    82.90
## 4   61457   Butte Golden Feather Union Elementary  KK-08     243    14.00
## 5   61523   Butte        Palermo Union Elementary  KK-08    1335    71.50
## 6   62042  Fresno         Burrel Union Elementary  KK-08     137     6.40
##   calwpct mealpct computer testscr   compstu  expnstu      str    avginc
## 1  0.5102  2.0408       67  690.80 0.3435898 6384.911 17.88991 22.690001
## 2 15.4167 47.9167      101  661.20 0.4208333 5099.381 21.52466  9.824000
## 3 55.0323 76.3226      169  643.60 0.1090323 5501.955 18.69723  8.978000
## 4 36.4754 77.0492       85  647.70 0.3497942 7101.831 17.35714  8.978000
## 5 33.1086 78.4270      171  640.85 0.1280899 5235.988 18.67133  9.080333
## 6 12.3188 86.9565       25  605.55 0.1824818 5580.147 21.40625 10.415000
##       elpct readscr mathscr
## 1  0.000000   691.6   690.0
## 2  4.583333   660.5   661.9
## 3 30.000002   636.3   650.9
## 4  0.000000   651.9   643.5
## 5 13.857677   641.8   639.9
## 6 12.408759   605.7   605.4

Create a new variable of student-teacher ratio

In fact, the raw data set already contains a variable, str, which is identical to the new variable I created.

## Student-teacher ratio: total enrolment divided by number of teachers
dta$ST_R <- with(dta, enrltot / teachers)
head(dta)

##   distcod  county                        district grspan enrltot teachers
## 1   75119 Alameda              Sunol Glen Unified  KK-08     195    10.90
## 2   61499   Butte            Manzanita Elementary  KK-08     240    11.15
## 3   61549   Butte     Thermalito Union Elementary  KK-08    1550    82.90
## 4   61457   Butte Golden Feather Union Elementary  KK-08     243    14.00
## 5   61523   Butte        Palermo Union Elementary  KK-08    1335    71.50
## 6   62042  Fresno         Burrel Union Elementary  KK-08     137     6.40
##   calwpct mealpct computer testscr   compstu  expnstu      str    avginc
## 1  0.5102  2.0408       67  690.80 0.3435898 6384.911 17.88991 22.690001
## 2 15.4167 47.9167      101  661.20 0.4208333 5099.381 21.52466  9.824000
## 3 55.0323 76.3226      169  643.60 0.1090323 5501.955 18.69723  8.978000
## 4 36.4754 77.0492       85  647.70 0.3497942 7101.831 17.35714  8.978000
## 5 33.1086 78.4270      171  640.85 0.1280899 5235.988 18.67133  9.080333
## 6 12.3188 86.9565       25  605.55 0.1824818 5580.147 21.40625 10.415000
##       elpct readscr mathscr     ST_R
## 1  0.000000   691.6   690.0 17.88991
## 2  4.583333   660.5   661.9 21.52466
## 3 30.000002   636.3   650.9 18.69723
## 4  0.000000   651.9   643.5 17.35714
## 5 13.857677   641.8   639.9 18.67133
## 6 12.408759   605.7   605.4 21.40625

Divide the reading scores into three parts

Filter grspan == KK-08

library(tidyverse)

## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─

## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0

## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

## Split the reading scores into Low / Medium / High at the 33rd and 67th
## percentiles (computed over all rows, before filtering), then keep only the
## districts whose grade span is KK-08.
## include.lowest = TRUE keeps the row with the minimum score in "L"; without
## it that value lies outside the first (lower-open) interval and becomes NA.
## Argument names are spelled out in full instead of relying on partial
## matching, and TRUE is used instead of the reassignable T.
dta_n <- dta %>%
  mutate(RSCR = cut(readscr,
                    breaks = quantile(readscr, probs = c(0, .33, .67, 1)),
                    labels = c("L", "M", "H"),
                    include.lowest = TRUE,
                    ordered_result = TRUE)) %>%
  filter(grspan == "KK-08")

Plot the reading scores against the student-teacher ratio for each level of reading scores

## Reading score against student-teacher ratio, one panel per reading level
## (3 columns x 1 row); each panel shows points, a grid and a regression line.
xyplot(
  readscr ~ ST_R | RSCR,
  data = dta_n,
  xlab = "Student-Teacher ratio",
  ylab = "Reading score",
  type = c("p", "g", "r"),
  layout = c(3, 1),
  pch = 1
)

In-Class Exercise 3:

Load data file

## Read the course-evaluation data; the first line of the file holds the
## column names.
## NOTE(review): the path is absolute and machine-specific, so this chunk only
## runs where that file exists.
dta <- read.table(
  "/Users/haolunfu/Documents/資料管理/week6/beautyCourseEval.txt",
  header = TRUE
)
head(dta)

##   eval     beauty sex age minority tenure courseID
## 1  4.3  0.2015666   1  36        1      0        3
## 2  4.5 -0.8260813   0  59        0      1        0
## 3  3.7 -0.6603327   0  51        0      1        4
## 4  4.3 -0.7663125   1  40        0      1        2
## 5  4.4  1.4214450   1  31        0      0        0
## 6  4.2  0.5002196   0  62        0      1        0

Show the details of the data

str(dta)

## 'data.frame':    463 obs. of  7 variables:
##  $ eval    : num  4.3 4.5 3.7 4.3 4.4 4.2 4 3.4 4.5 3.9 ...
##  $ beauty  : num  0.202 -0.826 -0.66 -0.766 1.421 ...
##  $ sex     : int  1 0 0 1 1 0 1 1 1 0 ...
##  $ age     : int  36 59 51 40 31 62 33 51 33 47 ...
##  $ minority: int  1 0 0 0 0 0 0 0 0 0 ...
##  $ tenure  : int  0 1 1 1 0 1 0 1 0 0 ...
##  $ courseID: int  3 0 4 2 0 0 4 0 0 4 ...

Plot

## Evaluation score against beauty score, one panel per course (6 x 6 grid),
## with points, a grid and a regression line in each panel.
xyplot(
  eval ~ beauty | as.factor(courseID),
  data = dta,
  type = c("p", "g", "r"),
  layout = c(6, 6),
  xlab = "Beauty judgment score",
  ylab = "Average course evaluation score"
)

I tried to list the betas (regression coefficients) for each course, but could not get them to work with the plot function.

## Fit eval ~ beauty on one subset of the data and return the coefficient
## vector (intercept and slope).
cal_betas <- function(df) {
  fit <- lm(eval ~ beauty, data = df)
  coef(fit)
}

## Apply the fit to each courseID separately: one row per course, one column
## per coefficient.
betas <- t(vapply(split(dta, dta$courseID), cal_betas, numeric(2)))
betas_df <- as.data.frame(betas)

Following my classmate's procedure (thanks, Jay Liao), I use function(x, y) coefficients(lm(y ~ x))[2] to reorder the panels.

## Same plot, with the panels ordered by the slope of the per-panel
## regression of y on x (second coefficient of lm(y ~ x)).
xyplot(
  eval ~ beauty | as.factor(courseID),
  data = dta,
  type = c("p", "g", "r"),
  layout = c(6, 6),
  index.cond = function(x, y) {
    coefficients(lm(y ~ x))[2]
  },
  xlab = "Beauty judgment score",
  ylab = "Average course evaluation score"
)

In-Class Exercise 4: Brain Size and IQ

Load data file

## Read the brain-size data; the first line of the file holds the column names.
## NOTE(review): the path is absolute and machine-specific, so this chunk only
## runs where that file exists.
dta <- read.table(
  "/Users/haolunfu/Documents/資料管理/week6/brainsize.txt",
  header = TRUE
)
head(dta)

##   Sbj Gender FSIQ VIQ PIQ Weight Height MRICount
## 1   1 Female  133 132 124    118   64.5   816932
## 2   2   Male  140 150 124     NA   72.5  1001121
## 3   3   Male  139 123 150    143   73.3  1038437
## 4   4   Male  133 129 128    172   68.8   965353
## 5   5 Female  137 132 134    147   65.0   951545
## 6   6 Female   99  90 110    146   69.0   928799

Transform the different type of IQ from wide to long format

library(reshape)

## 
## Attaching package: 'reshape'

## The following object is masked from 'package:dplyr':
## 
##     rename

## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths

library(dplyr)
## Stack every column not listed in id.vars (the IQ measures) into long
## format, then give the subject, IQ-type and IQ-score columns readable names.
dta_n <- melt(dta, id.vars = c("Sbj", "Gender", "Weight", "Height", "MRICount"))
dta_n <- dplyr::rename(dta_n, Subject = Sbj, Type = variable, IQ = value)

Q: Are there gender differences in the three IQ scores?

## Box-and-whisker plots of IQ by gender, one panel per IQ type
bwplot(
  IQ ~ Gender | Type,
  data = dta_n,
  xlab = "Gender",
  ylab = "IQ score"
)

## A: Yes, by visual inspection, specifically in FSIQ (small effect) and VIQ (large effect), but not in PIQ (no effect).

Q: Is the relationship between height and weight gender dependent?

## Weight against height in a single panel, with points and a regression
## line drawn separately for each gender, over a grid.
xyplot(
  Weight ~ Height,
  groups = Gender,
  data = dta,
  xlab = "Height",
  ylab = "Weight",
  type = c("p", "g", "r")
)

## A: Yes. By visual inspection, gender modulates the relationship between height and weight.

Q: Is the relationship between IQ and brainsize (as measured by MRIcount) gender dependent?

## IQ against MRI count, one panel per IQ type, with points and a regression
## line drawn separately for each gender, over a grid.
xyplot(
  IQ ~ MRICount | Type,
  groups = Gender,
  data = dta_n,
  xlab = "Brain Size",
  ylab = "IQ",
  type = c("p", "g", "r")
)

## A: Because I could not see a clear pattern in the figure, I fitted linear regression models to examine the gender effect.

## Regress FSIQ on MRICount and Gender and print the model summary.
## NOTE(review): this additive model estimates a Gender shift with MRICount
## held fixed; it has no MRICount:Gender term, so it does not test whether the
## FSIQ-MRICount slope differs by gender.
summary(lm(FSIQ ~ MRICount + Gender, data = dta))

## 
## Call:
## lm(formula = FSIQ ~ MRICount + Gender, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.410 -20.011   0.924  21.978  36.094 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -4.371e+01  5.644e+01  -0.774  0.44358   
## MRICount     1.804e-04  6.516e-05   2.768  0.00876 **
## GenderMale  -1.353e+01  9.302e+00  -1.455  0.15418   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.46 on 37 degrees of freedom
## Multiple R-squared:  0.1751, Adjusted R-squared:  0.1305 
## F-statistic: 3.927 on 2 and 37 DF,  p-value: 0.02842

The results revealed no significant gender effect on FSIQ once brain size is taken into account (p = 0.154).

## Regress VIQ on MRICount and Gender and print the model summary.
## NOTE(review): this additive model estimates a Gender shift with MRICount
## held fixed; it has no MRICount:Gender term, so it does not test whether the
## VIQ-MRICount slope differs by gender.
summary(lm(VIQ ~ MRICount + Gender, data = dta))

## 
## Call:
## lm(formula = VIQ ~ MRICount + Gender, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.840 -18.429  -1.268  17.487  35.537 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.491e+01  5.687e+01  -0.262   0.7946  
## MRICount     1.442e-04  6.567e-05   2.195   0.0345 *
## GenderMale  -7.492e+00  9.374e+00  -0.799   0.4293  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.63 on 37 degrees of freedom
## Multiple R-squared:  0.1289, Adjusted R-squared:  0.08184 
## F-statistic: 2.738 on 2 and 37 DF,  p-value: 0.0778

The results revealed no significant gender effect on VIQ once brain size is taken into account (p = 0.429).

## Regress PIQ on MRICount and Gender and print the model summary.
## NOTE(review): this additive model estimates a Gender shift with MRICount
## held fixed; it has no MRICount:Gender term, so it does not test whether the
## PIQ-MRICount slope differs by gender.
summary(lm(PIQ ~ MRICount + Gender, data = dta))

## 
## Call:
## lm(formula = PIQ ~ MRICount + Gender, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.696 -15.155  -6.259  16.898  37.771 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -5.985e+01  5.069e+01  -1.181  0.24530   
## MRICount     1.974e-04  5.853e-05   3.373  0.00176 **
## GenderMale  -1.705e+01  8.355e+00  -2.041  0.04844 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.17 on 37 degrees of freedom
## Multiple R-squared:  0.2357, Adjusted R-squared:  0.1944 
## F-statistic: 5.704 on 2 and 37 DF,  p-value: 0.006929

Week 6 In-class exercise (Trellis)

Hao-Lun Fu

2020-04-20

In-Class Exercise 1: Chapter 4 of Lattice

Load data file

Show the methods of dotplot in lattice package

Plot the dotplot of death rates for the different age by people

Plot the dotplot of death rates, show one column and four rows

Plot the scatter plot of death rates for the different age by people in one figure.

Plot the barplot of death rates for the different age by people

Load data file

Plot scatter plot of the proportion of the field by training and occupations

Load data file

Plot a histogram of GCSE scores by different gender

In-Class Exercise 2: The Reading scores for the Student-Teacher ratio

Load data file

Create a new variable of student-teacher ratio

In fact, the raw data set already contains a variable, str, which is identical to the new variable I created.

Divide the reading scores into three parts

Filter grspan == KK-08

Plot the reading scores against the student-teacher ratio for each level of reading scores

In-Class Exercise 3:

Load data file

Show the details of the data

Plot

I tried to list the betas (regression coefficients) for each course, but could not get them to work with the plot function.

Following my classmate's procedure (thanks, Jay Liao), I use function(x, y) coefficients(lm(y ~ x))[2] to reorder the panels.

In-Class Exercise 4: Brain Size and IQ

Load data file

Transform the different type of IQ from wide to long format

Q: Are there gender differences in the three IQ scores?

Q: Is the relationship between height and weight gender dependent?

Q: Is the relationship between IQ and brainsize (as measured by MRIcount) gender dependent?

The results revealed no significant gender effect on FSIQ once brain size is taken into account (p = 0.154).

The results revealed no significant gender effect on VIQ once brain size is taken into account (p = 0.429).

The results revealed a significant gender effect on PIQ once brain size is taken into account (p = 0.048).