Grammar of graphics HW

HW 1

Fifty male and fifty female students fill out the same questionnaire in weekly intervals starting five weeks before an important examination to measure state anxiety.
The research interests are:
1. whether there are gender differences in state anxiety
2. whether there are individual differences in state anxiety. Explore the answers to both questions with plots involving confidence intervals or error bars for the means.

Source: Von Eye, A., & Schuster C. (1998). Regression Analysis for Social Sciences. San Diego: Academic Press.

Column 1: Anxiety score 5 weeks before exam for female
Column 2: Anxiety score 4 weeks before exam for female
Column 3: Anxiety score 3 weeks before exam for female
Column 4: Anxiety score 2 weeks before exam for female
Column 5: Anxiety score 1 week before exam for female
Column 6: Anxiety score 5 weeks before exam for male
Column 7: Anxiety score 4 weeks before exam for male
Column 8: Anxiety score 3 weeks before exam for male
Column 9: Anxiety score 2 weeks before exam for male
Column 10: Anxiety score 1 week before exam for male

loading data and check data structure

# Read the state-anxiety table (its first line supplies the column names)
# and print the structure of the resulting data frame.
dta <- read.table(
  file = "C:/Users/USER/Desktop/R_data management/0420/data/stateAnxiety.txt",
  header = TRUE
)
str(dta)

## 'data.frame':    50 obs. of  10 variables:
##  $ f1: int  13 26 13 22 18 32 16 18 14 20 ...
##  $ f2: int  17 31 17 24 19 31 16 22 17 19 ...
##  $ f3: int  18 33 24 26 19 30 21 25 23 23 ...
##  $ f4: int  20 38 29 27 22 31 27 29 21 25 ...
##  $ f5: int  24 42 32 29 30 32 30 35 25 28 ...
##  $ m1: int  6 4 17 19 12 11 14 9 12 11 ...
##  $ m2: int  14 11 25 22 21 16 23 18 16 13 ...
##  $ m3: int  22 14 26 26 21 20 26 20 23 17 ...
##  $ m4: int  20 12 29 30 23 19 29 20 26 14 ...
##  $ m5: int  24 23 38 34 24 22 33 24 32 20 ...

# Show the first six rows of the anxiety data.
head(dta, n = 6L)

##   f1 f2 f3 f4 f5 m1 m2 m3 m4 m5
## 1 13 17 18 20 24  6 14 22 20 24
## 2 26 31 33 38 42  4 11 14 12 23
## 3 13 17 24 29 32 17 25 26 29 38
## 4 22 24 26 27 29 19 22 26 30 34
## 5 18 19 19 22 30 12 21 21 23 24
## 6 32 31 30 31 32 11 16 20 19 22

data manipulation

library(dplyr)
library(reshape)
# Per gender: subset the five weekly columns, melt to long format, then stack.
# Result `dta2` has one row per student x week with columns
# gender, id, variable (weekb1..weekb5) and value (anxiety score).
week_cols <- paste0("weekb", 1:5)

# select() is namespace-qualified because library(MASS), attached further
# down in this file, masks dplyr::select() when the script is re-run in the
# same session.
dta.m <- dta %>% dplyr::select(m1, m2, m3, m4, m5)
colnames(dta.m) <- week_cols
# A length-one value is recycled to every row. The previous
# rep("M", length(dta.m)) used the number of columns (5), not rows, and only
# worked because 5 happens to divide the row count.
dta.m$gender <- "M"
dta.m$id <- paste0("M", seq_len(nrow(dta.m)))
# reshape::melt() has no `by` argument (it was silently swallowed and the id
# columns were auto-detected); name the id columns explicitly instead.
dta.m.melt <- reshape::melt(dta.m, id.vars = c("gender", "id"))

dta.f <- dta %>% dplyr::select(f1, f2, f3, f4, f5)
colnames(dta.f) <- week_cols
dta.f$gender <- "F"
dta.f$id <- paste0("F", seq_len(nrow(dta.f)))
dta.f.melt <- reshape::melt(dta.f, id.vars = c("gender", "id"))

dta2 <- rbind(dta.m.melt, dta.f.melt)

1. whether there are gender differences in state anxiety

library(ggplot2)
# Anxiety score by week, coloured by gender: raw points (slightly dodged so
# the two genders do not sit on top of each other) plus a per-gender linear
# fit drawn with its confidence band.
pd <- position_dodge(width = 0.2)
ggplot(
  data = dta2,
  mapping = aes(x = variable, y = value, group = gender, color = gender)
) +
  geom_point(position = pd) +
  stat_smooth(method = "lm", se = TRUE) +
  # Relabel weekb1..weekb5 as -5 .. -1 (weeks relative to the exam).
  scale_x_discrete(labels = paste0("-", 5:1)) +
  labs(
    x = "weeks before an important examination",
    y = "Anxiety score"
  ) +
  theme(legend.position = "top")

Females have higher anxiety scores than males in the weeks before an important examination, which suggests that there are gender differences in state anxiety.

2. individual differences in state anxiety

# One linear trend line per student, in separate panels for each gender, to
# show how much individual trajectories differ.
ggplot(dta2, aes(x = variable, y = value)) +
  geom_point() +
  stat_smooth(aes(group = id, color = id), method = "lm", se = FALSE) +
  facet_wrap(. ~ gender) +
  labs(x = "weeks before an important examination", y = "Anxiety score") +
  # Relabel weekb1..weekb5 as -5 .. -1 (weeks relative to the exam).
  scale_x_discrete(labels = paste0("-", 5:1)) +
  # One colour per student would produce an unreadable legend, so hide it.
  # "none" is the documented value; the previous " " is not a valid
  # legend.position and is not guaranteed to be accepted by ggplot2.
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")

From the plot, state anxiety scores appear to differ among individual female students.

HW3

The dataset consists of a sample of 14 primary school children between 8 and 12 years old. The children were asked to respond on 8 emotions and coping strategies scales for each of 6 situations: fail to fulfill assignments in class, not allowed to play with other children, forbidden to do something by the teacher, victim of bullying, too much school work, forbidden to do something by the mother. Plot the data in some meaningful ways. You may have to manipulate data into a different format first.

Column 1: Unpleasant (Annoy)
Column 2: Sad
Column 3: Afraid
Column 4: Angry
Column 5: Approach coping
Column 6: Avoidant coping
Column 7: Social support seeking
Column 8: Emotional reaction, especially aggression
Column 9: Situation ID
Column 10: Children ID

loading data and check data structure

# Read the coping table (its first line supplies the column names) and print
# the structure of the resulting data frame.
dta <- read.table(
  file = "C:/Users/USER/Desktop/R_data management/0420/data/coping.txt",
  header = TRUE
)
str(dta)

## 'data.frame':    84 obs. of  10 variables:
##  $ annoy    : int  4 4 2 4 4 4 3 3 3 4 ...
##  $ sad      : int  2 4 2 3 2 3 2 1 1 4 ...
##  $ afraid   : int  2 4 2 4 1 1 2 1 1 2 ...
##  $ angry    : int  2 2 2 4 1 4 2 2 2 1 ...
##  $ approach : num  1 4 2.67 4 1 2.33 2 1.33 1 1.67 ...
##  $ avoid    : num  2 3 3 1.5 2.75 2.5 1 4 1 4 ...
##  $ support  : num  1 1.25 1 3.25 1.25 1 1.5 2.75 1.33 3.5 ...
##  $ agressive: num  2.5 1.5 2.33 1 1.5 3.67 1 2 1.67 2.5 ...
##  $ situation: Factor w/ 6 levels "Bully","Fail",..: 2 4 5 1 6 3 2 4 5 1 ...
##  $ sbj      : Factor w/ 14 levels "S135","S137",..: 6 6 6 6 6 6 4 4 4 4 ...

# Show the first six rows of the coping data.
head(dta, n = 6L)

##   annoy sad afraid angry approach avoid support agressive situation sbj
## 1     4   2      2     2     1.00  2.00    1.00      2.50      Fail  S2
## 2     4   4      4     2     4.00  3.00    1.25      1.50    NoPart  S2
## 3     2   2      2     2     2.67  3.00    1.00      2.33    TeacNo  S2
## 4     4   3      4     4     4.00  1.50    3.25      1.00     Bully  S2
## 5     4   2      1     1     1.00  2.75    1.25      1.50      Work  S2
## 6     4   3      1     4     2.33  2.50    1.00      3.67     MomNo  S2

data manipulation

library(reshape)
library(dplyr)
# Long format keyed by child (sbj) and situation. The melted
# `variable`/`value` columns are renamed to `emotion`/`scale`, leaving the
# columns sbj, situation, emotion, scale.
dta1 <- reshape::melt(dta, id = c("sbj", "situation"))
names(dta1)[names(dta1) == "variable"] <- "emotion"
names(dta1)[names(dta1) == "value"] <- "scale"

plot

library(ggplot2)
# Density of the responses in a situation (rows) x emotion (columns) grid.
# Written with ggplot() because qplot() is deprecated in current ggplot2;
# a two-sided `facets` formula in qplot() corresponds to facet_grid().
ggplot(dta1, aes(x = scale)) +
  geom_density() +
  facet_grid(situation ~ emotion)

lollipop plot

# Centre each situation-by-emotion mean on that emotion's overall mean.
# Note: this is a mean deviation only; nothing is divided by a standard
# deviation, so the values are centred rather than standardized.
dta2 <- dta1 %>%
  group_by(situation, emotion) %>%
  summarise(value = mean(scale, na.rm = TRUE)) %>%
  as.data.frame()

# Overall mean per emotion across all children and situations. na.rm is set
# here as well so both means treat missing responses the same way.
emotion.mean <- dta1 %>%
  group_by(emotion) %>%
  summarise(m = mean(scale, na.rm = TRUE)) %>%
  as.data.frame()

dta3 <- merge(dta2, emotion.mean, by = "emotion")
dta3$newvalue <- dta3$value - dta3$m
str(dta3)

## 'data.frame':    48 obs. of  5 variables:
##  $ emotion  : Factor w/ 8 levels "annoy","sad",..: 3 3 3 3 3 3 8 8 8 8 ...
##  $ situation: Factor w/ 6 levels "Bully","Fail",..: 3 4 2 1 6 5 3 5 4 2 ...
##  $ value    : num  1.07 1.29 1.86 1.57 1.29 ...
##  $ m        : num  1.4 1.4 1.4 1.4 1.4 ...
##  $ newvalue : num  -0.333 -0.119 0.452 0.167 -0.119 ...

# Lollipop chart: for every emotion panel, one dot per situation at its
# deviation (newvalue), joined by a segment to the zero reference line.
p <- ggplot(data = dta3, mapping = aes(x = newvalue, y = situation)) +
  geom_point() +
  facet_wrap(. ~ emotion, ncol = 4) +
  labs(x = "Standardized score", y = "Situation") +
  geom_segment(mapping = aes(xend = 0, yend = situation)) +
  geom_vline(xintercept = 0)

# Use the same fixed x-range, -1 to 1, in the displayed plot.
p + scale_x_continuous(limits = c(-1, 1))

corrplot

library(ggcorrplot)
# Lower-triangle correlation plot of the first eight columns of `dta`, with
# the coefficients printed in the cells and hc.order switched on.
ggcorrplot(
  corr = cor(dta[, 1:8]),
  hc.order = TRUE,
  type = "lower",
  lab = TRUE
)

HW5

Use the Cushings{MASS} data set to generate a plot similar to the following one:

load data and check data structure

library(MASS)
# Copy the Cushings data set from MASS into `dta` and print its structure.
dta <- MASS::Cushings
str(dta)

## 'data.frame':    27 obs. of  3 variables:
##  $ Tetrahydrocortisone: num  3.1 3 1.9 3.8 4.1 1.9 8.3 3.8 3.9 7.8 ...
##  $ Pregnanetriol      : num  11.7 1.3 0.1 0.04 1.1 0.4 1 0.2 0.6 1.2 ...
##  $ Type               : Factor w/ 4 levels "a","b","c","u": 1 1 1 1 1 1 2 2 2 2 ...

# Show the first six rows of the Cushings data.
head(dta, n = 6L)

##    Tetrahydrocortisone Pregnanetriol Type
## a1                 3.1         11.70    a
## a2                 3.0          1.30    a
## a3                 1.9          0.10    a
## a4                 3.8          0.04    a
## a5                 4.1          1.10    a
## a6                 1.9          0.40    a

data manipulation

# Replace the one-letter Type codes with descriptive labels
# (labels are applied in the order of the existing factor levels).
type_labels <- c("Adenoma", "Bilateral Hyperplasia", "Carcinoma", "Unknown")
dta$Type <- factor(dta$Type, labels = type_labels)

# Keep only the first observation of each type; these rows carry the text
# labels drawn on the plot.
first_rows <- c("a1", "b1", "c1", "u1")
dta1 <- dta[rownames(dta) %in% first_rows, ]

plot

library(ggrepel)
library(ggplot2)
library(ggthemes)

# Base scatter plot: black-outlined points (pch 21) filled by Type.
# `color = Type` in the global aes is overridden for the points but is
# inherited by the text labels added below.
p <- ggplot(
  dta,
  aes(x = Tetrahydrocortisone, y = Pregnanetriol, color = Type)
) +
  # Type is already a factor, so no factor() wrapper is needed.
  geom_point(aes(fill = Type), colour = "black", pch = 21, size = 3) +
  labs(
    x = "Tetrahydrocortisone (mg/24 hours)",
    y = "Pregnanetriol (mg/24 hours)",
    title = "Cushing's syndrome"
  ) +
  # using the similar theme, change background to white
  theme_economist_white(gray_bg = FALSE) +
  theme(
    # right-aligned, bold, size-12 title
    plot.title = element_text(hjust = 1, face = "bold", size = 12),
    # Hide the legend; the types are labelled directly on the plot.
    # "none" is the documented value; the previous " " is not a valid
    # legend.position and is not guaranteed to be accepted by ggplot2.
    legend.position = "none",
    # remove the x-axis bottom line and ticks by drawing them in white
    axis.line.x.bottom = element_line(colour = "white"),
    axis.ticks.x = element_line(colour = "white")
  )

# Add one repelled text label per type, using the first row of each type.
# `data` is named explicitly: the first positional argument of
# geom_text_repel() is `mapping`, not `data`.
p +
  geom_text_repel(data = dta1, mapping = aes(label = Type)) +
  # set the breaks on both axes
  scale_y_continuous(breaks = c(0, 2, 4, 6, 8, 10, 12)) +
  scale_x_continuous(limits = c(0, 60), breaks = c(0, 10, 20, 30, 40, 50, 60)) +
  theme(
    # rotate y-axis text by 90 degrees
    axis.text.y = element_text(angle = 90),
    # widen the gap between each axis title and its axis text
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
  )