Q1
dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/stateAnxiety.txt",header = T)
str(dta1)
## 'data.frame': 50 obs. of 10 variables:
## $ f1: int 13 26 13 22 18 32 16 18 14 20 ...
## $ f2: int 17 31 17 24 19 31 16 22 17 19 ...
## $ f3: int 18 33 24 26 19 30 21 25 23 23 ...
## $ f4: int 20 38 29 27 22 31 27 29 21 25 ...
## $ f5: int 24 42 32 29 30 32 30 35 25 28 ...
## $ m1: int 6 4 17 19 12 11 14 9 12 11 ...
## $ m2: int 14 11 25 22 21 16 23 18 16 13 ...
## $ m3: int 22 14 26 26 21 20 26 20 23 17 ...
## $ m4: int 20 12 29 30 23 19 29 20 26 14 ...
## $ m5: int 24 23 38 34 24 22 33 24 32 20 ...
dta1_F <- dta1[,1:5]
dta1_F$subject <- paste0("S",101:150)
dta1_M <- dta1[,6:10]
dta1_M$subject <- paste0("S",151:200)
dta1_FL <- dta1_F%>%gather(key = "week",value = "score",1:5)
dta1_FL$gender <- rep(paste0("F"),dim(dta1_FL)[1])
dta1_ML <- dta1_M%>%gather(key = "week",value = "score",1:5)
dta1_ML$gender <- rep(paste0("M"),dim(dta1_ML)[1])
dta1_ML$week <- as.factor(dta1_ML$week)
dta1_ML$gender <- as.factor(dta1_ML$gender)
dta1_FL$week <- as.factor(dta1_FL$week)
dta1_FL$gender <- as.factor(dta1_FL$gender)
levels(dta1_FL$week) <- c("w1","w2","w3","w4","w5")
levels(dta1_ML$week) <- c("w1","w2","w3","w4","w5")
dta1L <- rbind(dta1_FL,dta1_ML)
str(dta1L)
## 'data.frame': 500 obs. of 4 variables:
## $ subject: chr "S101" "S102" "S103" "S104" ...
## $ week : Factor w/ 5 levels "w1","w2","w3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ score : int 13 26 13 22 18 32 16 18 14 20 ...
## $ gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...
#
ggplot(data = dta1L,aes(x = week,y = score,group = gender,col = gender))+
stat_summary(fun.data = mean_se,geom = "pointrange",size = 0.5)+
theme_bw()

#
ggplot(data = dta1L, aes(x=week, y=score, group=subject))+
geom_line()+
stat_smooth(aes(group=gender), method="lm")+
facet_grid(~factor(gender, labels=c("Female","Male")))+
labs(x="Week", y="State anxiety")+
theme_bw()

Q2
dta2 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/langMathDutch.txt",header = T)
str(dta2)
## 'data.frame': 2287 obs. of 6 variables:
## $ school: int 1 1 1 1 1 1 1 1 1 1 ...
## $ pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ IQV : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ size : int 29 29 29 29 29 29 29 29 29 29 ...
## $ lang : int 46 45 33 46 20 30 30 57 36 36 ...
## $ arith : int 24 19 24 26 9 13 13 30 23 22 ...
dta2$IQV <- cut(dta2$IQV,c(0,quantile(dta2$IQV,.33),quantile(dta2$IQV,.67),Inf),
labels = c("Low","Middle","High"))
dta2$size <- cut(dta2$size,c(0,quantile(dta2$size,.33),quantile(dta2$size,.67),Inf),
labels = c("Small","Medium","Large"))
dta2$group <- paste0(dta2$size,",",dta2$IQV)
dta2$group <- as.factor(dta2$group)
levels(dta2$group) <- c("Small,Low","Small,Middle","Small,High",
"Medium,Low","Medium,Middle","Medium,High",
"Large,Low","Large,Middle","Large,High")
##
ggplot(data = dta2,aes(x = lang, y = arith))+
geom_point(shape = 18)+
stat_smooth(method = "lm")+
facet_wrap(~group,nrow = 3)+
theme_bw()

Q3
##Q3
dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/mathAttainment.txt",header = T)
str(dta3)
## 'data.frame': 39 obs. of 3 variables:
## $ math2: int 28 56 51 13 39 41 30 13 17 32 ...
## $ math1: int 18 22 44 8 20 12 16 5 9 18 ...
## $ cc : num 328 406 387 167 328 ...
dta3$pupil <- paste0("S",101:139)
dta3L <- dta3%>%gather(key = "year",value = "score",1:2)
str(dta3L)
## 'data.frame': 78 obs. of 4 variables:
## $ cc : num 328 406 387 167 328 ...
## $ pupil: chr "S101" "S102" "S103" "S104" ...
## $ year : chr "math2" "math2" "math2" "math2" ...
## $ score: int 28 56 51 13 39 41 30 13 17 32 ...
dta3L <- dta3L%>%mutate(pupil = as.factor(pupil),
year = as.factor(year))
levels(dta3L$year) <- c("Year_1","Year_2")
##ggplot
ggplot(data = dta3L, aes(x = cc,y = score,group = year))+
geom_point(aes(color = year))+
stat_smooth(method = "lm")+
theme_bw()

Q4
##It's the pyramid chart of mathematic score, but the x axis is density instead of commonly numbers of people.
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt", h = T) %>%
mutate(female = factor(female, levels(female),
labels = c("Female", "Male")),
race = factor(race, levels(race),
labels = c("Black", "Asian", "Hispanic",
"White")),
ses = ordered(ses, levels = c("low", "middle",
"high"),
labels = c("Low", "Middle", "High"))) %>%
mutate(race = reorder(race, math, median))
str(dta4)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
## ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
## .. ..- attr(*, "dimnames")=List of 1
## .. .. ..$ : chr "Black" "Asian" "Hispanic" "White"
## $ ses : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 NA ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
bw <- with(dta4, IQR(math)/(length(math)^(1/3)))
ggplot() +
stat_bin(data = subset(dta4, female=="Male"), binwidth = bw,
aes(math, color = "Male", fill = "Male", y = - ..density.. )) +
stat_bin(data = subset(dta4, female == "Female"), binwidth = bw,
aes(math, color = "Female", fill = "Female", y = ..density.. )) +
scale_color_manual(values = c("black", "black"),
guide = guide_legend(title = NULL, direction = "horizontal",
title.position = "top", reverse = TRUE,
label.position = "bottom", label.hjust = .5, label.vjust = .5,
label.theme = element_text(angle = 90) ) ) +
scale_fill_manual(values = c("White", "gray80"),
guide = guide_legend(title = NULL, reverse = TRUE,
direction = "horizontal", title.position = "top",
label.position = "bottom", label.hjust = .5, label.vjust = .5,
label.theme = element_text(angle = 90))) +
scale_x_continuous(limits = c(30, 80), breaks=seq(30, 80, by = 5)) +
labs(x = "Mathematic score", y = "Density") +
coord_flip() +
theme_bw() +
theme(legend.position=c(.9, .85))

###
Q6
dta6 <- USPersonalExpenditure
dta6 <- data.frame(dta6)
colnames(dta6) <- c("1940","1945","1950","1955","1960")
dta6$category <- rownames(dta6)
str(dta6)
## 'data.frame': 5 obs. of 6 variables:
## $ 1940 : num 22.2 10.5 3.53 1.04 0.341
## $ 1945 : num 44.5 15.5 5.76 1.98 0.974
## $ 1950 : num 59.6 29 9.71 2.45 1.8
## $ 1955 : num 73.2 36.5 14 3.4 2.6
## $ 1960 : num 86.8 46.2 21.1 5.4 3.64
## $ category: chr "Food and Tobacco" "Household Operation" "Medical and Health" "Personal Care" ...
dta6L <- gather(dta6,key = time, value = Exp,1:5)
dta6L <- dta6L%>%mutate(category = as.factor(category),
time = as.factor(time),
ExpL10 = log10(Exp))
str(dta6L)
## 'data.frame': 25 obs. of 4 variables:
## $ category: Factor w/ 5 levels "Food and Tobacco",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ time : Factor w/ 5 levels "1940","1945",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Exp : num 22.2 10.5 3.53 1.04 0.341 44.5 15.5 5.76 1.98 0.974 ...
## $ ExpL10 : num 1.346 1.021 0.548 0.017 -0.467 ...
#ggplot
ggplot(data = dta6L,aes(x = category,y = ExpL10))+
geom_point()+
geom_segment(aes(x = category,xend = category, y = ExpL10 ,yend = 0))+
geom_hline(yintercept = 0)+
facet_grid(~time)+
coord_flip()+
labs(y = "Logexpenditure")+
theme_bw()

Q7
dta7 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt",header = T)
str(dta7)
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ female : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
## $ ses : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 NA ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
theme_science <- theme(text = element_text(size = 14),
panel.grid.major = element_line(size = .5, color="grey"),
axis.line = element_line(size = .7, color = "black"),
legend.key.size = unit(.3, "cm"),
legend.text = element_text(size = 6),
legend.title = element_text(size = 8),
strip.background = element_rect(fill = "white")
)
##ggplot
ggplot(data = dta7,aes(x = write,y = read))+
geom_point(aes(color = female))+
scale_color_discrete(guide=FALSE)+
geom_density2d()+
facet_grid(~female)+
stat_density2d(aes(fill=..level..), geom="tile")+
labs(x = "Writing score", y = "Reading score")+
theme_classic()

Q8
Intercourse <- c("Yes","No")
Race <- c("White","Black")
Gender <- c("Male","Female")
dta8 <- expand.grid(Intercourse,Race,Gender)
dta8$count <- c(43,134,29,23,26,149,22,36)
colnames(dta8) <- c("Intercourse","Race","Gender","count")
dta8 <- dta8%>%mutate(Intercourse = as.factor(Intercourse),
Race = as.factor(Race),
Gender = as.factor(Gender))
str(dta8)
## 'data.frame': 8 obs. of 4 variables:
## $ Intercourse: Factor w/ 2 levels "Yes","No": 1 2 1 2 1 2 1 2
## $ Race : Factor w/ 2 levels "White","Black": 1 1 2 2 1 1 2 2
## $ Gender : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2
## $ count : num 43 134 29 23 26 149 22 36
#ggplot
ggplot(data = dta8,aes(x = Intercourse,y = count))+
geom_bar(stat = "identity")+
facet_wrap(~Race*Gender,nrow = 2)+
theme_bw()

Q9
load("C:/Users/Cheng_wen_sung/Desktop/db.Rda")
dta9 <- db
str(dta9)
## 'data.frame': 2368 obs. of 4 variables:
## $ storyid: int 295 290 310 309 315 317 320 295 293 292 ...
## $ rating : int 4 6 3 4 3 4 1 5 2 1 ...
## $ src : Factor w/ 4 levels "CNN","FOX","NYT",..: 2 4 4 1 2 4 3 2 1 1 ...
## $ story : chr "FOX The Bloom Box: Energy Breakthrough or Silicon ..." "WSJ Blame Evolution for Disease..." "WSJ Test-Driving Gadgets..." "CNN Survey: Cost a barrier to spread of broadband..." ...
dta9$story <- as.factor(dta9$story)
#ggplot
dta9%>%group_by(story)%>%summarise(mean_rating = mean(rating,na.rm = T),
se_rating = sd(rating,na.rm = T)/sqrt(n()))%>%
ggplot(aes(x = reorder(story,mean_rating),y = mean_rating))+
geom_errorbar(aes(ymin = mean_rating-se_rating,ymax = mean_rating+se_rating),color ="grey70",width = 0)+
geom_point()+
labs(x = "Rating",y = NULL,title = "Article rateing for best and wrost stories, with SE")+
theme_bw()+
coord_flip()
