dataM_week7

Q1

dta1 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/stateAnxiety.txt",header = T)
str(dta1)

## 'data.frame':    50 obs. of  10 variables:
##  $ f1: int  13 26 13 22 18 32 16 18 14 20 ...
##  $ f2: int  17 31 17 24 19 31 16 22 17 19 ...
##  $ f3: int  18 33 24 26 19 30 21 25 23 23 ...
##  $ f4: int  20 38 29 27 22 31 27 29 21 25 ...
##  $ f5: int  24 42 32 29 30 32 30 35 25 28 ...
##  $ m1: int  6 4 17 19 12 11 14 9 12 11 ...
##  $ m2: int  14 11 25 22 21 16 23 18 16 13 ...
##  $ m3: int  22 14 26 26 21 20 26 20 23 17 ...
##  $ m4: int  20 12 29 30 23 19 29 20 26 14 ...
##  $ m5: int  24 23 38 34 24 22 33 24 32 20 ...

dta1_F <- dta1[,1:5]
dta1_F$subject <- paste0("S",101:150)
dta1_M <- dta1[,6:10]
dta1_M$subject <- paste0("S",151:200)
dta1_FL <- dta1_F%>%gather(key = "week",value = "score",1:5)
dta1_FL$gender <- rep(paste0("F"),dim(dta1_FL)[1])
dta1_ML <- dta1_M%>%gather(key = "week",value = "score",1:5)
dta1_ML$gender <- rep(paste0("M"),dim(dta1_ML)[1])

dta1_ML$week <- as.factor(dta1_ML$week)
dta1_ML$gender <- as.factor(dta1_ML$gender)
dta1_FL$week <- as.factor(dta1_FL$week)
dta1_FL$gender <- as.factor(dta1_FL$gender)

levels(dta1_FL$week) <- c("w1","w2","w3","w4","w5")
levels(dta1_ML$week) <- c("w1","w2","w3","w4","w5")

dta1L <- rbind(dta1_FL,dta1_ML)
str(dta1L)

## 'data.frame':    500 obs. of  4 variables:
##  $ subject: chr  "S101" "S102" "S103" "S104" ...
##  $ week   : Factor w/ 5 levels "w1","w2","w3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ score  : int  13 26 13 22 18 32 16 18 14 20 ...
##  $ gender : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 1 ...

#
ggplot(data = dta1L,aes(x = week,y = score,group = gender,col = gender))+
  stat_summary(fun.data = mean_se,geom = "pointrange",size = 0.5)+
  theme_bw()

#
ggplot(data = dta1L, aes(x=week, y=score, group=subject))+
  geom_line()+
  stat_smooth(aes(group=gender), method="lm")+
  facet_grid(~factor(gender, labels=c("Female","Male")))+
  labs(x="Week", y="State anxiety")+
  theme_bw()

Q2

dta2 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/langMathDutch.txt",header = T)
str(dta2)

## 'data.frame':    2287 obs. of  6 variables:
##  $ school: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pupil : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ IQV   : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ size  : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ lang  : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ arith : int  24 19 24 26 9 13 13 30 23 22 ...

dta2$IQV <- cut(dta2$IQV,c(0,quantile(dta2$IQV,.33),quantile(dta2$IQV,.67),Inf),
                labels = c("Low","Middle","High"))
dta2$size <- cut(dta2$size,c(0,quantile(dta2$size,.33),quantile(dta2$size,.67),Inf),
                 labels = c("Small","Medium","Large"))

dta2$group <- paste0(dta2$size,",",dta2$IQV)
dta2$group <- as.factor(dta2$group)
levels(dta2$group) <- c("Small,Low","Small,Middle","Small,High",
                        "Medium,Low","Medium,Middle","Medium,High",
                        "Large,Low","Large,Middle","Large,High")

##
ggplot(data = dta2,aes(x = lang, y = arith))+
  geom_point(shape = 18)+
  stat_smooth(method = "lm")+
  facet_wrap(~group,nrow = 3)+
  theme_bw()

Q3

##Q3

dta3 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/mathAttainment.txt",header = T)
str(dta3)

## 'data.frame':    39 obs. of  3 variables:
##  $ math2: int  28 56 51 13 39 41 30 13 17 32 ...
##  $ math1: int  18 22 44 8 20 12 16 5 9 18 ...
##  $ cc   : num  328 406 387 167 328 ...

dta3$pupil <- paste0("S",101:139)
dta3L <- dta3%>%gather(key = "year",value = "score",1:2)
str(dta3L)

## 'data.frame':    78 obs. of  4 variables:
##  $ cc   : num  328 406 387 167 328 ...
##  $ pupil: chr  "S101" "S102" "S103" "S104" ...
##  $ year : chr  "math2" "math2" "math2" "math2" ...
##  $ score: int  28 56 51 13 39 41 30 13 17 32 ...

dta3L <- dta3L%>%mutate(pupil = as.factor(pupil),
                      year = as.factor(year))
levels(dta3L$year) <- c("Year_1","Year_2")

##ggplot
ggplot(data = dta3L, aes(x = cc,y = score,group = year))+
  geom_point(aes(color = year))+
  stat_smooth(method = "lm")+
  theme_bw()

Q4

##It's the pyramid chart of mathematic score, but the x axis is density instead of commonly numbers of people.
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt", h = T) %>%
  mutate(female = factor(female, levels(female), 
                         labels = c("Female", "Male")),
         race = factor(race, levels(race), 
                       labels = c("Black", "Asian", "Hispanic", 
                                  "White")),
         ses = ordered(ses, levels = c("low", "middle", 
                                       "high"),
                       labels = c("Low", "Middle", "High"))) %>%
  mutate(race = reorder(race, math, median))

str(dta4)

## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ female : Factor w/ 2 levels "Female","Male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "Black","Hispanic",..: 3 3 3 3 3 3 1 2 3 1 ...
##   ..- attr(*, "scores")= num [1:4(1d)] 45 61 47 54
##   .. ..- attr(*, "dimnames")=List of 1
##   .. .. ..$ : chr  "Black" "Asian" "Hispanic" "White"
##  $ ses    : Ord.factor w/ 3 levels "Low"<"Middle"<..: 1 2 3 3 2 2 2 2 2 2 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 NA ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...

bw <- with(dta4, IQR(math)/(length(math)^(1/3)))
ggplot() +
  stat_bin(data = subset(dta4, female=="Male"), binwidth = bw,
           aes(math, color = "Male", fill = "Male", y = - ..density.. )) +
  stat_bin(data = subset(dta4, female == "Female"), binwidth = bw,
           aes(math, color = "Female", fill = "Female", y = ..density.. )) +
  scale_color_manual(values = c("black", "black"),
                     guide = guide_legend(title = NULL, direction = "horizontal",
                                          title.position = "top", reverse = TRUE,
                                          label.position = "bottom", label.hjust = .5, label.vjust = .5,
                                          label.theme = element_text(angle = 90) ) ) +
  scale_fill_manual(values = c("White", "gray80"),
                    guide = guide_legend(title = NULL, reverse = TRUE,
                                         direction = "horizontal", title.position = "top",
                                         label.position = "bottom", label.hjust = .5, label.vjust = .5,
                                         label.theme = element_text(angle = 90))) +
  scale_x_continuous(limits = c(30, 80), breaks=seq(30, 80, by = 5)) +
  labs(x = "Mathematic score", y = "Density") +
  coord_flip() +
  theme_bw() +
  theme(legend.position=c(.9, .85))

###

Q6

dta6 <- USPersonalExpenditure
dta6 <- data.frame(dta6)
colnames(dta6) <- c("1940","1945","1950","1955","1960")
dta6$category <- rownames(dta6)
str(dta6)

## 'data.frame':    5 obs. of  6 variables:
##  $ 1940    : num  22.2 10.5 3.53 1.04 0.341
##  $ 1945    : num  44.5 15.5 5.76 1.98 0.974
##  $ 1950    : num  59.6 29 9.71 2.45 1.8
##  $ 1955    : num  73.2 36.5 14 3.4 2.6
##  $ 1960    : num  86.8 46.2 21.1 5.4 3.64
##  $ category: chr  "Food and Tobacco" "Household Operation" "Medical and Health" "Personal Care" ...

dta6L <- gather(dta6,key = time, value = Exp,1:5)
dta6L <- dta6L%>%mutate(category = as.factor(category),
                        time = as.factor(time),
                        ExpL10 = log10(Exp))
str(dta6L)

## 'data.frame':    25 obs. of  4 variables:
##  $ category: Factor w/ 5 levels "Food and Tobacco",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ time    : Factor w/ 5 levels "1940","1945",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Exp     : num  22.2 10.5 3.53 1.04 0.341 44.5 15.5 5.76 1.98 0.974 ...
##  $ ExpL10  : num  1.346 1.021 0.548 0.017 -0.467 ...

#ggplot
ggplot(data = dta6L,aes(x = category,y = ExpL10))+
  geom_point()+
  geom_segment(aes(x = category,xend = category, y = ExpL10 ,yend = 0))+
  geom_hline(yintercept = 0)+
  facet_grid(~time)+
  coord_flip()+
  labs(y = "Logexpenditure")+
  theme_bw()

Q7

dta7 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/dataM/Data/hs0.txt",header = T)
str(dta7)

## 'data.frame':    200 obs. of  11 variables:
##  $ id     : int  70 121 86 141 172 113 50 11 84 48 ...
##  $ female : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
##  $ race   : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
##  $ ses    : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
##  $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ prog   : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
##  $ read   : int  57 68 44 63 47 44 50 34 63 57 ...
##  $ write  : int  52 59 33 44 52 52 59 46 57 55 ...
##  $ math   : int  41 53 54 47 57 51 42 45 54 52 ...
##  $ science: int  47 63 58 53 53 63 53 39 58 NA ...
##  $ socst  : int  57 61 31 56 61 61 61 36 51 51 ...

theme_science <- theme(text = element_text(size = 14),
                       panel.grid.major = element_line(size = .5, color="grey"),
                       axis.line = element_line(size = .7, color = "black"),
                       legend.key.size = unit(.3, "cm"),
                       legend.text = element_text(size = 6),
                       legend.title = element_text(size = 8),
                       strip.background = element_rect(fill = "white")
)

##ggplot
ggplot(data = dta7,aes(x = write,y = read))+
  geom_point(aes(color = female))+
  scale_color_discrete(guide=FALSE)+
  geom_density2d()+
  facet_grid(~female)+
  stat_density2d(aes(fill=..level..), geom="tile")+
  labs(x = "Writing score", y = "Reading score")+
  theme_classic()

Q8

Intercourse <- c("Yes","No")
Race <- c("White","Black")
Gender <- c("Male","Female")
dta8 <- expand.grid(Intercourse,Race,Gender)
dta8$count <- c(43,134,29,23,26,149,22,36)
colnames(dta8) <- c("Intercourse","Race","Gender","count")
dta8 <- dta8%>%mutate(Intercourse = as.factor(Intercourse),
                      Race = as.factor(Race),
                      Gender = as.factor(Gender))
str(dta8)

## 'data.frame':    8 obs. of  4 variables:
##  $ Intercourse: Factor w/ 2 levels "Yes","No": 1 2 1 2 1 2 1 2
##  $ Race       : Factor w/ 2 levels "White","Black": 1 1 2 2 1 1 2 2
##  $ Gender     : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2
##  $ count      : num  43 134 29 23 26 149 22 36

#ggplot
ggplot(data = dta8,aes(x = Intercourse,y = count))+
  geom_bar(stat = "identity")+
  facet_wrap(~Race*Gender,nrow = 2)+
  theme_bw()

Q9

load("C:/Users/Cheng_wen_sung/Desktop/db.Rda")
dta9 <- db
str(dta9)

## 'data.frame':    2368 obs. of  4 variables:
##  $ storyid: int  295 290 310 309 315 317 320 295 293 292 ...
##  $ rating : int  4 6 3 4 3 4 1 5 2 1 ...
##  $ src    : Factor w/ 4 levels "CNN","FOX","NYT",..: 2 4 4 1 2 4 3 2 1 1 ...
##  $ story  : chr  "FOX The Bloom Box: Energy Breakthrough or Silicon ..." "WSJ Blame Evolution for Disease..." "WSJ Test-Driving Gadgets..." "CNN Survey: Cost a barrier to spread of broadband..." ...

dta9$story <- as.factor(dta9$story)


#ggplot
dta9%>%group_by(story)%>%summarise(mean_rating = mean(rating,na.rm = T),
                                   se_rating = sd(rating,na.rm = T)/sqrt(n()))%>%
ggplot(aes(x = reorder(story,mean_rating),y = mean_rating))+
geom_errorbar(aes(ymin = mean_rating-se_rating,ymax = mean_rating+se_rating),color ="grey70",width = 0)+
  geom_point()+
  labs(x = "Rating",y = NULL,title = "Article rateing for best and wrost stories, with SE")+
  theme_bw()+
  coord_flip()

dataM_week7

Wen_Sung Cheng

2017年4月25日

Q1

Q2

Q3

Q4

Q6

Q7

Q8

Q9