HW2
# 讀檔案,變項用逗點區分
dta2 <- read.table("nlsy86long.csv",header = T,sep=",")
作性別與種族在閱讀跟數學兩項成績的分布圖,看起來都是正相關
dta2 %>%
ggplot()+ geom_smooth(mapping=aes(math,read,color=race)) +theme_light() + facet_grid(.~sex)
## `geom_smooth()` using method = 'loess'

接下來觀察在總體成績方面,不同的在學階段是否存在影響
dta2 %>%
gather(subject,score,8:9) %>%
ggplot(.,aes(grade,score,color=sex)) +
stat_summary(fun.data = mean_se , geom = "pointrange",
position = position_dodge(0.3)) +
facet_grid(.~subject)+
theme_light()

結果呈現了在幼稚園性別似乎是沒有差異的,但隨著年級上升男生的數學與女生的閱讀能力好像逐漸在性別上有優勢,但仍需進一步檢定
HW3
dta3 <- read.csv("alcohol_age.csv", header = T,sep=",") %>% mutate(over21=ifelse(Age>=21,"yes","no"))
## Warning: package 'bindrcpp' was built under R version 3.4.4
# 作個變項確定是否滿21歲,然後畫圖
dta3 %>%
xyplot(Alcohol ~ Age, group = over21,
data =., type = c("g", "r", "p"),
xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")

# 參考子揚的做法畫第二張圖
aggregate(Alcohol ~ over21, FUN = mean,data=dta3)
## over21 Alcohol
## 1 no 1.032118
## 2 yes 1.482557
ggplot(dta3, aes(Age, Alcohol))+
geom_point(aes(color = over21), na.rm = TRUE)+
geom_segment(aes(x = 19, xend = 21, y = 1.032, yend = 1.032), color = "tomato")+
geom_segment(aes(x = 21, xend = 23, y = 1.483, yend = 1.483), color = "turquoise")+
theme(legend.position = "none")+
labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

HW4
# 將表格整理弄成檔案讀取,其中因為表格整理方便把瑞士與荷蘭稍作修正
dta4 <- read.table("dta4.txt", header=T)
# Header有點跑掉,改回來
colnames(dta4)<- c("country","25-34","35-44","45-54","55-64","65-74")
knitr::kable(dta4)
| Canada |
22 |
27 |
31 |
34 |
24 |
| Israel |
9 |
19 |
10 |
14 |
27 |
| Japan |
22 |
19 |
21 |
31 |
49 |
| Austria |
29 |
40 |
52 |
53 |
69 |
| France |
16 |
25 |
36 |
47 |
56 |
| Germany |
28 |
35 |
41 |
49 |
52 |
| Hungary |
48 |
65 |
84 |
81 |
107 |
| Italy |
7 |
8 |
11 |
18 |
27 |
| Holland |
8 |
11 |
18 |
20 |
28 |
| Poland |
26 |
29 |
36 |
32 |
28 |
| Spain |
4 |
7 |
10 |
16 |
22 |
| Sweden |
28 |
41 |
46 |
51 |
35 |
| Swiss |
22 |
34 |
41 |
50 |
51 |
| UK |
10 |
13 |
15 |
17 |
22 |
| USA |
20 |
22 |
28 |
33 |
37 |
初步檢視似乎男性自殺率是東歐高於西歐,看看盒鬚圖
dta4 %>%
gather(Age,cases,2:6) %>%
ggplot(.,aes(reorder(country,-cases),cases)) +
geom_boxplot() + theme_bw() +
labs(x="Country",y="Deaths per 100,000 from male suicides")

可以觀察到的確東歐男性的自殺率高於西歐,而北美跟西歐則差不多
再做一張以年齡為基準的圖
dta4 %>% gather(Age, Rate, 2:6) %>%
ggplot(aes(Age, Rate))+
geom_boxplot()+
labs(x = "Age", y = "Deaths per 100,000 from male suicides")

HW5
dta5 <- read.table("coping.txt",header=T)
knitr::kable(head(dta5))
| 4 |
2 |
2 |
2 |
1.00 |
2.00 |
1.00 |
2.50 |
Fail |
S2 |
| 4 |
4 |
4 |
2 |
4.00 |
3.00 |
1.25 |
1.50 |
NoPart |
S2 |
| 2 |
2 |
2 |
2 |
2.67 |
3.00 |
1.00 |
2.33 |
TeacNo |
S2 |
| 4 |
3 |
4 |
4 |
4.00 |
1.50 |
3.25 |
1.00 |
Bully |
S2 |
| 4 |
2 |
1 |
1 |
1.00 |
2.75 |
1.25 |
1.50 |
Work |
S2 |
| 4 |
3 |
1 |
4 |
2.33 |
2.50 |
1.00 |
3.67 |
MomNo |
S2 |
# 大概可以看到有情緒相關的變項跟處理策略與情境等,評分為1~4(不曾~時常)
dta5 %>% gather(emotion,score,1:4) %>%
ggplot(.,aes(emotion,score,color=situation)) +
stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
theme_bw()+
labs(x="emotion",y="Score")

可以看到在不同情境之下,不同情感類型與分數間的關聯
dta5 %>% gather(coping,score,5:8) %>%
ggplot(.,aes(coping,score,color=situation)) +
stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
theme_bw()+
labs(x="coping",y="Score")

可以看到在不同情境之下,不同策略與分數間的關聯
HW6
HW7
dta7 <- read.table("beautyCourseEval.txt",header=T)
dta7 %>%
mutate(CourseID = factor(courseID),
Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female"))) %>%
xyplot(eval ~ beauty | CourseID, group = Gender,
data = ., type = c("g", "r", "p"),
index.cond = function(x, y) coef(lm(y ~ x))[2],
xlab = "Beauty score", ylab = "Course evaluation score",
lattice.options = list(panel.error = "warning"), auto.key = list(column = 2))

HW8
library(sas7bdat)
dta8 <- read.sas7bdat("sales.sas7bdat")
dta8 %>%
mutate(region = factor(region, 1:4, c("Nothern", "Southern", "Eastern", "Western")),
district = factor(district, 1:5, c("North East", "South East", "South West", "North West", "Central West")),
quarter = factor(quarter, 1:4, c("1st", "2nd", "3rd", "4th")),
month = factor(month, 1:12, month.abb),
sales = ifelse(sales < 0, 0, sales),
market = factor(market))
## product category customer year month quarter market sales expense
## 1 Shoes Shoes Acme 2001 Jan 1st 1 300 240
## 2 Boots Shoes Acme 2001 Jan 1st 1 2200 1540
## 3 Slippers Slippers Acme 2001 Jan 1st 1 900 540
## 4 Shoes Shoes Acme 2001 Feb 1st 1 100 80
## 5 Boots Shoes Acme 2001 Feb 1st 1 1400 980
## 6 Slippers Slippers Acme 2001 Feb 1st 1 0 0
## 7 Shoes Shoes Acme 2001 Mar 1st 1 600 480
## 8 Boots Shoes Acme 2001 Mar 1st 1 0 0
## 9 Slippers Slippers Acme 2001 Mar 1st 1 1400 840
## 10 Shoes Shoes Acme 2001 Apr 2nd 1 2600 2080
## 11 Boots Shoes Acme 2001 Apr 2nd 1 1500 1050
## 12 Slippers Slippers Acme 2001 Apr 2nd 1 1000 600
## 13 Shoes Shoes Acme 2001 May 2nd 2 2400 1920
## 14 Boots Shoes Acme 2001 May 2nd 2 0 -280
## 15 Slippers Slippers Acme 2001 May 2nd 2 700 420
## 16 Shoes Shoes Acme 2001 Jun 2nd 2 1000 800
## 17 Boots Shoes Acme 2001 Jun 2nd 2 1100 770
## 18 Slippers Slippers Acme 2001 Jun 2nd 2 300 180
## 19 Shoes Shoes Acme 2001 Jul 3rd 2 3700 2960
## 20 Boots Shoes Acme 2001 Jul 3rd 2 1600 1120
## 21 Slippers Slippers Acme 2001 Jul 3rd 2 2800 1680
## 22 Shoes Shoes TwoFeet 2001 Aug 3rd 2 2800 2240
## 23 Boots Shoes TwoFeet 2001 Aug 3rd 2 2800 1960
## 24 Slippers Slippers TwoFeet 2001 Aug 3rd 2 1100 660
## 25 Shoes Shoes Acme 2001 Sep 3rd 2 1900 1520
## 26 Boots Shoes Acme 2001 Sep 3rd 2 2300 1610
## 27 Slippers Slippers Acme 2001 Sep 3rd 2 1800 1080
## 28 Shoes Shoes Acme 2001 Oct 4th 2 2100 1680
## 29 Boots Shoes Acme 2001 Oct 4th 2 3200 2240
## 30 Slippers Slippers Acme 2001 Oct 4th 2 4700 2820
## 31 Shoes Shoes BigX 2001 Nov 4th 2 2900 2320
## 32 Boots Shoes BigX 2001 Nov 4th 2 2500 1750
## 33 Slippers Slippers BigX 2001 Nov 4th 2 3800 2280
## 34 Shoes Shoes Acme 2001 Dec 4th 2 2300 1840
## 35 Boots Shoes Acme 2001 Dec 4th 2 1900 1330
## 36 Slippers Slippers Acme 2001 Dec 4th 2 1200 720
## 37 Shoes Shoes Acme 2002 Jan 1st 1 1000 800
## 38 Boots Shoes Acme 2002 Jan 1st 1 0 -980
## 39 Slippers Slippers Acme 2002 Jan 1st 1 1100 660
## 40 Shoes Shoes Acme 2002 Feb 1st 1 1000 800
## 41 Boots Shoes Acme 2002 Feb 1st 1 700 490
## 42 Slippers Slippers Acme 2002 Feb 1st 1 2300 1380
## 43 Shoes Shoes Acme 2002 Mar 1st 1 400 320
## 44 Boots Shoes Acme 2002 Mar 1st 1 0 -630
## 45 Slippers Slippers Acme 2002 Mar 1st 1 1100 660
## 46 Shoes Shoes Acme 2002 Apr 2nd 1 2100 1680
## 47 Boots Shoes Acme 2002 Apr 2nd 1 1400 980
## 48 Slippers Slippers Acme 2002 Apr 2nd 1 500 300
## 49 Shoes Shoes Acme 2002 May 2nd 2 1100 880
## 50 Boots Shoes Acme 2002 May 2nd 2 200 140
## 51 Slippers Slippers Acme 2002 May 2nd 2 1300 780
## 52 Boots Shoes Acme 2002 Jun 2nd 2 1800 1260
## 53 Shoes Shoes Acme 2002 Jun 2nd 2 1700 1360
## 54 Slippers Slippers Acme 2002 Jun 2nd 2 1400 840
## 55 Boots Shoes Acme 2002 Jul 3rd 2 0 -630
## 56 Shoes Shoes Acme 2002 Jul 3rd 2 1700 1360
## 57 Slippers Slippers Acme 2002 Jul 3rd 2 1400 840
## 58 Boots Shoes TwoFeet 2002 Aug 3rd 2 3500 2450
## 59 Shoes Shoes TwoFeet 2002 Aug 3rd 2 1300 1040
## 60 Slippers Slippers TwoFeet 2002 Aug 3rd 2 3200 1920
## 61 Boots Shoes Acme 2002 Sep 3rd 2 3000 2100
## 62 Shoes Shoes Acme 2002 Sep 3rd 2 3200 2560
## 63 Slippers Slippers Acme 2002 Sep 3rd 2 1200 720
## 64 Boots Shoes Acme 2002 Oct 4th 2 3000 2100
## 65 Shoes Shoes Acme 2002 Oct 4th 2 4000 2400
## 66 Slippers Slippers Acme 2002 Oct 4th 2 4000 2400
## 67 Boots Shoes BigX 2002 Nov 4th 2 3100 2170
## 68 Shoes Shoes BigX 2002 Nov 4th 2 1700 1360
## 69 Slippers Slippers BigX 2002 Nov 4th 2 1400 840
## 70 Boots Shoes Acme 2002 Dec 4th 2 2300 1610
## 71 Shoes Shoes Acme 2002 Dec 4th 2 1800 1440
## 72 Slippers Slippers Acme 2002 Dec 4th 2 3200 1920
## region district return constantv quantity
## 1 Nothern North East 0 1 30
## 2 Nothern North East 0 1 275
## 3 Nothern North East 0 1 180
## 4 Nothern North East 0 1 10
## 5 Nothern North East 0 1 175
## 6 Nothern North East 0 1 0
## 7 Nothern North East 0 1 60
## 8 Nothern North East 0 1 0
## 9 Nothern North East 0 1 280
## 10 Nothern North East 0 1 260
## 11 Nothern North East 0 1 187
## 12 Nothern North East 0 1 200
## 13 Nothern North East 0 1 240
## 14 Nothern North East 0 1 50
## 15 Nothern North East 0 1 140
## 16 Nothern North East 0 1 100
## 17 Nothern North East 0 1 137
## 18 Nothern North East 0 1 60
## 19 Nothern North East 0 1 370
## 20 Nothern North East 0 1 200
## 21 Nothern North East 0 1 560
## 22 Southern South West 3 1 280
## 23 Southern South West 3 1 350
## 24 Southern South West 3 1 220
## 25 Nothern North East 0 1 190
## 26 Nothern North East 0 1 287
## 27 Nothern North East 0 1 360
## 28 Nothern North East 0 1 210
## 29 Nothern North East 0 1 400
## 30 Nothern North East 0 1 940
## 31 Western Central West 5 1 290
## 32 Western Central West 5 1 312
## 33 Western Central West 5 1 760
## 34 Nothern North East 0 1 230
## 35 Nothern North East 0 1 237
## 36 Nothern North East 0 1 240
## 37 Nothern North East 0 1 100
## 38 Nothern North East 0 1 175
## 39 Nothern North East 0 1 220
## 40 Nothern North East 0 1 100
## 41 Nothern North East 0 1 87
## 42 Nothern North East 0 1 460
## 43 Nothern North East 0 1 40
## 44 Nothern North East 0 1 112
## 45 Nothern North East 0 1 220
## 46 Nothern North East 0 1 210
## 47 Nothern North East 0 1 175
## 48 Nothern North East 0 1 100
## 49 Nothern North East 0 1 110
## 50 Nothern North East 0 1 25
## 51 Nothern North East 0 1 260
## 52 Nothern North East 0 1 225
## 53 Nothern North East 0 1 170
## 54 Nothern North East 0 1 280
## 55 Nothern North East 0 1 112
## 56 Nothern North East 0 1 170
## 57 Nothern North East 0 1 280
## 58 Southern South West 3 1 437
## 59 Southern South West 3 1 130
## 60 Southern South West 3 1 640
## 61 Nothern North East 0 1 375
## 62 Nothern North East 0 1 320
## 63 Nothern North East 0 1 240
## 64 Nothern North East 0 1 375
## 65 Nothern North East 0 1 210
## 66 Nothern North East 0 1 800
## 67 Western Central West 5 1 387
## 68 Western Central West 5 1 170
## 69 Western Central West 5 1 280
## 70 Nothern North East 0 1 287
## 71 Nothern North East 0 1 180
## 72 Nothern North East 0 1 640
# 畫個不同月份跟季期的銷售量的圖
dta8 %>%
ggplot(aes(month, sales, color = quarter))+
stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
stat_smooth(aes(group = 1), method = lm, alpha = .5)+
theme_bw()+
facet_wrap(~year)+
labs(x = "Months", y = "Products average Sales", title = "Sales")

HW9
# 讀取檔案後可以發現,基本上色彩在桃園市所做的分析當中並沒有太大的意義
# 在第一部分的分析上,突然使用盒鬚圖有點不知所云,似乎沒有一定要用的必要性
# 第一部分的第二個月分趨勢,前面分成每個月分的長條圖有點看不出變化,可以以下方的趨勢折線圖放在一起,並且加入整體的線條,比較容易看出差別
# 2.2部分對於長方圖直條圖的差異好像有點問題,然後並不是比例類型的變項用著色似乎不太容易看清楚
# 每個月分做成三個type的折線圖似乎可以看得比較清楚?
# 檔案有點讀不進去,修正中