HW2

# 讀檔案,變項用逗點區分
dta2 <- read.table("nlsy86long.csv",header = T,sep=",")

作性別與種族在閱讀跟數學兩項成績的分布圖,看起來都是正相關

dta2 %>% 
  ggplot()+ geom_smooth(mapping=aes(math,read,color=race))  +theme_light() + facet_grid(.~sex)
## `geom_smooth()` using method = 'loess'

接下來觀察在總體成績方面,不同的在學階段是否存在影響

dta2 %>% 
  gather(subject,score,8:9) %>%
  ggplot(.,aes(grade,score,color=sex)) +
  stat_summary(fun.data = mean_se , geom = "pointrange",
               position = position_dodge(0.3)) + 
  facet_grid(.~subject)+
  theme_light()

結果呈現了在幼稚園性別似乎是沒有差異的,但隨著年級上升男生的數學與女生的閱讀能力好像逐漸在性別上有優勢,但仍需進一步檢定

HW3

dta3 <- read.csv("alcohol_age.csv", header = T,sep=",") %>% mutate(over21=ifelse(Age>=21,"yes","no"))
## Warning: package 'bindrcpp' was built under R version 3.4.4
# 作個變項確定是否滿21歲,然後畫圖
dta3 %>% 
  xyplot(Alcohol ~ Age, group = over21,
       data =., type = c("g", "r", "p"), 
       xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")

# 參考子揚的做法畫第二張圖 
aggregate(Alcohol ~ over21, FUN = mean,data=dta3)
##   over21  Alcohol
## 1     no 1.032118
## 2    yes 1.482557
ggplot(dta3, aes(Age, Alcohol))+
  geom_point(aes(color = over21), na.rm = TRUE)+
  geom_segment(aes(x = 19, xend = 21, y = 1.032, yend = 1.032), color = "tomato")+
  geom_segment(aes(x = 21, xend = 23, y = 1.483, yend = 1.483), color = "turquoise")+
  theme(legend.position = "none")+
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

HW4

# 將表格整理弄成檔案讀取,其中因為表格整理方便把瑞士與荷蘭稍作修正
dta4 <- read.table("dta4.txt", header=T)
# Header有點跑掉,改回來
colnames(dta4)<- c("country","25-34","35-44","45-54","55-64","65-74")
knitr::kable(dta4)
country 25-34 35-44 45-54 55-64 65-74
Canada 22 27 31 34 24
Israel 9 19 10 14 27
Japan 22 19 21 31 49
Austria 29 40 52 53 69
France 16 25 36 47 56
Germany 28 35 41 49 52
Hungary 48 65 84 81 107
Italy 7 8 11 18 27
Holland 8 11 18 20 28
Poland 26 29 36 32 28
Spain 4 7 10 16 22
Sweden 28 41 46 51 35
Swiss 22 34 41 50 51
UK 10 13 15 17 22
USA 20 22 28 33 37

初步檢視似乎男性自殺率是東歐高於西歐,看看盒鬚圖

dta4 %>% 
  gather(Age,cases,2:6) %>%
  ggplot(.,aes(reorder(country,-cases),cases)) + 
  geom_boxplot() + theme_bw() +
  labs(x="Country",y="Deaths per 100,000 from male suicides")

可以觀察到的確東歐男性的自殺率高於西歐,而北美跟西歐則差不多

再做一張以年齡為基準的圖

dta4 %>% gather(Age, Rate, 2:6) %>%
  ggplot(aes(Age, Rate))+
  geom_boxplot()+
  labs(x = "Age", y = "Deaths per 100,000 from male suicides")

HW5

dta5 <- read.table("coping.txt",header=T)
knitr::kable(head(dta5))
annoy sad afraid angry approach avoid support agressive situation sbj
4 2 2 2 1.00 2.00 1.00 2.50 Fail S2
4 4 4 2 4.00 3.00 1.25 1.50 NoPart S2
2 2 2 2 2.67 3.00 1.00 2.33 TeacNo S2
4 3 4 4 4.00 1.50 3.25 1.00 Bully S2
4 2 1 1 1.00 2.75 1.25 1.50 Work S2
4 3 1 4 2.33 2.50 1.00 3.67 MomNo S2
# 大概可以看到有情緒相關的變項跟處理策略與情境等,評分為1~4(不曾~時常)
dta5 %>% gather(emotion,score,1:4) %>% 
  ggplot(.,aes(emotion,score,color=situation)) + 
  stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
  theme_bw()+
  labs(x="emotion",y="Score")

可以看到在不同情境之下,不同情感類型與分數間的關聯

dta5 %>% gather(coping,score,5:8) %>% 
  ggplot(.,aes(coping,score,color=situation)) + 
  stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
  theme_bw()+
  labs(x="coping",y="Score")

可以看到在不同情境之下,不同策略與分數間的關聯

HW6

HW7

dta7 <- read.table("beautyCourseEval.txt",header=T)
dta7 %>% 
  mutate(CourseID = factor(courseID),
         Gender = factor(sex, levels = c(0, 1), labels = c("Male", "Female"))) %>%
  xyplot(eval ~ beauty | CourseID, group = Gender,
       data = ., type = c("g", "r", "p"),
       index.cond = function(x, y) coef(lm(y ~ x))[2], 
       xlab = "Beauty score", ylab = "Course evaluation score",
       lattice.options = list(panel.error = "warning"), auto.key = list(column = 2))

HW8

library(sas7bdat)
dta8 <- read.sas7bdat("sales.sas7bdat")
dta8 %>%   
  mutate(region = factor(region, 1:4, c("Nothern", "Southern", "Eastern", "Western")),
         district = factor(district, 1:5, c("North East", "South East", "South West", "North West", "Central West")),
         quarter = factor(quarter, 1:4, c("1st", "2nd", "3rd", "4th")),
         month = factor(month, 1:12, month.abb),
         sales = ifelse(sales < 0, 0, sales),
         market = factor(market))
##     product category customer year month quarter market sales expense
## 1     Shoes    Shoes     Acme 2001   Jan     1st      1   300     240
## 2     Boots    Shoes     Acme 2001   Jan     1st      1  2200    1540
## 3  Slippers Slippers     Acme 2001   Jan     1st      1   900     540
## 4     Shoes    Shoes     Acme 2001   Feb     1st      1   100      80
## 5     Boots    Shoes     Acme 2001   Feb     1st      1  1400     980
## 6  Slippers Slippers     Acme 2001   Feb     1st      1     0       0
## 7     Shoes    Shoes     Acme 2001   Mar     1st      1   600     480
## 8     Boots    Shoes     Acme 2001   Mar     1st      1     0       0
## 9  Slippers Slippers     Acme 2001   Mar     1st      1  1400     840
## 10    Shoes    Shoes     Acme 2001   Apr     2nd      1  2600    2080
## 11    Boots    Shoes     Acme 2001   Apr     2nd      1  1500    1050
## 12 Slippers Slippers     Acme 2001   Apr     2nd      1  1000     600
## 13    Shoes    Shoes     Acme 2001   May     2nd      2  2400    1920
## 14    Boots    Shoes     Acme 2001   May     2nd      2     0    -280
## 15 Slippers Slippers     Acme 2001   May     2nd      2   700     420
## 16    Shoes    Shoes     Acme 2001   Jun     2nd      2  1000     800
## 17    Boots    Shoes     Acme 2001   Jun     2nd      2  1100     770
## 18 Slippers Slippers     Acme 2001   Jun     2nd      2   300     180
## 19    Shoes    Shoes     Acme 2001   Jul     3rd      2  3700    2960
## 20    Boots    Shoes     Acme 2001   Jul     3rd      2  1600    1120
## 21 Slippers Slippers     Acme 2001   Jul     3rd      2  2800    1680
## 22    Shoes    Shoes  TwoFeet 2001   Aug     3rd      2  2800    2240
## 23    Boots    Shoes  TwoFeet 2001   Aug     3rd      2  2800    1960
## 24 Slippers Slippers  TwoFeet 2001   Aug     3rd      2  1100     660
## 25    Shoes    Shoes     Acme 2001   Sep     3rd      2  1900    1520
## 26    Boots    Shoes     Acme 2001   Sep     3rd      2  2300    1610
## 27 Slippers Slippers     Acme 2001   Sep     3rd      2  1800    1080
## 28    Shoes    Shoes     Acme 2001   Oct     4th      2  2100    1680
## 29    Boots    Shoes     Acme 2001   Oct     4th      2  3200    2240
## 30 Slippers Slippers     Acme 2001   Oct     4th      2  4700    2820
## 31    Shoes    Shoes     BigX 2001   Nov     4th      2  2900    2320
## 32    Boots    Shoes     BigX 2001   Nov     4th      2  2500    1750
## 33 Slippers Slippers     BigX 2001   Nov     4th      2  3800    2280
## 34    Shoes    Shoes     Acme 2001   Dec     4th      2  2300    1840
## 35    Boots    Shoes     Acme 2001   Dec     4th      2  1900    1330
## 36 Slippers Slippers     Acme 2001   Dec     4th      2  1200     720
## 37    Shoes    Shoes     Acme 2002   Jan     1st      1  1000     800
## 38    Boots    Shoes     Acme 2002   Jan     1st      1     0    -980
## 39 Slippers Slippers     Acme 2002   Jan     1st      1  1100     660
## 40    Shoes    Shoes     Acme 2002   Feb     1st      1  1000     800
## 41    Boots    Shoes     Acme 2002   Feb     1st      1   700     490
## 42 Slippers Slippers     Acme 2002   Feb     1st      1  2300    1380
## 43    Shoes    Shoes     Acme 2002   Mar     1st      1   400     320
## 44    Boots    Shoes     Acme 2002   Mar     1st      1     0    -630
## 45 Slippers Slippers     Acme 2002   Mar     1st      1  1100     660
## 46    Shoes    Shoes     Acme 2002   Apr     2nd      1  2100    1680
## 47    Boots    Shoes     Acme 2002   Apr     2nd      1  1400     980
## 48 Slippers Slippers     Acme 2002   Apr     2nd      1   500     300
## 49    Shoes    Shoes     Acme 2002   May     2nd      2  1100     880
## 50    Boots    Shoes     Acme 2002   May     2nd      2   200     140
## 51 Slippers Slippers     Acme 2002   May     2nd      2  1300     780
## 52    Boots    Shoes     Acme 2002   Jun     2nd      2  1800    1260
## 53    Shoes    Shoes     Acme 2002   Jun     2nd      2  1700    1360
## 54 Slippers Slippers     Acme 2002   Jun     2nd      2  1400     840
## 55    Boots    Shoes     Acme 2002   Jul     3rd      2     0    -630
## 56    Shoes    Shoes     Acme 2002   Jul     3rd      2  1700    1360
## 57 Slippers Slippers     Acme 2002   Jul     3rd      2  1400     840
## 58    Boots    Shoes  TwoFeet 2002   Aug     3rd      2  3500    2450
## 59    Shoes    Shoes  TwoFeet 2002   Aug     3rd      2  1300    1040
## 60 Slippers Slippers  TwoFeet 2002   Aug     3rd      2  3200    1920
## 61    Boots    Shoes     Acme 2002   Sep     3rd      2  3000    2100
## 62    Shoes    Shoes     Acme 2002   Sep     3rd      2  3200    2560
## 63 Slippers Slippers     Acme 2002   Sep     3rd      2  1200     720
## 64    Boots    Shoes     Acme 2002   Oct     4th      2  3000    2100
## 65    Shoes    Shoes     Acme 2002   Oct     4th      2  4000    2400
## 66 Slippers Slippers     Acme 2002   Oct     4th      2  4000    2400
## 67    Boots    Shoes     BigX 2002   Nov     4th      2  3100    2170
## 68    Shoes    Shoes     BigX 2002   Nov     4th      2  1700    1360
## 69 Slippers Slippers     BigX 2002   Nov     4th      2  1400     840
## 70    Boots    Shoes     Acme 2002   Dec     4th      2  2300    1610
## 71    Shoes    Shoes     Acme 2002   Dec     4th      2  1800    1440
## 72 Slippers Slippers     Acme 2002   Dec     4th      2  3200    1920
##      region     district return constantv quantity
## 1   Nothern   North East      0         1       30
## 2   Nothern   North East      0         1      275
## 3   Nothern   North East      0         1      180
## 4   Nothern   North East      0         1       10
## 5   Nothern   North East      0         1      175
## 6   Nothern   North East      0         1        0
## 7   Nothern   North East      0         1       60
## 8   Nothern   North East      0         1        0
## 9   Nothern   North East      0         1      280
## 10  Nothern   North East      0         1      260
## 11  Nothern   North East      0         1      187
## 12  Nothern   North East      0         1      200
## 13  Nothern   North East      0         1      240
## 14  Nothern   North East      0         1       50
## 15  Nothern   North East      0         1      140
## 16  Nothern   North East      0         1      100
## 17  Nothern   North East      0         1      137
## 18  Nothern   North East      0         1       60
## 19  Nothern   North East      0         1      370
## 20  Nothern   North East      0         1      200
## 21  Nothern   North East      0         1      560
## 22 Southern   South West      3         1      280
## 23 Southern   South West      3         1      350
## 24 Southern   South West      3         1      220
## 25  Nothern   North East      0         1      190
## 26  Nothern   North East      0         1      287
## 27  Nothern   North East      0         1      360
## 28  Nothern   North East      0         1      210
## 29  Nothern   North East      0         1      400
## 30  Nothern   North East      0         1      940
## 31  Western Central West      5         1      290
## 32  Western Central West      5         1      312
## 33  Western Central West      5         1      760
## 34  Nothern   North East      0         1      230
## 35  Nothern   North East      0         1      237
## 36  Nothern   North East      0         1      240
## 37  Nothern   North East      0         1      100
## 38  Nothern   North East      0         1      175
## 39  Nothern   North East      0         1      220
## 40  Nothern   North East      0         1      100
## 41  Nothern   North East      0         1       87
## 42  Nothern   North East      0         1      460
## 43  Nothern   North East      0         1       40
## 44  Nothern   North East      0         1      112
## 45  Nothern   North East      0         1      220
## 46  Nothern   North East      0         1      210
## 47  Nothern   North East      0         1      175
## 48  Nothern   North East      0         1      100
## 49  Nothern   North East      0         1      110
## 50  Nothern   North East      0         1       25
## 51  Nothern   North East      0         1      260
## 52  Nothern   North East      0         1      225
## 53  Nothern   North East      0         1      170
## 54  Nothern   North East      0         1      280
## 55  Nothern   North East      0         1      112
## 56  Nothern   North East      0         1      170
## 57  Nothern   North East      0         1      280
## 58 Southern   South West      3         1      437
## 59 Southern   South West      3         1      130
## 60 Southern   South West      3         1      640
## 61  Nothern   North East      0         1      375
## 62  Nothern   North East      0         1      320
## 63  Nothern   North East      0         1      240
## 64  Nothern   North East      0         1      375
## 65  Nothern   North East      0         1      210
## 66  Nothern   North East      0         1      800
## 67  Western Central West      5         1      387
## 68  Western Central West      5         1      170
## 69  Western Central West      5         1      280
## 70  Nothern   North East      0         1      287
## 71  Nothern   North East      0         1      180
## 72  Nothern   North East      0         1      640
# 畫個不同月份跟季期的銷售量的圖
dta8 %>% 
  ggplot(aes(month, sales, color = quarter))+
  stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
  stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
  stat_smooth(aes(group = 1), method = lm, alpha = .5)+
  theme_bw()+
  facet_wrap(~year)+
  labs(x = "Months", y = "Products average Sales", title = "Sales")

HW9

# 讀取檔案後可以發現,基本上色彩在桃園市所做的分析當中並沒有太大的意義
# 在第一部分的分析上,突然使用盒鬚圖有點不知所云,似乎沒有一定要用的必要性
# 第一部分的第二個月分趨勢,前面分成每個月分的長條圖有點看不出變化,可以以下方的趨勢折線圖放在一起,並且加入整體的線條,比較容易看出差別

# 2.2部分對於長方圖直條圖的差異好像有點問題,然後並不是比例類型的變項用著色似乎不太容易看清楚
# 每個月分做成三個type的折線圖似乎可以看得比較清楚?

# 檔案有點讀不進去,修正中