5/7 homework

set up

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(tidyr)
pacman::p_load(tidyverse, lattice, magrittr)

ex2

dta <- read.csv("C:/Users/she22_000/Documents/nlsy86long.csv", header = T, sep = ",")

透過觀察性別與種族在閱讀跟數學分數上的分布圖，發現不同種族的男女與數學成績和閱讀成績呈正相關。

dta %>% 
  ggplot()+ geom_smooth(mapping=aes(math,read,color=sex))  +theme_light() + facet_grid(.~race)

## `geom_smooth()` using method = 'loess'

觀察不同就學階段的男女在數學分數與閱讀分數上的差異，發現在0年級也就是幼稚園階段男女的差異不明顯，但隨著就學階段的提升，性別在分數上好像產生了作用，但還需要進一步檢驗此差異是否顯著。

dta %>% gather(subject,score,8:9) %>%
  ggplot(.,aes(grade,score,color=sex)) +
  stat_summary(fun.data = mean_se , geom = "pointrange",
               position = position_dodge(0.3)) + 
  facet_grid(.~subject)+
  theme_light()

觀察不同就學階段及不同種族的學生在閱讀及數學分數上的差異，發現種族比起性別在數學及閱讀分數上產生了更大的作用，因種族的不同，數學及閱讀分數的差距在不同就學階段有更大的差距，不過其差距是否顯著需進一步檢驗。

dta %>% gather(subject,score,8:9) %>%
  ggplot(.,aes(year,score,color=race)) +
  stat_summary(fun.data = mean_se , geom = "pointrange",
               position = position_dodge(0.3)) + 
  facet_grid(.~subject)+
  theme_light()

ex3

dta3 <- read.csv("C:/Users/she22_000/Documents/alcohol_age.csv", header = T, sep = ",")%>% 
 mutate(grp = if_else(Age >= 21, "Yes", "No"))
## 圖1
xyplot(Alcohol ~ Age, group = grp,
       data = dta3, type = c("g", "r", "p"), auto.key = list(column = 2),
       xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")

aggregate(Alcohol ~ grp, FUN = mean, data = dta3) ## 圖2

##   grp  Alcohol
## 1  No 1.032118
## 2 Yes 1.482557

ggplot(dta3, aes(Age, Alcohol))+
  geom_point(aes(color = grp), na.rm = TRUE)+
  geom_segment(aes(x = 19, xend = 21, y = 1.032, yend = 1.032), color = "tomato")+
  geom_segment(aes(x = 21, xend = 23, y = 1.483, yend = 1.483), color = "turquoise")+
  theme(legend.position = "none")+
  labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")

圖2更能解釋數據，因為可直接看出以年齡21歲為分界，造成的酗酒死亡率的趨勢。

ex4

dta4 <- read.csv("C:/Users/she22_000/Documents/dta04.csv", header = T)
colnames(dta4)<- c("country","25-34","35-44","45-54","55-64","65-74")
knitr::kable(dta4)

country	25-34	35-44	45-54	55-64	65-74
Canada	22	27	31	34	24
Israel	9	19	10	14	27
Japan	22	19	21	31	49
Austria	29	40	52	53	69
France	16	25	36	47	56
Germany	28	35	41	49	52
Hungary	48	65	84	81	107
Italy	7	8	11	18	27
Holland	8	11	18	20	28
Poland	26	29	36	32	28
Spain	4	7	10	16	22
Sweden	28	41	46	51	35
Swiss	22	34	41	50	51
UK	10	13	15	17	22
USA	20	22	28	33	37

ex5

dta5 <- read.table("C:/Users/she22_000/Documents/coping.txt", header = T)
knitr::kable(head(dta5))

annoy	sad	afraid	angry	approach	avoid	support	agressive	situation	sbj
4	2	2	2	1.00	2.00	1.00	2.50	Fail	S2
4	4	4	2	4.00	3.00	1.25	1.50	NoPart	S2
2	2	2	2	2.67	3.00	1.00	2.33	TeacNo	S2
4	3	4	4	4.00	1.50	3.25	1.00	Bully	S2
4	2	1	1	1.00	2.75	1.25	1.50	Work	S2
4	3	1	4	2.33	2.50	1.00	3.67	MomNo	S2

dta5 %>% gather(emotion,score,1:4) %>% 
  ggplot(.,aes(emotion,score,color=situation)) + 
  stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
  theme_bw()+
  labs(x="emotion",y="Score")

上圖可看出不同情況時，所產生的情緒與分數之間的關係。

dta5 %>% gather(coping,score,5:8) %>% 
  ggplot(.,aes(coping,score,color=situation)) + 
  stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
  theme_bw()+
  labs(x="coping",y="Score")

可看出在不同情況時，不同策略與分數之間的關係。

ex6

ex7

dta7 <- read.table("C:/Users/she22_000/Documents/beautyCourseEval.txt", header = T)
colnames(dta7)<- c("eval","beauty","sex","age","minority","tenure,","ID")
dta7 %>%  mutate(courseID=as.factor(ID)) %>%
  mutate(gender=factor(dta7$sex,levels = c(0,1),labels = c("male","female"))) %>%
  xyplot(eval ~ beauty|courseID, data = ., type = c("r", "g", "p"),
         index.cond=function(x, y)coef(lm(y ~ x))[1])

ex8

library(sas7bdat)
dta8 <- read.sas7bdat("sales.sas7bdat")
dta8 %>%   
  mutate(region = factor(region, 1:4, c("Nothern", "Southern", "Eastern", "Western")),
         district = factor(district, 1:5, c("North East", "South East", "South West", "North West", "Central West")),
         quarter = factor(quarter, 1:4, c("1st", "2nd", "3rd", "4th")),
         month = factor(month, 1:12, month.abb),
         sales = ifelse(sales < 0, 0, sales),
         market = factor(market))

##     product category customer year month quarter market sales expense
## 1     Shoes    Shoes     Acme 2001   Jan     1st      1   300     240
## 2     Boots    Shoes     Acme 2001   Jan     1st      1  2200    1540
## 3  Slippers Slippers     Acme 2001   Jan     1st      1   900     540
## 4     Shoes    Shoes     Acme 2001   Feb     1st      1   100      80
## 5     Boots    Shoes     Acme 2001   Feb     1st      1  1400     980
## 6  Slippers Slippers     Acme 2001   Feb     1st      1     0       0
## 7     Shoes    Shoes     Acme 2001   Mar     1st      1   600     480
## 8     Boots    Shoes     Acme 2001   Mar     1st      1     0       0
## 9  Slippers Slippers     Acme 2001   Mar     1st      1  1400     840
## 10    Shoes    Shoes     Acme 2001   Apr     2nd      1  2600    2080
## 11    Boots    Shoes     Acme 2001   Apr     2nd      1  1500    1050
## 12 Slippers Slippers     Acme 2001   Apr     2nd      1  1000     600
## 13    Shoes    Shoes     Acme 2001   May     2nd      2  2400    1920
## 14    Boots    Shoes     Acme 2001   May     2nd      2     0    -280
## 15 Slippers Slippers     Acme 2001   May     2nd      2   700     420
## 16    Shoes    Shoes     Acme 2001   Jun     2nd      2  1000     800
## 17    Boots    Shoes     Acme 2001   Jun     2nd      2  1100     770
## 18 Slippers Slippers     Acme 2001   Jun     2nd      2   300     180
## 19    Shoes    Shoes     Acme 2001   Jul     3rd      2  3700    2960
## 20    Boots    Shoes     Acme 2001   Jul     3rd      2  1600    1120
## 21 Slippers Slippers     Acme 2001   Jul     3rd      2  2800    1680
## 22    Shoes    Shoes  TwoFeet 2001   Aug     3rd      2  2800    2240
## 23    Boots    Shoes  TwoFeet 2001   Aug     3rd      2  2800    1960
## 24 Slippers Slippers  TwoFeet 2001   Aug     3rd      2  1100     660
## 25    Shoes    Shoes     Acme 2001   Sep     3rd      2  1900    1520
## 26    Boots    Shoes     Acme 2001   Sep     3rd      2  2300    1610
## 27 Slippers Slippers     Acme 2001   Sep     3rd      2  1800    1080
## 28    Shoes    Shoes     Acme 2001   Oct     4th      2  2100    1680
## 29    Boots    Shoes     Acme 2001   Oct     4th      2  3200    2240
## 30 Slippers Slippers     Acme 2001   Oct     4th      2  4700    2820
## 31    Shoes    Shoes     BigX 2001   Nov     4th      2  2900    2320
## 32    Boots    Shoes     BigX 2001   Nov     4th      2  2500    1750
## 33 Slippers Slippers     BigX 2001   Nov     4th      2  3800    2280
## 34    Shoes    Shoes     Acme 2001   Dec     4th      2  2300    1840
## 35    Boots    Shoes     Acme 2001   Dec     4th      2  1900    1330
## 36 Slippers Slippers     Acme 2001   Dec     4th      2  1200     720
## 37    Shoes    Shoes     Acme 2002   Jan     1st      1  1000     800
## 38    Boots    Shoes     Acme 2002   Jan     1st      1     0    -980
## 39 Slippers Slippers     Acme 2002   Jan     1st      1  1100     660
## 40    Shoes    Shoes     Acme 2002   Feb     1st      1  1000     800
## 41    Boots    Shoes     Acme 2002   Feb     1st      1   700     490
## 42 Slippers Slippers     Acme 2002   Feb     1st      1  2300    1380
## 43    Shoes    Shoes     Acme 2002   Mar     1st      1   400     320
## 44    Boots    Shoes     Acme 2002   Mar     1st      1     0    -630
## 45 Slippers Slippers     Acme 2002   Mar     1st      1  1100     660
## 46    Shoes    Shoes     Acme 2002   Apr     2nd      1  2100    1680
## 47    Boots    Shoes     Acme 2002   Apr     2nd      1  1400     980
## 48 Slippers Slippers     Acme 2002   Apr     2nd      1   500     300
## 49    Shoes    Shoes     Acme 2002   May     2nd      2  1100     880
## 50    Boots    Shoes     Acme 2002   May     2nd      2   200     140
## 51 Slippers Slippers     Acme 2002   May     2nd      2  1300     780
## 52    Boots    Shoes     Acme 2002   Jun     2nd      2  1800    1260
## 53    Shoes    Shoes     Acme 2002   Jun     2nd      2  1700    1360
## 54 Slippers Slippers     Acme 2002   Jun     2nd      2  1400     840
## 55    Boots    Shoes     Acme 2002   Jul     3rd      2     0    -630
## 56    Shoes    Shoes     Acme 2002   Jul     3rd      2  1700    1360
## 57 Slippers Slippers     Acme 2002   Jul     3rd      2  1400     840
## 58    Boots    Shoes  TwoFeet 2002   Aug     3rd      2  3500    2450
## 59    Shoes    Shoes  TwoFeet 2002   Aug     3rd      2  1300    1040
## 60 Slippers Slippers  TwoFeet 2002   Aug     3rd      2  3200    1920
## 61    Boots    Shoes     Acme 2002   Sep     3rd      2  3000    2100
## 62    Shoes    Shoes     Acme 2002   Sep     3rd      2  3200    2560
## 63 Slippers Slippers     Acme 2002   Sep     3rd      2  1200     720
## 64    Boots    Shoes     Acme 2002   Oct     4th      2  3000    2100
## 65    Shoes    Shoes     Acme 2002   Oct     4th      2  4000    2400
## 66 Slippers Slippers     Acme 2002   Oct     4th      2  4000    2400
## 67    Boots    Shoes     BigX 2002   Nov     4th      2  3100    2170
## 68    Shoes    Shoes     BigX 2002   Nov     4th      2  1700    1360
## 69 Slippers Slippers     BigX 2002   Nov     4th      2  1400     840
## 70    Boots    Shoes     Acme 2002   Dec     4th      2  2300    1610
## 71    Shoes    Shoes     Acme 2002   Dec     4th      2  1800    1440
## 72 Slippers Slippers     Acme 2002   Dec     4th      2  3200    1920
##      region     district return constantv quantity
## 1   Nothern   North East      0         1       30
## 2   Nothern   North East      0         1      275
## 3   Nothern   North East      0         1      180
## 4   Nothern   North East      0         1       10
## 5   Nothern   North East      0         1      175
## 6   Nothern   North East      0         1        0
## 7   Nothern   North East      0         1       60
## 8   Nothern   North East      0         1        0
## 9   Nothern   North East      0         1      280
## 10  Nothern   North East      0         1      260
## 11  Nothern   North East      0         1      187
## 12  Nothern   North East      0         1      200
## 13  Nothern   North East      0         1      240
## 14  Nothern   North East      0         1       50
## 15  Nothern   North East      0         1      140
## 16  Nothern   North East      0         1      100
## 17  Nothern   North East      0         1      137
## 18  Nothern   North East      0         1       60
## 19  Nothern   North East      0         1      370
## 20  Nothern   North East      0         1      200
## 21  Nothern   North East      0         1      560
## 22 Southern   South West      3         1      280
## 23 Southern   South West      3         1      350
## 24 Southern   South West      3         1      220
## 25  Nothern   North East      0         1      190
## 26  Nothern   North East      0         1      287
## 27  Nothern   North East      0         1      360
## 28  Nothern   North East      0         1      210
## 29  Nothern   North East      0         1      400
## 30  Nothern   North East      0         1      940
## 31  Western Central West      5         1      290
## 32  Western Central West      5         1      312
## 33  Western Central West      5         1      760
## 34  Nothern   North East      0         1      230
## 35  Nothern   North East      0         1      237
## 36  Nothern   North East      0         1      240
## 37  Nothern   North East      0         1      100
## 38  Nothern   North East      0         1      175
## 39  Nothern   North East      0         1      220
## 40  Nothern   North East      0         1      100
## 41  Nothern   North East      0         1       87
## 42  Nothern   North East      0         1      460
## 43  Nothern   North East      0         1       40
## 44  Nothern   North East      0         1      112
## 45  Nothern   North East      0         1      220
## 46  Nothern   North East      0         1      210
## 47  Nothern   North East      0         1      175
## 48  Nothern   North East      0         1      100
## 49  Nothern   North East      0         1      110
## 50  Nothern   North East      0         1       25
## 51  Nothern   North East      0         1      260
## 52  Nothern   North East      0         1      225
## 53  Nothern   North East      0         1      170
## 54  Nothern   North East      0         1      280
## 55  Nothern   North East      0         1      112
## 56  Nothern   North East      0         1      170
## 57  Nothern   North East      0         1      280
## 58 Southern   South West      3         1      437
## 59 Southern   South West      3         1      130
## 60 Southern   South West      3         1      640
## 61  Nothern   North East      0         1      375
## 62  Nothern   North East      0         1      320
## 63  Nothern   North East      0         1      240
## 64  Nothern   North East      0         1      375
## 65  Nothern   North East      0         1      210
## 66  Nothern   North East      0         1      800
## 67  Western Central West      5         1      387
## 68  Western Central West      5         1      170
## 69  Western Central West      5         1      280
## 70  Nothern   North East      0         1      287
## 71  Nothern   North East      0         1      180
## 72  Nothern   North East      0         1      640

dta8 %>% 
  ggplot(aes(month, sales, color = quarter))+
  stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
  stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
  stat_smooth(aes(group = 1), method = lm, alpha = .5)+
  theme_bw()+
  facet_wrap(~product)+
  labs(x = "Months", y = "Products average Sales", title = "Sales")

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

上圖為不同鞋款在不同月份及季的銷量圖。

dta8 %>% 
  ggplot(aes(month, sales, color = quarter))+
  stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
  stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
  stat_smooth(aes(group = 1), method = lm, alpha = .5)+
  theme_bw()+
  facet_wrap(~year)+
  labs(x = "Months", y = "Products average Sales", title = "Sales")

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function

上圖為不同月份及季的總銷量圖。

ex9

在資料分析-月份趨勢中其“針對前五舉發種類，我們產生每個月的長條圖統計”的12張圖是浪費空間的，使用長條圖來展示數量的多寡或變化情形讓人很難一眼看出變化，可以直接呈現網頁下方的趨勢線，並再做一張所有交通違規種類的合併並趨勢圖。

5/7 homework

jiru shih

2018年5月13日

set up

ex2

ex3

ex4

ex5

ex6

ex7

ex8

ex9