library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
pacman::p_load(tidyverse, lattice, magrittr)
dta <- read.csv("C:/Users/she22_000/Documents/nlsy86long.csv", header = T, sep = ",")
透過觀察性別與種族在閱讀跟數學分數上的分布圖,發現不同種族的男女與數學成績和閱讀成績呈正相關。
dta %>%
ggplot()+ geom_smooth(mapping=aes(math,read,color=sex)) +theme_light() + facet_grid(.~race)
## `geom_smooth()` using method = 'loess'
觀察不同就學階段的男女在數學分數與閱讀分數上的差異,發現在0年級也就是幼稚園階段男女的差異不明顯,但隨著就學階段的提升,性別在分數上好像產生了作用,但還需要進一步檢驗此差異是否顯著。
dta %>% gather(subject,score,8:9) %>%
ggplot(.,aes(grade,score,color=sex)) +
stat_summary(fun.data = mean_se , geom = "pointrange",
position = position_dodge(0.3)) +
facet_grid(.~subject)+
theme_light()
觀察不同就學階段及不同種族的學生在閱讀及數學分數上的差異,發現種族比起性別在數學及閱讀分數上產生了更大的作用,因種族的不同,數學及閱讀分數的差距在不同就學階段有更大的差距,不過其差距是否顯著需進一步檢驗。
dta %>% gather(subject,score,8:9) %>%
ggplot(.,aes(year,score,color=race)) +
stat_summary(fun.data = mean_se , geom = "pointrange",
position = position_dodge(0.3)) +
facet_grid(.~subject)+
theme_light()
dta3 <- read.csv("C:/Users/she22_000/Documents/alcohol_age.csv", header = T, sep = ",")%>%
mutate(grp = if_else(Age >= 21, "Yes", "No"))
## 圖1
xyplot(Alcohol ~ Age, group = grp,
data = dta3, type = c("g", "r", "p"), auto.key = list(column = 2),
xlab = "Age (year)", ylab = "Mortality rate from alcohol abuse (per 100,000)")
aggregate(Alcohol ~ grp, FUN = mean, data = dta3) ## 圖2
## grp Alcohol
## 1 No 1.032118
## 2 Yes 1.482557
ggplot(dta3, aes(Age, Alcohol))+
geom_point(aes(color = grp), na.rm = TRUE)+
geom_segment(aes(x = 19, xend = 21, y = 1.032, yend = 1.032), color = "tomato")+
geom_segment(aes(x = 21, xend = 23, y = 1.483, yend = 1.483), color = "turquoise")+
theme(legend.position = "none")+
labs(x = "Age (year)", y = "Mortality rate from alcohol abuse (per 100,000)")
圖2更能解釋數據,因為可直接看出以年齡21歲為分界,造成的酗酒死亡率的趨勢。
dta4 <- read.csv("C:/Users/she22_000/Documents/dta04.csv", header = T)
colnames(dta4)<- c("country","25-34","35-44","45-54","55-64","65-74")
knitr::kable(dta4)
| country | 25-34 | 35-44 | 45-54 | 55-64 | 65-74 |
|---|---|---|---|---|---|
| Canada | 22 | 27 | 31 | 34 | 24 |
| Israel | 9 | 19 | 10 | 14 | 27 |
| Japan | 22 | 19 | 21 | 31 | 49 |
| Austria | 29 | 40 | 52 | 53 | 69 |
| France | 16 | 25 | 36 | 47 | 56 |
| Germany | 28 | 35 | 41 | 49 | 52 |
| Hungary | 48 | 65 | 84 | 81 | 107 |
| Italy | 7 | 8 | 11 | 18 | 27 |
| Holland | 8 | 11 | 18 | 20 | 28 |
| Poland | 26 | 29 | 36 | 32 | 28 |
| Spain | 4 | 7 | 10 | 16 | 22 |
| Sweden | 28 | 41 | 46 | 51 | 35 |
| Swiss | 22 | 34 | 41 | 50 | 51 |
| UK | 10 | 13 | 15 | 17 | 22 |
| USA | 20 | 22 | 28 | 33 | 37 |
dta5 <- read.table("C:/Users/she22_000/Documents/coping.txt", header = T)
knitr::kable(head(dta5))
| annoy | sad | afraid | angry | approach | avoid | support | agressive | situation | sbj |
|---|---|---|---|---|---|---|---|---|---|
| 4 | 2 | 2 | 2 | 1.00 | 2.00 | 1.00 | 2.50 | Fail | S2 |
| 4 | 4 | 4 | 2 | 4.00 | 3.00 | 1.25 | 1.50 | NoPart | S2 |
| 2 | 2 | 2 | 2 | 2.67 | 3.00 | 1.00 | 2.33 | TeacNo | S2 |
| 4 | 3 | 4 | 4 | 4.00 | 1.50 | 3.25 | 1.00 | Bully | S2 |
| 4 | 2 | 1 | 1 | 1.00 | 2.75 | 1.25 | 1.50 | Work | S2 |
| 4 | 3 | 1 | 4 | 2.33 | 2.50 | 1.00 | 3.67 | MomNo | S2 |
dta5 %>% gather(emotion,score,1:4) %>%
ggplot(.,aes(emotion,score,color=situation)) +
stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
theme_bw()+
labs(x="emotion",y="Score")
上圖可看出不同情況時,所產生的情緒與分數之間的關係。
dta5 %>% gather(coping,score,5:8) %>%
ggplot(.,aes(coping,score,color=situation)) +
stat_summary(fun.data = mean_se,position = position_dodge(0.3))+
theme_bw()+
labs(x="coping",y="Score")
可看出在不同情況時,不同策略與分數之間的關係。
dta7 <- read.table("C:/Users/she22_000/Documents/beautyCourseEval.txt", header = T)
colnames(dta7)<- c("eval","beauty","sex","age","minority","tenure,","ID")
dta7 %>% mutate(courseID=as.factor(ID)) %>%
mutate(gender=factor(dta7$sex,levels = c(0,1),labels = c("male","female"))) %>%
xyplot(eval ~ beauty|courseID, data = ., type = c("r", "g", "p"),
index.cond=function(x, y)coef(lm(y ~ x))[1])
library(sas7bdat)
dta8 <- read.sas7bdat("sales.sas7bdat")
dta8 %>%
mutate(region = factor(region, 1:4, c("Nothern", "Southern", "Eastern", "Western")),
district = factor(district, 1:5, c("North East", "South East", "South West", "North West", "Central West")),
quarter = factor(quarter, 1:4, c("1st", "2nd", "3rd", "4th")),
month = factor(month, 1:12, month.abb),
sales = ifelse(sales < 0, 0, sales),
market = factor(market))
## product category customer year month quarter market sales expense
## 1 Shoes Shoes Acme 2001 Jan 1st 1 300 240
## 2 Boots Shoes Acme 2001 Jan 1st 1 2200 1540
## 3 Slippers Slippers Acme 2001 Jan 1st 1 900 540
## 4 Shoes Shoes Acme 2001 Feb 1st 1 100 80
## 5 Boots Shoes Acme 2001 Feb 1st 1 1400 980
## 6 Slippers Slippers Acme 2001 Feb 1st 1 0 0
## 7 Shoes Shoes Acme 2001 Mar 1st 1 600 480
## 8 Boots Shoes Acme 2001 Mar 1st 1 0 0
## 9 Slippers Slippers Acme 2001 Mar 1st 1 1400 840
## 10 Shoes Shoes Acme 2001 Apr 2nd 1 2600 2080
## 11 Boots Shoes Acme 2001 Apr 2nd 1 1500 1050
## 12 Slippers Slippers Acme 2001 Apr 2nd 1 1000 600
## 13 Shoes Shoes Acme 2001 May 2nd 2 2400 1920
## 14 Boots Shoes Acme 2001 May 2nd 2 0 -280
## 15 Slippers Slippers Acme 2001 May 2nd 2 700 420
## 16 Shoes Shoes Acme 2001 Jun 2nd 2 1000 800
## 17 Boots Shoes Acme 2001 Jun 2nd 2 1100 770
## 18 Slippers Slippers Acme 2001 Jun 2nd 2 300 180
## 19 Shoes Shoes Acme 2001 Jul 3rd 2 3700 2960
## 20 Boots Shoes Acme 2001 Jul 3rd 2 1600 1120
## 21 Slippers Slippers Acme 2001 Jul 3rd 2 2800 1680
## 22 Shoes Shoes TwoFeet 2001 Aug 3rd 2 2800 2240
## 23 Boots Shoes TwoFeet 2001 Aug 3rd 2 2800 1960
## 24 Slippers Slippers TwoFeet 2001 Aug 3rd 2 1100 660
## 25 Shoes Shoes Acme 2001 Sep 3rd 2 1900 1520
## 26 Boots Shoes Acme 2001 Sep 3rd 2 2300 1610
## 27 Slippers Slippers Acme 2001 Sep 3rd 2 1800 1080
## 28 Shoes Shoes Acme 2001 Oct 4th 2 2100 1680
## 29 Boots Shoes Acme 2001 Oct 4th 2 3200 2240
## 30 Slippers Slippers Acme 2001 Oct 4th 2 4700 2820
## 31 Shoes Shoes BigX 2001 Nov 4th 2 2900 2320
## 32 Boots Shoes BigX 2001 Nov 4th 2 2500 1750
## 33 Slippers Slippers BigX 2001 Nov 4th 2 3800 2280
## 34 Shoes Shoes Acme 2001 Dec 4th 2 2300 1840
## 35 Boots Shoes Acme 2001 Dec 4th 2 1900 1330
## 36 Slippers Slippers Acme 2001 Dec 4th 2 1200 720
## 37 Shoes Shoes Acme 2002 Jan 1st 1 1000 800
## 38 Boots Shoes Acme 2002 Jan 1st 1 0 -980
## 39 Slippers Slippers Acme 2002 Jan 1st 1 1100 660
## 40 Shoes Shoes Acme 2002 Feb 1st 1 1000 800
## 41 Boots Shoes Acme 2002 Feb 1st 1 700 490
## 42 Slippers Slippers Acme 2002 Feb 1st 1 2300 1380
## 43 Shoes Shoes Acme 2002 Mar 1st 1 400 320
## 44 Boots Shoes Acme 2002 Mar 1st 1 0 -630
## 45 Slippers Slippers Acme 2002 Mar 1st 1 1100 660
## 46 Shoes Shoes Acme 2002 Apr 2nd 1 2100 1680
## 47 Boots Shoes Acme 2002 Apr 2nd 1 1400 980
## 48 Slippers Slippers Acme 2002 Apr 2nd 1 500 300
## 49 Shoes Shoes Acme 2002 May 2nd 2 1100 880
## 50 Boots Shoes Acme 2002 May 2nd 2 200 140
## 51 Slippers Slippers Acme 2002 May 2nd 2 1300 780
## 52 Boots Shoes Acme 2002 Jun 2nd 2 1800 1260
## 53 Shoes Shoes Acme 2002 Jun 2nd 2 1700 1360
## 54 Slippers Slippers Acme 2002 Jun 2nd 2 1400 840
## 55 Boots Shoes Acme 2002 Jul 3rd 2 0 -630
## 56 Shoes Shoes Acme 2002 Jul 3rd 2 1700 1360
## 57 Slippers Slippers Acme 2002 Jul 3rd 2 1400 840
## 58 Boots Shoes TwoFeet 2002 Aug 3rd 2 3500 2450
## 59 Shoes Shoes TwoFeet 2002 Aug 3rd 2 1300 1040
## 60 Slippers Slippers TwoFeet 2002 Aug 3rd 2 3200 1920
## 61 Boots Shoes Acme 2002 Sep 3rd 2 3000 2100
## 62 Shoes Shoes Acme 2002 Sep 3rd 2 3200 2560
## 63 Slippers Slippers Acme 2002 Sep 3rd 2 1200 720
## 64 Boots Shoes Acme 2002 Oct 4th 2 3000 2100
## 65 Shoes Shoes Acme 2002 Oct 4th 2 4000 2400
## 66 Slippers Slippers Acme 2002 Oct 4th 2 4000 2400
## 67 Boots Shoes BigX 2002 Nov 4th 2 3100 2170
## 68 Shoes Shoes BigX 2002 Nov 4th 2 1700 1360
## 69 Slippers Slippers BigX 2002 Nov 4th 2 1400 840
## 70 Boots Shoes Acme 2002 Dec 4th 2 2300 1610
## 71 Shoes Shoes Acme 2002 Dec 4th 2 1800 1440
## 72 Slippers Slippers Acme 2002 Dec 4th 2 3200 1920
## region district return constantv quantity
## 1 Nothern North East 0 1 30
## 2 Nothern North East 0 1 275
## 3 Nothern North East 0 1 180
## 4 Nothern North East 0 1 10
## 5 Nothern North East 0 1 175
## 6 Nothern North East 0 1 0
## 7 Nothern North East 0 1 60
## 8 Nothern North East 0 1 0
## 9 Nothern North East 0 1 280
## 10 Nothern North East 0 1 260
## 11 Nothern North East 0 1 187
## 12 Nothern North East 0 1 200
## 13 Nothern North East 0 1 240
## 14 Nothern North East 0 1 50
## 15 Nothern North East 0 1 140
## 16 Nothern North East 0 1 100
## 17 Nothern North East 0 1 137
## 18 Nothern North East 0 1 60
## 19 Nothern North East 0 1 370
## 20 Nothern North East 0 1 200
## 21 Nothern North East 0 1 560
## 22 Southern South West 3 1 280
## 23 Southern South West 3 1 350
## 24 Southern South West 3 1 220
## 25 Nothern North East 0 1 190
## 26 Nothern North East 0 1 287
## 27 Nothern North East 0 1 360
## 28 Nothern North East 0 1 210
## 29 Nothern North East 0 1 400
## 30 Nothern North East 0 1 940
## 31 Western Central West 5 1 290
## 32 Western Central West 5 1 312
## 33 Western Central West 5 1 760
## 34 Nothern North East 0 1 230
## 35 Nothern North East 0 1 237
## 36 Nothern North East 0 1 240
## 37 Nothern North East 0 1 100
## 38 Nothern North East 0 1 175
## 39 Nothern North East 0 1 220
## 40 Nothern North East 0 1 100
## 41 Nothern North East 0 1 87
## 42 Nothern North East 0 1 460
## 43 Nothern North East 0 1 40
## 44 Nothern North East 0 1 112
## 45 Nothern North East 0 1 220
## 46 Nothern North East 0 1 210
## 47 Nothern North East 0 1 175
## 48 Nothern North East 0 1 100
## 49 Nothern North East 0 1 110
## 50 Nothern North East 0 1 25
## 51 Nothern North East 0 1 260
## 52 Nothern North East 0 1 225
## 53 Nothern North East 0 1 170
## 54 Nothern North East 0 1 280
## 55 Nothern North East 0 1 112
## 56 Nothern North East 0 1 170
## 57 Nothern North East 0 1 280
## 58 Southern South West 3 1 437
## 59 Southern South West 3 1 130
## 60 Southern South West 3 1 640
## 61 Nothern North East 0 1 375
## 62 Nothern North East 0 1 320
## 63 Nothern North East 0 1 240
## 64 Nothern North East 0 1 375
## 65 Nothern North East 0 1 210
## 66 Nothern North East 0 1 800
## 67 Western Central West 5 1 387
## 68 Western Central West 5 1 170
## 69 Western Central West 5 1 280
## 70 Nothern North East 0 1 287
## 71 Nothern North East 0 1 180
## 72 Nothern North East 0 1 640
dta8 %>%
ggplot(aes(month, sales, color = quarter))+
stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
stat_smooth(aes(group = 1), method = lm, alpha = .5)+
theme_bw()+
facet_wrap(~product)+
labs(x = "Months", y = "Products average Sales", title = "Sales")
## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function
## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function
## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function
上圖為不同鞋款在不同月份及季的銷量圖。
dta8 %>%
ggplot(aes(month, sales, color = quarter))+
stat_summary(fun.data = mean_cl_boot, geom = "pointrange", alpha = .5)+
stat_summary(aes(group = 1), fun.y = mean, geom = "line")+
stat_smooth(aes(group = 1), method = lm, alpha = .5)+
theme_bw()+
facet_wrap(~year)+
labs(x = "Months", y = "Products average Sales", title = "Sales")
## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function
## Warning: Computation failed in `stat_summary()`:
## Hmisc package required for this function
上圖為不同月份及季的總銷量圖。
在資料分析-月份趨勢中其“針對前五舉發種類,我們產生每個月的長條圖統計”的12張圖是浪費空間的,使用長條圖來展示數量的多寡或變化情形讓人很難一眼看出變化,可以直接呈現網頁下方的趨勢線,並再做一張所有交通違規種類的合併並趨勢圖。