Comment on what each code chunk in the following classroom markdown file is trying to achive and on its output.
載入WWGbook package中的classroom dataset
pacman::p_load(WWGbook)
data(classroom, package="WWGbook")#了解這個資料中的訊息
?classroomThe Study of Instructional Improvement (SII; Hill, Rowan, and Ball, 2004) was carried out by researchers at the University of Michigan to study the math achievement scores of first- and third-grade students in randomly selected classrooms from a national U.S. sample of elementary schools.
#顯示classroom的資料結構
str(classroom)'data.frame': 1190 obs. of 12 variables:
$ sex : int 1 0 1 0 0 1 0 0 1 0 ...
$ minority: int 1 1 1 1 1 1 1 1 1 1 ...
$ mathkind: int 448 460 511 449 425 450 452 443 422 480 ...
$ mathgain: int 32 109 56 83 53 65 51 66 88 -7 ...
$ ses : num 0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
$ yearstea: num 1 1 1 2 2 2 2 2 2 2 ...
$ mathknow: num NA NA NA -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 ...
$ housepov: num 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 ...
$ mathprep: num 2 2 2 3.25 3.25 3.25 3.25 3.25 3.25 3.25 ...
$ classid : int 160 160 160 217 217 217 217 217 217 217 ...
$ schoolid: int 1 1 1 1 1 1 1 1 1 1 ...
$ childid : int 1 2 3 4 5 6 7 8 9 10 ...
A data frame with 1190 observations on the following 12 variables.
split data to unique school ID and only keep “schoolid”, “housepov” two variables
dta_schl <- classroom[duplicated(classroom$schoolid)==FALSE,
c("schoolid", "housepov")]duplicated 可以刪掉重複值。藉由duplicated為FALSE,保留唯一筆schoolid
dta_cls <- classroom[duplicated(classroom$classid)==FALSE,
c(11, 10, 6,7,9)]刪掉重複classid,並保留schoolid、classid、yearstea、mathknow、mathprep
dta_chld <- classroom[, c(12, 10, 11, 1:5)]排序欄位:childid、classid、schoolid、sex、minority、mathkind、mathgain、ses
sapply(list(dta_schl, dta_cls, dta_chld), dim) [,1] [,2] [,3]
[1,] 107 312 1190
[2,] 2 5 8
將3個dataset放入list,利用sapply看每個dataset的dimention
merge dataset by column name
dta_12 <- merge(x=dta_chld, y=dta_cls, by=c("classid", "schoolid"))dta_13 <- merge(x=dta_chld, y=dta_cls, by=c("classid", "schoolid"))dta_23 <- merge(x=dta_cls, y=dta_schl, by="schoolid")dta_123 <- merge(x=dta_12, y=dta_schl, by=c("schoolid"))sapply(list(dta_12, dta_13, dta_23, dta_123), dim) [,1] [,2] [,3] [,4]
[1,] 1190 1190 312 1190
[2,] 11 11 6 12
show 每一個merge完的dataset的dimention
library(dplyr)
dta_chldv<-dta_chld|> filter(schoolid== 1) #11筆
dta_clsv<-dta_cls|> filter(schoolid== 1) #2筆
dta_12s<- merge(x=dta_chldv, y=dta_clsv, by="schoolid") #22筆Merge the two data sets: state.x77{datasets} and USArrests{datasets} and compute all pair-wise correlations for numerical variables. Is there anything interesting to report?
# attributes of dataset
attributes(state.x77) $dim
[1] 50 8
$dimnames
$dimnames[[1]]
[1] "Alabama" "Alaska" "Arizona" "Arkansas"
[5] "California" "Colorado" "Connecticut" "Delaware"
[9] "Florida" "Georgia" "Hawaii" "Idaho"
[13] "Illinois" "Indiana" "Iowa" "Kansas"
[17] "Kentucky" "Louisiana" "Maine" "Maryland"
[21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
[25] "Missouri" "Montana" "Nebraska" "Nevada"
[29] "New Hampshire" "New Jersey" "New Mexico" "New York"
[33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
[37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
[41] "South Dakota" "Tennessee" "Texas" "Utah"
[45] "Vermont" "Virginia" "Washington" "West Virginia"
[49] "Wisconsin" "Wyoming"
$dimnames[[2]]
[1] "Population" "Income" "Illiteracy" "Life Exp" "Murder"
[6] "HS Grad" "Frost" "Area"
attributes(USArrests) $names
[1] "Murder" "Assault" "UrbanPop" "Rape"
$class
[1] "data.frame"
$row.names
[1] "Alabama" "Alaska" "Arizona" "Arkansas"
[5] "California" "Colorado" "Connecticut" "Delaware"
[9] "Florida" "Georgia" "Hawaii" "Idaho"
[13] "Illinois" "Indiana" "Iowa" "Kansas"
[17] "Kentucky" "Louisiana" "Maine" "Maryland"
[21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
[25] "Missouri" "Montana" "Nebraska" "Nevada"
[29] "New Hampshire" "New Jersey" "New Mexico" "New York"
[33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
[37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
[41] "South Dakota" "Tennessee" "Texas" "Utah"
[45] "Vermont" "Virginia" "Washington" "West Virginia"
[49] "Wisconsin" "Wyoming"
# merge
dta2 <- merge(state.x77,USArrests, by="row.names")
#column name
names(dta2) [1] "Row.names" "Population" "Income" "Illiteracy" "Life Exp"
[6] "Murder.x" "HS Grad" "Frost" "Area" "Murder.y"
[11] "Assault" "UrbanPop" "Rape"
merge state.x77 and USArrests by row.names.
pacman::p_load(dplyr, magrittr)
# rename
dta2 %<>%
dplyr::rename(State=Row.names, Murder1976=Murder.x, Murder1973=Murder.y)
#
dta2$State <- as.factor(dta2$State)
str(dta2)'data.frame': 50 obs. of 13 variables:
$ State : Factor w/ 50 levels "Alabama","Alaska",..: 1 2 3 4 5 6 7 8 9 10 ...
$ Population: num 3615 365 2212 2110 21198 ...
$ Income : num 3624 6315 4530 3378 5114 ...
$ Illiteracy: num 2.1 1.5 1.8 1.9 1.1 0.7 1.1 0.9 1.3 2 ...
$ Life Exp : num 69 69.3 70.5 70.7 71.7 ...
$ Murder1976: num 15.1 11.3 7.8 10.1 10.3 6.8 3.1 6.2 10.7 13.9 ...
$ HS Grad : num 41.3 66.7 58.1 39.9 62.6 63.9 56 54.6 52.6 40.6 ...
$ Frost : num 20 152 15 65 20 166 139 103 11 60 ...
$ Area : num 50708 566432 113417 51945 156361 ...
$ Murder1973: num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
$ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
$ UrbanPop : int 58 48 80 50 91 78 77 72 80 60 ...
$ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
pacman::p_load(dplyr, magrittr, corrplot)
# merge by row name (states)
dat2 <- merge(state.x77, USArrests, by="row.names", all=TRUE)
# column name
dat2 %<>%
dplyr::rename(State = Row.names,
Murder1976 = Murder.x,
Murder1973 = Murder.y)
# change type and show head
dat2<-dat2 |>
mutate(State= as.factor(State))res <-round(cor(dta2[,-1]), 2)
knitr::kable(res)#要有括號才跑得出來| Population | Income | Illiteracy | Life Exp | Murder1976 | HS Grad | Frost | Area | Murder1973 | Assault | UrbanPop | Rape | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Population | 1.00 | 0.21 | 0.11 | -0.07 | 0.34 | -0.10 | -0.33 | 0.02 | 0.32 | 0.32 | 0.51 | 0.31 |
| Income | 0.21 | 1.00 | -0.44 | 0.34 | -0.23 | 0.62 | 0.23 | 0.36 | -0.22 | 0.04 | 0.48 | 0.36 |
| Illiteracy | 0.11 | -0.44 | 1.00 | -0.59 | 0.70 | -0.66 | -0.67 | 0.08 | 0.71 | 0.51 | -0.06 | 0.15 |
| Life Exp | -0.07 | 0.34 | -0.59 | 1.00 | -0.78 | 0.58 | 0.26 | -0.11 | -0.78 | -0.63 | 0.27 | -0.27 |
| Murder1976 | 0.34 | -0.23 | 0.70 | -0.78 | 1.00 | -0.49 | -0.54 | 0.23 | 0.93 | 0.74 | 0.02 | 0.58 |
| HS Grad | -0.10 | 0.62 | -0.66 | 0.58 | -0.49 | 1.00 | 0.37 | 0.33 | -0.52 | -0.23 | 0.36 | 0.27 |
| Frost | -0.33 | 0.23 | -0.67 | 0.26 | -0.54 | 0.37 | 1.00 | 0.06 | -0.54 | -0.47 | -0.25 | -0.28 |
| Area | 0.02 | 0.36 | 0.08 | -0.11 | 0.23 | 0.33 | 0.06 | 1.00 | 0.15 | 0.23 | -0.06 | 0.52 |
| Murder1973 | 0.32 | -0.22 | 0.71 | -0.78 | 0.93 | -0.52 | -0.54 | 0.15 | 1.00 | 0.80 | 0.07 | 0.56 |
| Assault | 0.32 | 0.04 | 0.51 | -0.63 | 0.74 | -0.23 | -0.47 | 0.23 | 0.80 | 1.00 | 0.26 | 0.67 |
| UrbanPop | 0.51 | 0.48 | -0.06 | 0.27 | 0.02 | 0.36 | -0.25 | -0.06 | 0.07 | 0.26 | 1.00 | 0.41 |
| Rape | 0.31 | 0.36 | 0.15 | -0.27 | 0.58 | 0.27 | -0.28 | 0.52 | 0.56 | 0.67 | 0.41 | 1.00 |
class(res)[1] "matrix" "array"
library(corrplot)
corrplot(res, type = "upper", order = "hclust",
tl.col = "black", tl.srt = 45)The correlation matrix is reordered according to the correlation coefficient using “hclust” method.
tl.col (for text label color) and tl.srt (for text label string rotation) are used to change text colors and rotations.
Possible values for the argument type are : “upper”, “lower”, “full”
結論 Illiteracy has positive correlation on Rape, Assault and Murder.
Summarize the backpain{HSAUR3} into the following format:
You should provide comments for each code chunk.
# load package
pacman::p_load(HSAUR3)
# Input data
data("backpain", package="HSAUR3")
dta3 <- backpain
head(dta3) ID status driver suburban
1 1 case yes yes
2 1 control yes no
3 2 case yes yes
4 2 control yes yes
5 3 case yes no
6 3 control yes yes
str(dta3)'data.frame': 434 obs. of 4 variables:
$ ID : Factor w/ 217 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
$ status : Factor w/ 2 levels "case","control": 1 2 1 2 1 2 1 2 1 2 ...
$ driver : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 1 1 2 2 ...
$ suburban: Factor w/ 2 levels "no","yes": 2 1 2 2 1 2 1 1 1 2 ...
A <- with(dta3,table(driver, suburban, status))
tbl <-with(dta3,ftable(driver, suburban, status))
tbl1<-with(dta3,ftable(addmargins(A, c(1, 3),
FUN = list(list(Sum = sum), Sum = sum))))Margins computed over dimensions
in the following order:
1: driver
2: status
tbl status case control
driver suburban
no no 26 47
yes 6 7
yes no 64 63
yes 121 100
tbl1 status case control Sum
driver suburban
no no 26 47 73
yes 6 7 13
yes no 64 63 127
yes 121 100 221
Sum no 90 110 200
yes 127 107 234
addmargins: Puts Arbitrary Margins on Multidimensional Tables or Arrays
pacman::p_load(dplyr, magrittr ,tidyr)
dta3 |>
pivot_wider(names_from = status, values_from = status)|>
group_by(driver, suburban) |>
summarize(Case = sum(!is.na(case)), # sum of case number
Control = sum(!is.na(control)), # sum of control number
Total = as.numeric(Case)+as.numeric(Control)) %>% # sum of total
as.data.frame driver suburban Case Control Total
1 no no 26 47 73
2 no yes 6 7 13
3 yes no 64 63 127
4 yes yes 121 100 221
The data set Vocab{carData} gives observations on gender, education and vocabulary, from respondents to U.S. General Social Surveys, 1972-2004. Summarize the relationship between education and vocabulary over the years by gender.
# load package
pacman::p_load(carData)
# Input data
data("Vocab", package="carData")
dat4 <- Vocab
# columan rename
dat4 <- dat4 %>%
dplyr::rename(Year = year,
Sex = sex,
Education = education,
Vocabulary = vocabulary)
# plot
dat4 %>%
mutate(Year = as.factor(Year)) %>% # for show the year number
lattice::xyplot(Vocabulary ~ Education | Year,
groups = Sex,
type = c("p", "g", "r"), data = .,
cex= .5,
pch = 19,
auto.key = list(columns = 2),
xlab = "Education", ylab = "Vocabulary",
layout = c(4,6),
par.settings = list(superpose.symbol = list(pch = 19, cex = 1.5,
col = c("#D95F02", "#7570B3")),
superpose.line = list(col = c("#D95F02", "#7570B3"),
lwd = 1.5)))Supply comments to each code chunk in the following survey rmarkdown file and preview it as an R notebook or knit to html.
# 載入tidyverse package
pacman::p_load(tidyverse)# 從網路上載下csv
dta <- read_csv("http://kbroman.org/datacarp/portal_data_joined.csv")# 看一下資料數據型態
glimpse(dta)Rows: 34,786
Columns: 13
$ record_id <dbl> 1, 72, 224, 266, 349, 363, 435, 506, 588, 661, 748, 84~
$ month <dbl> 7, 8, 9, 10, 11, 11, 12, 1, 2, 3, 4, 5, 6, 8, 9, 10, 1~
$ day <dbl> 16, 19, 13, 16, 12, 12, 10, 8, 18, 11, 8, 6, 9, 5, 4, ~
$ year <dbl> 1977, 1977, 1977, 1977, 1977, 1977, 1977, 1978, 1978, ~
$ plot_id <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ~
$ species_id <chr> "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", "NL", ~
$ sex <chr> "M", "M", NA, NA, NA, NA, NA, NA, "M", NA, NA, "M", "M~
$ hindfoot_length <dbl> 32, 31, NA, NA, NA, NA, NA, NA, NA, NA, NA, 32, NA, 34~
$ weight <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 218, NA, NA, 204, 200,~
$ genus <chr> "Neotoma", "Neotoma", "Neotoma", "Neotoma", "Neotoma",~
$ species <chr> "albigula", "albigula", "albigula", "albigula", "albig~
$ taxa <chr> "Rodent", "Rodent", "Rodent", "Rodent", "Rodent", "Rod~
$ plot_type <chr> "Control", "Control", "Control", "Control", "Control",~
# 資料維度
dim(dta)[1] 34786 13
# 用dplyr::select選擇dta中的plot_id, species_id, weight三個變項
dplyr::select(dta, plot_id, species_id, weight) %>% head()# A tibble: 6 x 3
plot_id species_id weight
<dbl> <chr> <dbl>
1 2 NL NA
2 2 NL NA
3 2 NL NA
4 2 NL NA
5 2 NL NA
6 2 NL NA
# select 也可以指定要刪掉哪幾個變項就好(刪掉record_id,species_id)
dplyr::select(dta, -record_id, -species_id) %>% head()# A tibble: 6 x 11
month day year plot_id sex hindfoot_length weight genus species taxa
<dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
1 7 16 1977 2 M 32 NA Neotoma albigula Rodent
2 8 19 1977 2 M 31 NA Neotoma albigula Rodent
3 9 13 1977 2 <NA> NA NA Neotoma albigula Rodent
4 10 16 1977 2 <NA> NA NA Neotoma albigula Rodent
5 11 12 1977 2 <NA> NA NA Neotoma albigula Rodent
6 11 12 1977 2 <NA> NA NA Neotoma albigula Rodent
# ... with 1 more variable: plot_type <chr>
# 利用filter篩選年代為1995年的資料
dplyr::filter(dta, year == 1995) %>% head()# A tibble: 6 x 13
record_id month day year plot_id species_id sex hindfoot_length weight
<dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 22314 6 7 1995 2 NL M 34 NA
2 22728 9 23 1995 2 NL F 32 165
3 22899 10 28 1995 2 NL F 32 171
4 23032 12 2 1995 2 NL F 33 NA
5 22003 1 11 1995 2 DM M 37 41
6 22042 2 4 1995 2 DM F 36 45
# ... with 4 more variables: genus <chr>, species <chr>, taxa <chr>,
# plot_type <chr>
# 先filter體重<= 5,用select選擇species_id, sex, weight三個變項
head(dplyr::select(dplyr::filter(dta, weight <= 5), species_id, sex, weight))# A tibble: 6 x 3
species_id sex weight
<chr> <chr> <dbl>
1 PF M 5
2 PF F 5
3 PF F 5
4 PF F 4
5 PF F 5
6 PF F 4
# 這個與放面的語法一樣,透過%>%可以讓code更直觀
dta %>%
dplyr::filter(weight <= 5) %>%
dplyr::select(species_id, sex, weight) %>%
head# A tibble: 6 x 3
species_id sex weight
<chr> <chr> <dbl>
1 PF M 5
2 PF F 5
3 PF F 5
4 PF F 4
5 PF F 5
6 PF F 4
# 用mutate 建立weight_kg與weight_lb兩個變項,並分別給定計算方式
dta %>%
mutate(weight_kg = weight / 1000,
weight_lb = weight_kg * 2.2) %>%
head()# A tibble: 6 x 15
record_id month day year plot_id species_id sex hindfoot_length weight
<dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 1 7 16 1977 2 NL M 32 NA
2 72 8 19 1977 2 NL M 31 NA
3 224 9 13 1977 2 NL <NA> NA NA
4 266 10 16 1977 2 NL <NA> NA NA
5 349 11 12 1977 2 NL <NA> NA NA
6 363 11 12 1977 2 NL <NA> NA NA
# ... with 6 more variables: genus <chr>, species <chr>, taxa <chr>,
# plot_type <chr>, weight_kg <dbl>, weight_lb <dbl>
# filter篩選體重沒有na的資料,group_by依照 sex與species_id,計算mean weight,並遞減排序
dta %>%
filter(!is.na(weight)) %>%
group_by(sex, species_id) %>%
summarize(mean_weight = mean(weight)) %>%
arrange(desc(mean_weight)) %>%
head()# A tibble: 6 x 3
# Groups: sex [3]
sex species_id mean_weight
<chr> <chr> <dbl>
1 <NA> NL 168.
2 M NL 166.
3 F NL 154.
4 M SS 130
5 <NA> SH 130
6 M DS 122.
dta %>%
group_by(sex) %>%
tally #與count()相似,但必須先寫group_by()才行# A tibble: 3 x 2
sex n
<chr> <int>
1 F 15690
2 M 17348
3 <NA> 1748
dta %>%
count(sex) #count()除了計算數量還包含執行group_by的功能# A tibble: 3 x 2
sex n
<chr> <int>
1 F 15690
2 M 17348
3 <NA> 1748
#用summarize可以建立一個叫count的變項,並透過group_by sex後n()計算row的總數
dta %>%
group_by(sex) %>%
summarize(count = n()) # A tibble: 3 x 2
sex count
<chr> <int>
1 F 15690
2 M 17348
3 <NA> 1748
# 建立count欄位,並用sum加總沒有Missing的year數量
dta %>%
group_by(sex) %>%
summarize(count = sum(!is.na(year)))# A tibble: 3 x 2
sex count
<chr> <int>
1 F 15690
2 M 17348
3 <NA> 1748
dta_gw <- dta %>%
filter(!is.na(weight)) %>% #篩選出沒有na的weight列
group_by(genus, plot_id) %>% # group_by genus與plot_id
summarize(mean_weight = mean(weight)) #計算體重平均並放入mean_weight變項中# 看資料的數據屬性
glimpse(dta_gw)Rows: 196
Columns: 3
Groups: genus [10]
$ genus <chr> "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Ba~
$ plot_id <dbl> 1, 2, 3, 5, 18, 19, 20, 21, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,~
$ mean_weight <dbl> 7.000000, 6.000000, 8.611111, 7.750000, 9.500000, 9.533333~
# can be used to “spread” a key-value pair across multiple columns.
# 與pivot_wider一樣功能?
dta_w <- dta_gw %>%
spread(key = genus, value = mean_weight)key: Column whose values will become variable names
value: Column where values will fill under new variables created from key
glimpse(dta_w)Rows: 24
Columns: 11
$ plot_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,~
$ Baiomys <dbl> 7.000000, 6.000000, 8.611111, NA, 7.750000, NA, NA, NA~
$ Chaetodipus <dbl> 22.19939, 25.11014, 24.63636, 23.02381, 17.98276, 24.8~
$ Dipodomys <dbl> 60.23214, 55.68259, 52.04688, 57.52454, 51.11356, 58.6~
$ Neotoma <dbl> 156.2222, 169.1436, 158.2414, 164.1667, 190.0370, 179.~
$ Onychomys <dbl> 27.67550, 26.87302, 26.03241, 28.09375, 27.01695, 25.8~
$ Perognathus <dbl> 9.625000, 6.947368, 7.507812, 7.824427, 8.658537, 7.80~
$ Peromyscus <dbl> 22.22222, 22.26966, 21.37037, 22.60000, 21.23171, 21.8~
$ Reithrodontomys <dbl> 11.375000, 10.680556, 10.516588, 10.263158, 11.154545,~
$ Sigmodon <dbl> NA, 70.85714, 65.61404, 82.00000, 82.66667, 68.77778, ~
$ Spermophilus <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13~
dta_gw %>%
spread(genus, mean_weight, fill = 0) %>% # fill把NA設定為0
head()# A tibble: 6 x 11
plot_id Baiomys Chaetodipus Dipodomys Neotoma Onychomys Perognathus Peromyscus
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 7 22.2 60.2 156. 27.7 9.62 22.2
2 2 6 25.1 55.7 169. 26.9 6.95 22.3
3 3 8.61 24.6 52.0 158. 26.0 7.51 21.4
4 4 0 23.0 57.5 164. 28.1 7.82 22.6
5 5 7.75 18.0 51.1 190. 27.0 8.66 21.2
6 6 0 24.9 58.6 180. 25.9 7.81 21.8
# ... with 3 more variables: Reithrodontomys <dbl>, Sigmodon <dbl>,
# Spermophilus <dbl>
# Gather columns into key-value pairs
# 轉long form的意思
dta_l <- dta_w %>%
gather(key = genus,
value = mean_weight,
-plot_id)#因為針對整個dta_w進行轉置,會錯誤納入plot_id為genus中,所以要排除他glimpse(dta_l)#因為沒有一起進入轉置,所以才能出現plot_idRows: 240
Columns: 3
$ plot_id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,~
$ genus <chr> "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Baiomys", "Ba~
$ mean_weight <dbl> 7.000000, 6.000000, 8.611111, NA, 7.750000, NA, NA, NA, NA~
dta_w %>%
gather(key = genus,
value = mean_weight,
Baiomys:Spermophilus) %>% #這邊直接指定屬genus範圍是Baiomys:Spermophilus
head()# A tibble: 6 x 3
plot_id genus mean_weight
<dbl> <chr> <dbl>
1 1 Baiomys 7
2 2 Baiomys 6
3 3 Baiomys 8.61
4 4 Baiomys NA
5 5 Baiomys 7.75
6 6 Baiomys NA
# filter by not NA on weight, hindfoot_length, sex sequently
dta_complete <- dta %>%
filter(!is.na(weight),
!is.na(hindfoot_length),
!is.na(sex)) # 先依照species_id計算數量,並篩選出>= 50
species_counts <- dta_complete %>%
count(species_id) %>%
filter(n >= 50)# 篩選有在dta_complete的species_id也有在species_counts$species_id
# 意思與保留species_id計算數量>= 50相同
dta_complete <- dta_complete %>%
filter(species_id %in% species_counts$species_id)