## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
#first list the sheet names of my info
excel_sheets("./data/surveys_data.xlsx")
## [1] "surveys" "plot_info" "species_info"
#load the data first
survey_data<-read_excel("./data/surveys_data.xlsx", sheet=1)
#next load plot info
plot_info<-read_excel("./data/surveys_data.xlsx", sheet=2)
#finally load the species info
species_info<-read_excel("./data/surveys_data.xlsx", sheet=3)
| Data Preparation:survey_data |
#check structure of data
#-this function shows variable names (next to "$"), variable format and the first few lines of code.
str(survey_data)
## tibble [35,549 x 10] (S3: tbl_df/tbl/data.frame)
## $ record_id : num [1:35549] 1 2 3 4 5 6 7 8 9 10 ...
## $ month : num [1:35549] 7 7 7 7 7 7 7 7 7 7 ...
## $ day : num [1:35549] 16 16 16 16 16 16 16 16 16 16 ...
## $ year : num [1:35549] 1977 1977 1977 1977 1977 ...
## $ plot_id : num [1:35549] 2 3 2 7 3 1 2 1 1 6 ...
## $ species_id : chr [1:35549] "NL" "NL" "DM" "DM" ...
## $ sex : chr [1:35549] "M" "M" "F" "M" ...
## $ hindfoot_length: num [1:35549] 32 33 37 36 35 14 NA 37 34 20 ...
## $ weight : num [1:35549] NA NA NA NA NA NA NA NA NA NA ...
## $ date : POSIXct[1:35549], format: "1977-07-16" "1977-07-16" ...
#first I will recode sex as numeric.
#to recode sex and set value labels, I will use...
survey_data$sex<-ifelse(survey_data$sex=="M", 0, ifelse(survey_data$sex=="F", 1, NA))
survey_data$sex<-factor(survey_data$sex, levels=c(0,1), labels=c("Male", "Female"))
| Data Preparation:plot_info |
#first the data structure will be explored to determine the steps required
str(plot_info)
## tibble [24 x 2] (S3: tbl_df/tbl/data.frame)
## $ plot_id : num [1:24] 1 2 3 4 5 6 7 8 9 10 ...
## $ plot_type: chr [1:24] "Spectab exclosure" "Control" "Long-term Krat Exclosure" "Control" ...
head(plot_info) #prints first few lines of data so you can see the format
## # A tibble: 6 x 2
## plot_id plot_type
## <dbl> <chr>
## 1 1 Spectab exclosure
## 2 2 Control
## 3 3 Long-term Krat Exclosure
## 4 4 Control
## 5 5 Rodent Exclosure
## 6 6 Short-term Krat Exclosure
#now plot_id will be labelled according to the plot_type info in the plot_info dataset.
survey_data$plot_id<-factor(survey_data$plot_id, #the variable we want labelled
levels=plot_info$plot_id, #the levels of the factors
labels=plot_info$plot_type) #the value labels matching those levels
#first the data structure will be explored, to determine the steps required
str(species_info)
## tibble [54 x 4] (S3: tbl_df/tbl/data.frame)
## $ species_id: chr [1:54] "AB" "AH" "AS" "BA" ...
## $ genus : chr [1:54] "Amphispiza" "Ammospermophilus" "Ammodramus" "Baiomys" ...
## $ species : chr [1:54] "bilineata" "harrisi" "savannarum" "taylori" ...
## $ taxa : chr [1:54] "Bird" "Rodent" "Bird" "Rodent" ...
#next the "genus" and "species" variables will be merged into one column
#Note:the last piece of code adds a space between genus and species...
species_info$species_lab<-paste(species_info$genus, species_info$species, " ")
#check the first 10 rows
species_info[1:10,]
## # A tibble: 10 x 5
## species_id genus species taxa species_lab
## <chr> <chr> <chr> <chr> <chr>
## 1 AB Amphispiza bilineata Bird "Amphispiza bilineata "
## 2 AH Ammospermophilus harrisi Rodent "Ammospermophilus harris~
## 3 AS Ammodramus savannarum Bird "Ammodramus savannarum "
## 4 BA Baiomys taylori Rodent "Baiomys taylori "
## 5 CB Campylorhynchus brunneicapillus Bird "Campylorhynchus brunnei~
## 6 CM Calamospiza melanocorys Bird "Calamospiza melanocorys~
## 7 CQ Callipepla squamata Bird "Callipepla squamata "
## 8 CS Crotalus scutalatus Reptile "Crotalus scutalatus "
## 9 CT Cnemidophorus tigris Reptile "Cnemidophorus tigris "
## 10 CU Cnemidophorus uniparens Reptile "Cnemidophorus uniparens~
#there is lagging space present, which will be removed as follows...
species_info$species_lab<-trimws(species_info$species_lab, which = "right")
#check the first 10 rows again
species_info[1:10,]
## # A tibble: 10 x 5
## species_id genus species taxa species_lab
## <chr> <chr> <chr> <chr> <chr>
## 1 AB Amphispiza bilineata Bird Amphispiza bilineata
## 2 AH Ammospermophilus harrisi Rodent Ammospermophilus harrisi
## 3 AS Ammodramus savannarum Bird Ammodramus savannarum
## 4 BA Baiomys taylori Rodent Baiomys taylori
## 5 CB Campylorhynchus brunneicapillus Bird Campylorhynchus brunneic~
## 6 CM Calamospiza melanocorys Bird Calamospiza melanocorys
## 7 CQ Callipepla squamata Bird Callipepla squamata
## 8 CS Crotalus scutalatus Reptile Crotalus scutalatus
## 9 CT Cnemidophorus tigris Reptile Cnemidophorus tigris
## 10 CU Cnemidophorus uniparens Reptile Cnemidophorus uniparens
#now the species id values in survey_data will be labelled
survey_data$species_id<-factor(survey_data$species_id, levels=species_info$species_id, labels=species_info$species_lab)
#genus and taxa will also be added
survey_data<-merge(survey_data, species_info[,c(2,4,5)], by.x="species_id", by.y="species_lab")
#check main data
str(survey_data)
## 'data.frame': 34786 obs. of 12 variables:
## $ species_id : Factor w/ 54 levels "Amphispiza bilineata",..: 3 3 2 2 2 2 2 2 2 2 ...
## $ record_id : num 18932 20588 27074 12824 22059 ...
## $ month : num 8 1 10 5 2 4 5 10 10 12 ...
## $ day : num 7 24 26 28 4 21 15 24 25 14 ...
## $ year : num 1991 1993 1997 1987 1995 ...
## $ plot_id : Factor w/ 5 levels "Spectab exclosure",..: 3 2 2 5 2 1 4 3 5 4 ...
## $ sex : Factor w/ 2 levels "Male","Female": NA NA NA NA NA NA NA NA NA NA ...
## $ hindfoot_length: num NA NA NA NA NA NA NA NA NA NA ...
## $ weight : num NA NA NA NA NA NA NA NA NA NA ...
## $ date : POSIXct, format: "1991-08-07" "1993-01-24" ...
## $ genus : chr "Ammodramus" "Ammodramus" "Ammospermophilus" "Ammospermophilus" ...
## $ taxa : chr "Bird" "Bird" "Rodent" "Rodent" ...
head(survey_data)
## species_id record_id month day year plot_id
## 1 Ammodramus savannarum 18932 8 7 1991 Long-term Krat Exclosure
## 2 Ammodramus savannarum 20588 1 24 1993 Control
## 3 Ammospermophilus harrisi 27074 10 26 1997 Control
## 4 Ammospermophilus harrisi 12824 5 28 1987 Short-term Krat Exclosure
## 5 Ammospermophilus harrisi 22059 2 4 1995 Control
## 6 Ammospermophilus harrisi 18618 4 21 1991 Spectab exclosure
## sex hindfoot_length weight date genus taxa
## 1 <NA> NA NA 1991-08-07 Ammodramus Bird
## 2 <NA> NA NA 1993-01-24 Ammodramus Bird
## 3 <NA> NA NA 1997-10-26 Ammospermophilus Rodent
## 4 <NA> NA NA 1987-05-28 Ammospermophilus Rodent
## 5 <NA> NA NA 1995-02-04 Ammospermophilus Rodent
## 6 <NA> NA NA 1991-04-21 Ammospermophilus Rodent
#remove labels with missing data
survey_data$species_id<-droplevels(survey_data$species_id)
#remove items with missing sex or weight data, creating new datasets
survey_data$sexmiss<-ifelse(is.na(survey_data$sex), 1, 0) #creating index number to indicate missing sex
survey_data$weightmiss<-ifelse(is.na(survey_data$weight), 1, 0)
survey_data$anymiss<-ifelse(survey_data$sexmiss==1 | survey_data$weightmiss==1, 1, 0) #missing either
table(survey_data$sexmiss, survey_data$anymiss) #check all sexmiss are counted
##
## 0 1
## 0 32182 856
## 1 0 1748
table(survey_data$weightmiss, survey_data$anymiss) #ditto for weightmiss
##
## 0 1
## 0 32182 101
## 1 0 2503
table(survey_data$anymiss) #2604 to be excluded
##
## 0 1
## 32182 2604
survey_data2<-survey_data[!survey_data$anymiss==1, ] #new data excluding missing by indicator
dim(survey_data) #34786 15 (read as row x column)
## [1] 34786 15
dim(survey_data2) #32182 15 (34786-2604 = 32182 ...confirm exclusion worked)
## [1] 32182 15
str(survey_data2)
## 'data.frame': 32182 obs. of 15 variables:
## $ species_id : Factor w/ 48 levels "Amphispiza bilineata",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ record_id : num 17486 19121 19249 17374 18252 ...
## $ month : num 4 10 11 4 1 3 12 11 3 5 ...
## $ day : num 26 10 13 24 12 7 15 14 14 25 ...
## $ year : num 1990 1991 1991 1990 1991 ...
## $ plot_id : Factor w/ 5 levels "Spectab exclosure",..: 3 2 3 1 3 3 3 3 3 3 ...
## $ sex : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 2 2 ...
## $ hindfoot_length: num 14 13 12 14 14 14 14 16 14 14 ...
## $ weight : num 7 6 8 7 9 9 8 9 8 10 ...
## $ date : POSIXct, format: "1990-04-26" "1991-10-10" ...
## $ genus : chr "Baiomys" "Baiomys" "Baiomys" "Baiomys" ...
## $ taxa : chr "Rodent" "Rodent" "Rodent" "Rodent" ...
## $ sexmiss : num 0 0 0 0 0 0 0 0 0 0 ...
## $ weightmiss : num 0 0 0 0 0 0 0 0 0 0 ...
## $ anymiss : num 0 0 0 0 0 0 0 0 0 0 ...
head(survey_data2)
## species_id record_id month day year plot_id sex
## 743 Baiomys taylori 17486 4 26 1990 Long-term Krat Exclosure Male
## 744 Baiomys taylori 19121 10 10 1991 Control Male
## 745 Baiomys taylori 19249 11 13 1991 Long-term Krat Exclosure Male
## 746 Baiomys taylori 17374 4 24 1990 Spectab exclosure Male
## 747 Baiomys taylori 18252 1 12 1991 Long-term Krat Exclosure Female
## 748 Baiomys taylori 19775 3 7 1992 Long-term Krat Exclosure Female
## hindfoot_length weight date genus taxa sexmiss weightmiss anymiss
## 743 14 7 1990-04-26 Baiomys Rodent 0 0 0
## 744 13 6 1991-10-10 Baiomys Rodent 0 0 0
## 745 12 8 1991-11-13 Baiomys Rodent 0 0 0
## 746 14 7 1990-04-24 Baiomys Rodent 0 0 0
## 747 14 9 1991-01-12 Baiomys Rodent 0 0 0
## 748 14 9 1992-03-07 Baiomys Rodent 0 0 0
names(survey_data2) #prints name and column number --use to exclude unwanted columns
## [1] "species_id" "record_id" "month" "day"
## [5] "year" "plot_id" "sex" "hindfoot_length"
## [9] "weight" "date" "genus" "taxa"
## [13] "sexmiss" "weightmiss" "anymiss"
survey_data2<-survey_data2[, c(1:12)]
#View plot_id as table
table(survey_data2$plot_id)
##
## Spectab exclosure Control Long-term Krat Exclosure
## 3705 14611 4676
## Rodent Exclosure Short-term Krat Exclosure
## 3797 5393
| Exploratory Data Analysis |
| Calculate, counts, means and standard deviations, based on the
plot_id variable |
#counts
summPlot_id<-survey_data2 %>%
group_by(plot_id) %>%
count() %>% #count by plot_id
as.data.frame() #output as dataframe not tibble
summPlot_id
## plot_id n
## 1 Spectab exclosure 3705
## 2 Control 14611
## 3 Long-term Krat Exclosure 4676
## 4 Rodent Exclosure 3797
## 5 Short-term Krat Exclosure 5393
summSex<-survey_data2 %>%
select(plot_id, sex) %>%
group_by(plot_id) %>%
count(sex) %>%
as.data.frame()
summSex
## plot_id sex n
## 1 Spectab exclosure Male 2056
## 2 Spectab exclosure Female 1649
## 3 Control Male 7942
## 4 Control Female 6669
## 5 Long-term Krat Exclosure Male 2226
## 6 Long-term Krat Exclosure Female 2450
## 7 Rodent Exclosure Male 1964
## 8 Rodent Exclosure Female 1833
## 9 Short-term Krat Exclosure Male 2691
## 10 Short-term Krat Exclosure Female 2702
#means
summMeans <-survey_data2 %>%
select(plot_id, hindfoot_length, weight) %>%
group_by(plot_id) %>%
summarise(round(across(.cols = everything(), ~mean(., na.rm=TRUE)),2)) %>%
as.data.frame()
summMeans
## plot_id hindfoot_length weight
## 1 Spectab exclosure 33.79 51.58
## 2 Control 32.19 48.46
## 3 Long-term Krat Exclosure 22.35 27.18
## 4 Rodent Exclosure 24.50 32.48
## 5 Short-term Krat Exclosure 27.28 41.07
#sd
summSD<-survey_data2 %>%
select(plot_id, hindfoot_length, weight) %>%
group_by(plot_id) %>%
summarise(round(across(.cols=everything(), ~sd(., na.rm=TRUE)),2)) %>%
as.data.frame()
summSD
## plot_id hindfoot_length weight
## 1 Spectab exclosure 8.85 36.02
## 2 Control 8.92 36.44
## 3 Long-term Krat Exclosure 5.97 29.21
## 4 Rodent Exclosure 8.68 33.84
## 5 Short-term Krat Exclosure 9.57 38.58
| Summarise the Data and Build Table |
head(survey_data2)
## species_id record_id month day year plot_id sex
## 743 Baiomys taylori 17486 4 26 1990 Long-term Krat Exclosure Male
## 744 Baiomys taylori 19121 10 10 1991 Control Male
## 745 Baiomys taylori 19249 11 13 1991 Long-term Krat Exclosure Male
## 746 Baiomys taylori 17374 4 24 1990 Spectab exclosure Male
## 747 Baiomys taylori 18252 1 12 1991 Long-term Krat Exclosure Female
## 748 Baiomys taylori 19775 3 7 1992 Long-term Krat Exclosure Female
## hindfoot_length weight date genus taxa
## 743 14 7 1990-04-26 Baiomys Rodent
## 744 13 6 1991-10-10 Baiomys Rodent
## 745 12 8 1991-11-13 Baiomys Rodent
## 746 14 7 1990-04-24 Baiomys Rodent
## 747 14 9 1991-01-12 Baiomys Rodent
## 748 14 9 1992-03-07 Baiomys Rodent
str(survey_data2)
## 'data.frame': 32182 obs. of 12 variables:
## $ species_id : Factor w/ 48 levels "Amphispiza bilineata",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ record_id : num 17486 19121 19249 17374 18252 ...
## $ month : num 4 10 11 4 1 3 12 11 3 5 ...
## $ day : num 26 10 13 24 12 7 15 14 14 25 ...
## $ year : num 1990 1991 1991 1990 1991 ...
## $ plot_id : Factor w/ 5 levels "Spectab exclosure",..: 3 2 3 1 3 3 3 3 3 3 ...
## $ sex : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 2 2 2 2 2 ...
## $ hindfoot_length: num 14 13 12 14 14 14 14 16 14 14 ...
## $ weight : num 7 6 8 7 9 9 8 9 8 10 ...
## $ date : POSIXct, format: "1990-04-26" "1991-10-10" ...
## $ genus : chr "Baiomys" "Baiomys" "Baiomys" "Baiomys" ...
## $ taxa : chr "Rodent" "Rodent" "Rodent" "Rodent" ...
#build table
t1<- data.frame(vars= c(paste0("**Overall Count** "," *n*"),
paste0("**Sex** ", " *n*"),
"*Female*", "*Male*",
paste0("**Hindfoot Length** ", "mm", " *mean (SD)*"),
paste0("**Weight** ", "g", " *mean (SD)*")),
Spectab_exclosure = c(paste0(summPlot_id$n[1]),
"",
paste0(summSex$n[1:2]),
paste0(summMeans$hindfoot_length[1], " (", summSD$hindfoot_length[1], ")"),
paste0(summMeans$weight[1], "(", summSD$weight[1], ")")),
Control = c(paste0(summPlot_id$n[2]),
"",
paste0(summSex$n[3:4]),
paste0(summMeans$hindfoot_length[2], " (", summSD$hindfoot_length[2], ")"),
paste0(summMeans$weight[2], "(", summSD$weight[2], ")")),
Long_term_Krat_Exclosure = c(paste0(summPlot_id$n[3]),
"",
paste0(summSex$n[5:6]),
paste0(summMeans$hindfoot_length[3], " (", summSD$hindfoot_length[3], ")"),
paste0(summMeans$weight[3], "(", summSD$weight[3], ")")),
Rodent_Exclosure = c(paste0(summPlot_id$n[4]),
"",
paste0(summSex$n[7:8]),
paste0(summMeans$hindfoot_length[4], " (", summSD$hindfoot_length[4], ")"),
paste0(summMeans$weight[4], "(", summSD$weight[4], ")")),
Short_term_Krat_Exclosure =c(paste0(summPlot_id$n[5]),
"",
paste0(summSex$n[9:10]),
paste0(summMeans$hindfoot_length[5], " (", summSD$hindfoot_length[5], ")"),
paste0(summMeans$weight[5], "(", summSD$weight[5], ")")),
stringsAsFactors = FALSE)
t1
## vars Spectab_exclosure Control
## 1 **Overall Count** *n* 3705 14611
## 2 **Sex** *n*
## 3 *Female* 2056 7942
## 4 *Male* 1649 6669
## 5 **Hindfoot Length** mm *mean (SD)* 33.79 (8.85) 32.19 (8.92)
## 6 **Weight** g *mean (SD)* 51.58(36.02) 48.46(36.44)
## Long_term_Krat_Exclosure Rodent_Exclosure Short_term_Krat_Exclosure
## 1 4676 3797 5393
## 2
## 3 2226 1964 2691
## 4 2450 1833 2702
## 5 22.35 (5.97) 24.5 (8.68) 27.28 (9.57)
## 6 27.18(29.21) 32.48(33.84) 41.07(38.58)
| Pimp the table with KableExtra |
kable(t1,
caption = ("**Summary of study variables by Plot_id**"), # adds table caption
col.names = c("", "Spectab exclosure", "Control", "Long_term_Krat_Exclosure", " Rodent_Exclosure", "Short_term_Krat_Exclosure"), # no title to first column
align="lcccc",
type="") %>%
column_spec(1, width_min="7cm", border_right = TRUE) %>%
column_spec(c(2,3,4,5,6), width_min="3cm") %>%
row_spec(0, bold=T, color="white", background="#666666") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
add_indent(c(3,4)) %>% # indent rows for measures with levels
add_header_above(c(" " = 1, "Plot_id" = 5), bold=T, color="white", background="#666666", include_empty = T)
Summary of study variables by Plot_id
|
|
|
|
|
Spectab exclosure
|
Control
|
Long_term_Krat_Exclosure
|
Rodent_Exclosure
|
Short_term_Krat_Exclosure
|
|
Overall Count n
|
3705
|
14611
|
4676
|
3797
|
5393
|
|
Sex n
|
|
|
|
|
|
|
Female
|
2056
|
7942
|
2226
|
1964
|
2691
|
|
Male
|
1649
|
6669
|
2450
|
1833
|
2702
|
|
Hindfoot Length mm mean (SD)
|
33.79 (8.85)
|
32.19 (8.92)
|
22.35 (5.97)
|
24.5 (8.68)
|
27.28 (9.57)
|
|
Weight g mean (SD)
|
51.58(36.02)
|
48.46(36.44)
|
27.18(29.21)
|
32.48(33.84)
|
41.07(38.58)
|
| Data Visualisation with ggpubr |
# remove levels where species_id has 0 records
survey_data2$species_id <- droplevels(survey_data2$species_id)
survey_data2$genus <- as.factor(survey_data2$genus)
# length by sex
p1 <- ggplot(survey_data2, aes(x=hindfoot_length, fill=sex)) +
geom_histogram(aes(y = ..density..), colour="black", bins=50, alpha=0.5) +
geom_density(size=1, alpha=0.4) +
theme_bw() + facet_grid(sex ~ .) + theme(legend.position = "none") +
xlab("Hindfoot Length (mm)") + ylab("Density")
p1a <- ggplot(survey_data2, aes(x=as.factor(genus), y=hindfoot_length, colour = as.factor(genus))) +
geom_boxplot(fill="white", weight=1.5) +
geom_point(#fill="white",
position="jitter",
alpha=0.1) +
xlab("Genus") + ylab("Hindfoot Length (mm)") +
theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90)) +
coord_flip()
p2 <- ggplot(survey_data2, aes(x=weight, fill=sex)) +
geom_histogram(aes(y = ..density..), colour="black", bins=50, alpha=0.5) +
geom_density(size=1, alpha=0.4) +
theme_bw() + facet_grid(sex ~ .) + theme(legend.position = "none") +
xlab("Weight (g)") + ylab("Density")
p2a <- ggplot(survey_data2, aes(x=as.factor(genus), y=weight, colour = as.factor(genus))) +
geom_boxplot(fill="white", weight=1.5) +
geom_point(#fill="white",
position="jitter",
alpha=0.1) +
xlab("Genus") + ylab("Weight (g)") +
theme_bw() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90)) +
coord_flip()
ggarrange(p1, p1a,labels = c("a", "b"))
## Warning: Removed 1506 rows containing non-finite values (stat_bin).
## Warning: Removed 1506 rows containing non-finite values (stat_density).
## Warning: Removed 1506 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1506 rows containing missing values (geom_point).

ggarrange(p2, p2a,labels = c("a", "b"))
