Code chunks shown without output are alternative ways to get the same result, included for comparison.
Remember “?command” for help
https://rseek.org
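The explicit “print” calls are unnecessary (a habit from the 1970s): at the top level, R prints a value automatically when the expression is entered by itself.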
library(tidyverse, quietly = TRUE)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(DT)
# Read inchBio.csv into a tibble; readr reports the column types it guessed.
bio <- read_csv(file = "G:/My Drive/homework/Lovely J/inchBio.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## netID = col_double(),
## fishID = col_double(),
## species = col_character(),
## tl = col_double(),
## w = col_double(),
## scale = col_logical()
## )
# Browse the whole data set as a DT table widget, then print per-column
# summary statistics.
bio %>% datatable()
bio %>% summary()
## netID fishID species tl
## Min. : 4.00 Min. : 7.0 Length:511 Min. : 28.0
## 1st Qu.: 12.00 1st Qu.:172.0 Class :character 1st Qu.: 61.0
## Median :101.00 Median :568.0 Mode :character Median :150.0
## Mean : 79.77 Mean :488.6 Mean :156.6
## 3rd Qu.:114.00 3rd Qu.:760.5 3rd Qu.:226.0
## Max. :206.00 Max. :915.0 Max. :429.0
## w scale
## Min. : 0.2 Mode :logical
## 1st Qu.: 2.0 FALSE:200
## Median : 54.5 TRUE :311
## Mean : 126.8
## 3rd Qu.: 190.5
## Max. :1070.0
# Compact overview: one line per column with its type and first values.
bio %>% glimpse()
## Rows: 511
## Columns: 6
## $ netID <dbl> 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,...
## $ fishID <dbl> 16, 23, 30, 44, 50, 65, 68, 69, 70, 71, 73, 74, 75, 76, 77,...
## $ species <chr> "Bluegill", "Bluegill", "Bluegill", "Bluegill", "Bluegill",...
## $ tl <dbl> 61, 66, 70, 38, 42, 54, 36, 59, 39, 34, 40, 35, 32, 37, 38,...
## $ w <dbl> 2.9, 4.5, 5.2, 0.5, 1.0, 2.1, 0.5, 2.0, 0.5, 0.5, 1.0, 0.5,...
## $ scale <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL...
# Number of variables (columns); dim() and names() are shown for comparison.
print(sprintf("Our data has %d variables (columns)", ncol(bio)))
## [1] "Our data has 6 variables (columns)"
dim(bio)[[2]]
colnames(bio)
# Number of observations (rows); dim() is shown for comparison.
print(sprintf("Our data has %d observations (rows).", nrow(bio)))
## [1] "Our data has 511 observations (rows)."
dim(bio)[[1]]
# Show the first and last rows of `bio` in several equivalent ways.
# The row positions are derived from nrow(bio) instead of being hard-coded
# (originally 1:6 and 506:511), so the index-based versions keep matching
# head()/tail() if the data set changes size.
n_show <- 6
first_rows <- head(seq_len(nrow(bio)), n_show)
last_rows <- tail(seq_len(nrow(bio)), n_show)
head(bio, n = n_show)
## # A tibble: 6 x 6
## netID fishID species tl w scale
## <dbl> <dbl> <chr> <dbl> <dbl> <lgl>
## 1 12 16 Bluegill 61 2.9 FALSE
## 2 12 23 Bluegill 66 4.5 FALSE
## 3 12 30 Bluegill 70 5.2 FALSE
## 4 12 44 Bluegill 38 0.5 FALSE
## 5 12 50 Bluegill 42 1 FALSE
## 6 12 65 Bluegill 54 2.1 FALSE
tail(bio, n = n_show)
## # A tibble: 6 x 6
## netID fishID species tl w scale
## <dbl> <dbl> <chr> <dbl> <dbl> <lgl>
## 1 121 808 Black Crappie 323 509 TRUE
## 2 121 809 Black Crappie 282 352 TRUE
## 3 121 812 Black Crappie 142 37 TRUE
## 4 110 863 Black Crappie 307 415 TRUE
## 5 129 870 Black Crappie 279 344 TRUE
## 6 129 879 Black Crappie 302 397 TRUE
# Base R indexing, for comparison.
bio[first_rows, ]
bio[last_rows, ]
bio[c(first_rows, last_rows), ]
# dplyr slicing, for comparison.
bio %>% slice_head(n = n_show)
bio %>% slice_tail(n = n_show)
bio %>% slice(c(first_rows, last_rows))
# Extract the species column as a plain character vector and look at both ends.
bioSpecies <- bio[["species"]]
head(bioSpecies, n = 5L)
## [1] "Bluegill" "Bluegill" "Bluegill" "Bluegill" "Bluegill"
tail(bioSpecies, n = 5L)
## [1] "Black Crappie" "Black Crappie" "Black Crappie" "Black Crappie"
## [5] "Black Crappie"
# dplyr equivalent of `bio$species`, for comparison.
# pull() returns the column as a vector. select() would return a one-column
# tibble instead; table() then labels the dimension "species" rather than
# "bioSpecies", and the later rename of the "bioSpecies" column fails.
bioSpecies <-
  bio %>%
  pull(species)
# Count the observations of each species.
Species_Freq <- table(bioSpecies)
print(Species_Freq)
## bioSpecies
## Black Crappie Bluegill Bluntnose Minnow Iowa Darter
## 25 208 100 31
## Largemouth Bass Pumpkinseed Tadpole Madtom Yellow Perch
## 90 13 6 38
# Turn the frequency table into a data frame, inspect the default column
# names, then replace them with readable ones.
Species_df <- as.data.frame(Species_Freq)
colnames(Species_df)
## [1] "bioSpecies" "Freq"
Species_df <- Species_df %>%
  rename(Species = bioSpecies, Frequency = Freq)
print(Species_df)
## Species Frequency
## 1 Black Crappie 25
## 2 Bluegill 208
## 3 Bluntnose Minnow 100
## 4 Iowa Darter 31
## 5 Largemouth Bass 90
## 6 Pumpkinseed 13
## 7 Tadpole Madtom 6
## 8 Yellow Perch 38
# Base R alternative to rename(), for comparison. The second name must be
# spelled "Frequency": later code reads Species_df$Frequency.
names(Species_df) <- c("Species", "Frequency")
The professor might have meant proportions rather than percentages; this follows the instructions exactly as written.
# Share of all observations per species, as a percentage with 3 decimals.
Species_Pct <- round(100 * prop.table(Species_Freq), digits = 3)
print(Species_Pct)
## bioSpecies
## Black Crappie Bluegill Bluntnose Minnow Iowa Darter
## 4.892 40.705 19.569 6.067
## Largemouth Bass Pumpkinseed Tadpole Madtom Yellow Perch
## 17.613 2.544 1.174 7.436
Similar to Task 07
# Convert the percentage table to a data frame and give the columns
# readable names.
SpeciesPct_df <- Species_Pct %>%
  as.data.frame() %>%
  dplyr::rename(Species = bioSpecies, Percentage = Freq)
print(SpeciesPct_df)
## Species Percentage
## 1 Black Crappie 4.892
## 2 Bluegill 40.705
## 3 Bluntnose Minnow 19.569
## 4 Iowa Darter 6.067
## 5 Largemouth Bass 17.613
## 6 Pumpkinseed 2.544
## 7 Tadpole Madtom 1.174
## 8 Yellow Perch 7.436
# Sort by descending percentage with dplyr.
arrange(SpeciesPct_df, desc(Percentage))
## Species Percentage
## 1 Bluegill 40.705
## 2 Bluntnose Minnow 19.569
## 3 Largemouth Bass 17.613
## 4 Yellow Perch 7.436
## 5 Iowa Darter 6.067
## 6 Black Crappie 4.892
## 7 Pumpkinseed 2.544
## 8 Tadpole Madtom 1.174
# Base R version of the same sort, for comparison.
SpeciesPct_df[order(SpeciesPct_df$Percentage, decreasing = TRUE), ]
# Build the summary table: counts, cumulative counts, percentages, and
# cumulative percentages per species.
# CumPercentage is computed from the cumulative counts and rounded once.
# Summing the already-rounded Percentages accumulated rounding error and
# ended at 99.99 instead of 100.
FinalTable <-
  Species_df %>% # Task 07
  mutate(
    CumFrequencies = cumsum(Frequency),
    Percentages = round(Frequency / sum(Frequency) * 100, digits = 2),
    CumPercentage = round(CumFrequencies / sum(Frequency) * 100, digits = 2)
  )
print(FinalTable)
## Species Frequency CumFrequencies Percentages CumPercentage
## 1 Black Crappie 25 25 4.89 4.89
## 2 Bluegill 208 233 40.70 45.60
## 3 Bluntnose Minnow 100 333 19.57 65.17
## 4 Iowa Darter 31 364 6.07 71.23
## 5 Largemouth Bass 90 454 17.61 88.85
## 6 Pumpkinseed 13 467 2.54 91.39
## 7 Tadpole Madtom 6 473 1.17 92.56
## 8 Yellow Perch 38 511 7.44 100.00
Save as plot1
# Largest count; presumably checked to pick a y-axis limit with headroom.
max(Species_df$Frequency)
## [1] 208
# Bar chart of counts per species.
# The stray trailing comma after `col` was removed: it passed an empty
# argument that R matched positionally to `width`, which only worked
# because `width` has a default.
barplot(
  Species_Freq, # Task 06
  main = "Fish Counts per species", # part (1)
  sub = "Data set InchBio", # part (2)
  ylab = "COUNTS", # part (3)
  las = 1, # part (4)
  cex.names = 0.6, # part (5) shows correctly when zoomed
  ylim = c(0, 250), # part (6)
  col = 1:8 # part (7) palette("R3"); palette()
)
Save as plot2
# Step 1: bare bars only, with no borders and no axes.
with(
  FinalTable,
  barplot(
    Frequency,
    names.arg = Species,
    main = "Individual observations per fish species",
    ylim = c(0, 600),
    width = 1,
    space = 0.15,
    border = NA,
    axes = FALSE, # alone TRUE looks better
    cex.names = 0.7, # shows correctly when zoomed
    las = 1 # ?par
  )
)
https://www.statmethods.net/advgraphs/axes.html
# Step 2: the same bars, plus a grey left-hand axis with ticks at 0 and 300.
with(
  FinalTable,
  barplot(
    Frequency,
    names.arg = Species,
    main = "Individual observations per fish species",
    ylim = c(0, 600),
    width = 1,
    space = 0.15,
    border = NA,
    axes = FALSE,
    cex.names = 0.7,
    las = 1
  )
)
axis(
  side = 2,
  at = c(0, 300),
  las = 2, # extra
  cex.axis = 0.8,
  col = "grey62",
  col.axis = "grey62"
)
https://www.wolframalpha.com/input/?i=%23D930DF
# Step 3: bars and left axis as before, plus a right-hand axis whose ticks
# sit at 0 and at each cumulative frequency.
with(
  FinalTable,
  barplot(
    Frequency,
    names.arg = Species,
    main = "Individual observations per fish species",
    ylim = c(0, 600),
    width = 1,
    space = 0.15,
    border = NA,
    axes = FALSE,
    cex.names = 0.7,
    las = 1
  )
)
axis(
  side = 2,
  at = c(0, 300),
  las = 1,
  cex.axis = 0.8,
  col = "grey62",
  col.axis = "grey62"
)
axis(
  side = 4,
  at = c(0, FinalTable$CumFrequencies),
  las = 1, # extra
  cex.axis = 0.8,
  col = "#D930DF",
  col.axis = "#D930DF"
)
https://www.statmethods.net/advgraphs/axes.html
# Step 4: set all four margins to one inch, redraw bars and both axes,
# then label each axis with mtext().
par(mai = c(1, 1, 1, 1))
with(
  FinalTable,
  barplot(
    Frequency,
    names.arg = Species,
    main = "Individual observations per fish species",
    ylim = c(0, 600),
    width = 1,
    space = 0.15,
    border = NA,
    axes = FALSE,
    cex.names = 0.7,
    las = 1
  )
)
axis(
  side = 2,
  at = c(0, 300),
  las = 1,
  cex.axis = 0.8,
  col = "grey62",
  col.axis = "grey62"
)
axis(
  side = 4,
  at = c(0, FinalTable$CumFrequencies),
  las = 1,
  cex.axis = 0.8,
  col = "#D930DF",
  col.axis = "#D930DF"
)
mtext("Counts", side = 2, line = -1) # extra
mtext("Cumulative Frequencies", side = 4, line = -1) # extra
# Final step: bars, both axes, axis titles, and the cumulative-frequency line.
par(mai = c(1, 1, 1, 1))
# barplot() returns the x positions of the bar midpoints. Keep them so the
# cumulative line can be drawn through the centre of each bar.
bar_mids <- barplot(
  FinalTable$Frequency,
  names.arg = FinalTable$Species,
  width = 1,
  space = 0.15,
  border = NA,
  axes = FALSE,
  ylim = c(0, 600),
  cex.names = 0.7,
  main = "Individual observations per fish species",
  las = 1
)
axis(
  side = 2,
  at = c(0, 300),
  col = "grey62",
  col.axis = "grey62",
  cex.axis = 0.8,
  las = 1
)
axis(
  side = 4,
  at = c(0, FinalTable$CumFrequencies),
  col = "#D930DF",
  col.axis = "#D930DF",
  cex.axis = 0.8,
  las = 1
)
mtext(side = 2, "Counts", line = -1)
mtext(side = 4, "Cumulative Frequencies", line = -1)
# Given
# Without explicit x values, lines() plots at x = 1, 2, ..., 8, which does
# not match the bar centres (width 1, space 0.15), so the points drifted
# off their bars. Plot against the midpoints instead.
lines(
  bar_mids,
  FinalTable$CumFrequencies,
  type = "b",
  cex = 0.7,
  pch = 19,
  col = "cyan4"
)
https://community.rstudio.com/t/boxplot-axis-and-text/52545/7
# ?boxplot
# Total length by species; varwidth makes box width reflect group size.
boxplot(
  tl ~ species,
  data = bio,
  main = "Total Length (width denotes relative count)",
  xlab = "Fish Species",
  ylab = "millimeters",
  varwidth = TRUE,
  horizontal = FALSE, # default
  las = 1,
  col = 1:8,
  cex.axis = 0.5
)