Initial

Code chunks shown without output are alternative approaches, included for comparison.
Remember “?command” for help
https://rseek.org
The explicit “print” commands are optional (a habit carried over from 1970s-era code): R automatically prints the value of a top-level expression.

library(tidyverse, quietly = TRUE)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DT)

Task 01

# Read the inchBio CSV into `bio`; readr reports the column types it guessed.
bio <- read_csv(file = "G:/My Drive/homework/Lovely J/inchBio.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   netID = col_double(),
##   fishID = col_double(),
##   species = col_character(),
##   tl = col_double(),
##   w = col_double(),
##   scale = col_logical()
## )
# Interactive table of the whole data set, followed by per-column summaries.
bio %>% datatable()
bio %>% summary()
##      netID            fishID        species                tl       
##  Min.   :  4.00   Min.   :  7.0   Length:511         Min.   : 28.0  
##  1st Qu.: 12.00   1st Qu.:172.0   Class :character   1st Qu.: 61.0  
##  Median :101.00   Median :568.0   Mode  :character   Median :150.0  
##  Mean   : 79.77   Mean   :488.6                      Mean   :156.6  
##  3rd Qu.:114.00   3rd Qu.:760.5                      3rd Qu.:226.0  
##  Max.   :206.00   Max.   :915.0                      Max.   :429.0  
##        w            scale        
##  Min.   :   0.2   Mode :logical  
##  1st Qu.:   2.0   FALSE:200      
##  Median :  54.5   TRUE :311      
##  Mean   : 126.8                  
##  3rd Qu.: 190.5                  
##  Max.   :1070.0
# Compact column-by-column preview: name, type and first values.
bio %>% glimpse()
## Rows: 511
## Columns: 6
## $ netID   <dbl> 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,...
## $ fishID  <dbl> 16, 23, 30, 44, 50, 65, 68, 69, 70, 71, 73, 74, 75, 76, 77,...
## $ species <chr> "Bluegill", "Bluegill", "Bluegill", "Bluegill", "Bluegill",...
## $ tl      <dbl> 61, 66, 70, 38, 42, 54, 36, 59, 39, 34, 40, 35, 32, 37, 38,...
## $ w       <dbl> 2.9, 4.5, 5.2, 0.5, 1.0, 2.1, 0.5, 2.0, 0.5, 0.5, 1.0, 0.5,...
## $ scale   <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL...

Task 02

# Report the number of variables in a sentence.
bio %>%
  ncol() %>%
  paste("Our data has", ., "variables (columns)") %>%
  print()
## [1] "Our data has 6 variables (columns)"
# Cross-checks: second element of the dimensions, and the column names.
dim(bio)[2L]
colnames(bio)

Task 03

# Report the number of observations in a sentence.
bio %>%
  nrow() %>%
  paste("Our data has", ., "observations (rows).") %>%
  print()
## [1] "Our data has 511 observations (rows)."
# Cross-check: first element of the dimensions.
dim(bio)[1L]

Task 04

# First and last six observations via head() / tail().
head(bio, n = 6)
## # A tibble: 6 x 6
##   netID fishID species     tl     w scale
##   <dbl>  <dbl> <chr>    <dbl> <dbl> <lgl>
## 1    12     16 Bluegill    61   2.9 FALSE
## 2    12     23 Bluegill    66   4.5 FALSE
## 3    12     30 Bluegill    70   5.2 FALSE
## 4    12     44 Bluegill    38   0.5 FALSE
## 5    12     50 Bluegill    42   1   FALSE
## 6    12     65 Bluegill    54   2.1 FALSE
tail(bio, n = 6)
## # A tibble: 6 x 6
##   netID fishID species          tl     w scale
##   <dbl>  <dbl> <chr>         <dbl> <dbl> <lgl>
## 1   121    808 Black Crappie   323   509 TRUE 
## 2   121    809 Black Crappie   282   352 TRUE 
## 3   121    812 Black Crappie   142    37 TRUE 
## 4   110    863 Black Crappie   307   415 TRUE 
## 5   129    870 Black Crappie   279   344 TRUE 
## 6   129    879 Black Crappie   302   397 TRUE
# Equivalent index-based versions. The last six row numbers are derived from
# nrow(bio) (506:511 for the current 511-row file) instead of being
# hard-coded, so they stay correct if the data set changes length.
last_six <- seq(to = nrow(bio), length.out = 6)

bio[1:6, ]
bio[last_six, ]

bio[c(1:6, last_six), ]

# dplyr equivalents.
bio %>% slice_head(n = 6)
bio %>% slice_tail(n = 6)

bio %>% slice(c(1:6, last_six))

Task 05

# Extract the species column as a plain character vector.
bioSpecies <- bio$species
head(bioSpecies, n = 5)
## [1] "Bluegill" "Bluegill" "Bluegill" "Bluegill" "Bluegill"
tail(bioSpecies, n = 5)
## [1] "Black Crappie" "Black Crappie" "Black Crappie" "Black Crappie"
## [5] "Black Crappie"
# Tidyverse equivalent of `bio$species`. pull() returns the column as a
# vector; select() would return a one-column tibble instead, which makes
# table() label its result "species" rather than "bioSpecies" and breaks the
# later rename of the "bioSpecies" column.
bioSpecies <-
  bio %>%
  pull(species)

Task 06

# Count the observations per species. table() labels the result with the name
# of the variable it was given (the "bioSpecies" header in the printed output).
Species_Freq <- table(bioSpecies)

print(Species_Freq)
## bioSpecies
##    Black Crappie         Bluegill Bluntnose Minnow      Iowa Darter 
##               25              208              100               31 
##  Largemouth Bass      Pumpkinseed   Tadpole Madtom     Yellow Perch 
##               90               13                6               38

Task 07

# Turn the frequency table into a data frame, inspect the default column
# names, then replace them with readable ones.
Species_df <- Species_Freq %>% as.data.frame()
names(Species_df)
## [1] "bioSpecies" "Freq"
Species_df <-
  Species_df %>%
  dplyr::rename(Species = bioSpecies, Frequency = Freq)

print(Species_df)
##            Species Frequency
## 1    Black Crappie        25
## 2         Bluegill       208
## 3 Bluntnose Minnow       100
## 4      Iowa Darter        31
## 5  Largemouth Bass        90
## 6      Pumpkinseed        13
## 7   Tadpole Madtom         6
## 8     Yellow Perch        38
# Base-R alternative to rename(): assign both column names at once.
# The second name must be spelled "Frequency" because later code reads
# Species_df$Frequency.
names(Species_df) <- c("Species", "Frequency")

Task 08

The professor might have meant proportions rather than percentages; this follows the instructions exactly as written and reports percentages.

# Share of each species as a percentage of all observations, to 3 decimals.
Species_Pct <- round(100 * prop.table(Species_Freq), digits = 3)

print(Species_Pct)
## bioSpecies
##    Black Crappie         Bluegill Bluntnose Minnow      Iowa Darter 
##            4.892           40.705           19.569            6.067 
##  Largemouth Bass      Pumpkinseed   Tadpole Madtom     Yellow Perch 
##           17.613            2.544            1.174            7.436

Task 09

Similar to Task 07

# Percentage table as a data frame with readable column names.
SpeciesPct_df <-
  Species_Pct %>%
  as.data.frame() %>%
  dplyr::rename(Species = bioSpecies, Percentage = Freq)

print(SpeciesPct_df)
##            Species Percentage
## 1    Black Crappie      4.892
## 2         Bluegill     40.705
## 3 Bluntnose Minnow     19.569
## 4      Iowa Darter      6.067
## 5  Largemouth Bass     17.613
## 6      Pumpkinseed      2.544
## 7   Tadpole Madtom      1.174
## 8     Yellow Perch      7.436

Task 10

# Species ordered from most to least common (dplyr version).
arrange(SpeciesPct_df, desc(Percentage))
##            Species Percentage
## 1         Bluegill     40.705
## 2 Bluntnose Minnow     19.569
## 3  Largemouth Bass     17.613
## 4     Yellow Perch      7.436
## 5      Iowa Darter      6.067
## 6    Black Crappie      4.892
## 7      Pumpkinseed      2.544
## 8   Tadpole Madtom      1.174
# Base-R version: order the rows on the negated percentage.
with(SpeciesPct_df, SpeciesPct_df[order(-Percentage), ])

Task 11

# Frequency table extended with cumulative counts, percentages and cumulative
# percentages. The cumulative percentage is computed from the cumulative
# counts and rounded once; summing the already-rounded percentages would
# accumulate rounding error and end at 99.99 instead of 100.
FinalTable <-
  Species_df %>% # Task 07
  mutate(CumFrequencies = cumsum(Frequency),
         Percentages = round(Frequency / sum(Frequency) * 100, digits = 2),
         CumPercentage = round(CumFrequencies / sum(Frequency) * 100, digits = 2)
         )

print(FinalTable)
##            Species Frequency CumFrequencies Percentages CumPercentage
## 1    Black Crappie        25             25        4.89          4.89
## 2         Bluegill       208            233       40.70         45.60
## 3 Bluntnose Minnow       100            333       19.57         65.17
## 4      Iowa Darter        31            364        6.07         71.23
## 5  Largemouth Bass        90            454       17.61         88.85
## 6      Pumpkinseed        13            467        2.54         91.39
## 7   Tadpole Madtom         6            473        1.17         92.56
## 8     Yellow Perch        38            511        7.44        100.00

Task 12

Save as plot1

# Largest species count; presumably checked to choose a y-axis limit that
# clears the tallest bar.
max(Species_df$Frequency)
## [1] 208
# Bar chart of counts per species. The argument list no longer ends with a
# trailing comma: that left an empty argument which only worked because it was
# silently matched to barplot()'s `width` parameter.
barplot(Species_Freq, # Task 06
        main = "Fish Counts per species", # part (1)
        sub = "Data set InchBio", # part (2)
        ylab = "COUNTS", # part (3)
        las = 1, # part (4)
        cex.names = 0.6, # part (5) shows correctly when zoomed
        ylim = c(0, 250), # part (6)
        col = 1:8 # part (7) palette("R3"); palette()
        )

Task 13

Save as plot2

# Borderless bar chart of the per-species counts without a y axis.
barplot(
  height = FinalTable[["Frequency"]],
  names.arg = FinalTable[["Species"]],
  main = "Individual observations per fish species",
  ylim = c(0, 600),
  width = 1,
  space = 0.15,
  border = NA,
  axes = FALSE, # on its own, TRUE looks better
  las = 1, # see ?par
  cex.names = 0.7 # shows correctly when zoomed
)

Task 14

https://www.statmethods.net/advgraphs/axes.html

# Same bar chart as before, drawn without axes so a custom one can be added.
barplot(
  height = FinalTable[["Frequency"]],
  names.arg = FinalTable[["Species"]],
  main = "Individual observations per fish species",
  ylim = c(0, 600),
  width = 1,
  space = 0.15,
  border = NA,
  axes = FALSE,
  las = 1,
  cex.names = 0.7
)

# Grey left-hand axis with ticks at 0 and 300 only.
axis(
  side = 2,
  at = c(0, 300),
  las = 2, # extra
  cex.axis = 0.8,
  col = "grey62",
  col.axis = "grey62"
)

Task 15

https://www.wolframalpha.com/input/?i=%23D930DF

# Bar chart of the per-species counts, drawn without default axes.
barplot(
  height = FinalTable[["Frequency"]],
  names.arg = FinalTable[["Species"]],
  main = "Individual observations per fish species",
  ylim = c(0, 600),
  width = 1,
  space = 0.15,
  border = NA,
  axes = FALSE,
  las = 1,
  cex.names = 0.7
)

# Grey left-hand axis with ticks at 0 and 300 only.
axis(
  side = 2,
  at = c(0, 300),
  las = 1,
  cex.axis = 0.8,
  col = "grey62",
  col.axis = "grey62"
)

# Right-hand axis in colour #D930DF with a tick at 0 and at every cumulative
# frequency.
axis(
  side = 4,
  at = c(0, FinalTable[["CumFrequencies"]]),
  las = 1, # extra
  cex.axis = 0.8,
  col = "#D930DF",
  col.axis = "#D930DF"
)

Task 16

https://www.statmethods.net/advgraphs/axes.html

# Set all four figure margins to 1 inch. par() returns the previous settings,
# which are kept so they can be restored once the plot is finished instead of
# leaking into later plots.
old_par <- par(mai = c(1, 1, 1, 1))

# Bar chart of the per-species counts, drawn without default axes.
barplot(FinalTable$Frequency,
        names.arg = FinalTable$Species,
        width = 1,
        space = 0.15,
        border = NA,
        axes = FALSE,
        ylim = c(0, 600),
        cex.names = 0.7,
        main = "Individual observations per fish species",
        las = 1)

# Grey left-hand axis with ticks at 0 and 300 only.
axis(side = 2,
     at = c(0, 300),
     col = "grey62",
     col.axis = "grey62",
     cex.axis = 0.8,
     las = 1)

# Right-hand axis with a tick at 0 and at every cumulative frequency.
axis(side = 4,
     at = c(0, FinalTable$CumFrequencies),
     col = "#D930DF",
     col.axis = "#D930DF",
     cex.axis = 0.8,
     las = 1)

# Axis titles for the left and right sides.
mtext(side = 2,
      "Counts",
      line = -1) # extra

mtext(side = 4,
      "Cumulative Frequencies",
      line = -1) # extra

# Put back the graphics parameters that were in effect before this plot.
par(old_par)

Task 17

# Set all four figure margins to 1 inch. par() returns the previous settings,
# which are kept so they can be restored once the plot is finished.
old_par <- par(mai = c(1, 1, 1, 1))

# Bar chart of the per-species counts. barplot() returns the x coordinate of
# each bar's midpoint; keep it so the cumulative line can be aligned with the
# bars.
bar_mid <- barplot(FinalTable$Frequency,
                   names.arg = FinalTable$Species,
                   width = 1,
                   space = 0.15,
                   border = NA,
                   axes = FALSE,
                   ylim = c(0, 600),
                   cex.names = 0.7,
                   main = "Individual observations per fish species",
                   las = 1)

# Grey left-hand axis with ticks at 0 and 300 only.
axis(side = 2,
     at = c(0, 300),
     col = "grey62",
     col.axis = "grey62",
     cex.axis = 0.8,
     las = 1)

# Right-hand axis with a tick at 0 and at every cumulative frequency.
axis(side = 4,
     at = c(0, FinalTable$CumFrequencies),
     col = "#D930DF",
     col.axis = "#D930DF",
     cex.axis = 0.8,
     las = 1)

# Axis titles for the left and right sides.
mtext(side = 2,
      "Counts",
      line = -1)

mtext(side = 4,
      "Cumulative Frequencies",
      line = -1)

# Cumulative-frequency line (given in the task), plotted at the bar midpoints.
# Without explicit x values lines() would use 1, 2, ..., 8, which do not
# coincide with the bar centres when width = 1 and space = 0.15.
lines(x = bar_mid,
      y = FinalTable$CumFrequencies,
      type = "b",
      cex = 0.7,
      pch = 19,
      col = "cyan4")

# Put back the graphics parameters that were in effect before this plot.
par(old_par)

Task 18

https://community.rstudio.com/t/boxplot-axis-and-text/52545/7

# ?boxplot

# Total length by species; box widths scale with each group's sample size.
boxplot(
  tl ~ species,
  data = bio,
  main = "Total Length (width denotes relative count)",
  xlab = "Fish Species",
  ylab = "millimeters",
  col = 1:8,
  varwidth = TRUE,
  horizontal = FALSE, # the default orientation
  las = 1,
  cex.axis = 0.5
)