Ph.D. Course Work -2024 on Quantitative Methods

Day 4 material

———————————————————————–

# Start from an empty workspace by removing every object in the global environment.
rm(list = ls())
# Show the current working directory.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024"
# Switch the working directory to the folder that holds the data files.
# NOTE: this absolute Windows path is specific to the author's machine.
setwd("D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data")
# Confirm that the working directory changed.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024/Data"
# Attach readr, then read combined.csv (relative to the working directory)
# into the object `survey`.
library(readr)
survey <- read_csv(file = "combined.csv")
## Rows: 34786 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_id, sex, genus, species, taxa, plot_type
## dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() shows the contents of the object in tabular form.
View(survey)
# str() prints the structure of the object: its class, its dimensions, and
# the type and first few values of every column.
str(survey)
## spc_tbl_ [34,786 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ record_id      : num [1:34786] 1 72 224 266 349 363 435 506 588 661 ...
##  $ month          : num [1:34786] 7 8 9 10 11 11 12 1 2 3 ...
##  $ day            : num [1:34786] 16 19 13 16 12 12 10 8 18 11 ...
##  $ year           : num [1:34786] 1977 1977 1977 1977 1977 ...
##  $ plot_id        : num [1:34786] 2 2 2 2 2 2 2 2 2 2 ...
##  $ species_id     : chr [1:34786] "NL" "NL" "NL" "NL" ...
##  $ sex            : chr [1:34786] "M" "M" NA NA ...
##  $ hindfoot_length: num [1:34786] 32 31 NA NA NA NA NA NA NA NA ...
##  $ weight         : num [1:34786] NA NA NA NA NA NA NA NA 218 NA ...
##  $ genus          : chr [1:34786] "Neotoma" "Neotoma" "Neotoma" "Neotoma" ...
##  $ species        : chr [1:34786] "albigula" "albigula" "albigula" "albigula" ...
##  $ taxa           : chr [1:34786] "Rodent" "Rodent" "Rodent" "Rodent" ...
##  $ plot_type      : chr [1:34786] "Control" "Control" "Control" "Control" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   record_id = col_double(),
##   ..   month = col_double(),
##   ..   day = col_double(),
##   ..   year = col_double(),
##   ..   plot_id = col_double(),
##   ..   species_id = col_character(),
##   ..   sex = col_character(),
##   ..   hindfoot_length = col_double(),
##   ..   weight = col_double(),
##   ..   genus = col_character(),
##   ..   species = col_character(),
##   ..   taxa = col_character(),
##   ..   plot_type = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Observe that the structure of the object is also present in the environment.
# names() returns the column names of the data frame.
names(survey)
##  [1] "record_id"       "month"           "day"             "year"           
##  [5] "plot_id"         "species_id"      "sex"             "hindfoot_length"
##  [9] "weight"          "genus"           "species"         "taxa"           
## [13] "plot_type"
# rownames() returns the row names of the data frame (call left commented out).
#rownames(survey)
# dim() returns a vector whose 1st element is the number of rows and whose
# 2nd element is the number of columns.
dim(survey)
## [1] 34786    13
# nrow() returns the number of rows of the data frame.
nrow(survey)
## [1] 34786
# ncol() returns the number of columns of the data frame.
ncol(survey)
## [1] 13
# summary() returns summary statistics for each column of the data frame.
summary(survey)
##    record_id         month             day            year         plot_id     
##  Min.   :    1   Min.   : 1.000   Min.   : 1.0   Min.   :1977   Min.   : 1.00  
##  1st Qu.: 8964   1st Qu.: 4.000   1st Qu.: 9.0   1st Qu.:1984   1st Qu.: 5.00  
##  Median :17762   Median : 6.000   Median :16.0   Median :1990   Median :11.00  
##  Mean   :17804   Mean   : 6.474   Mean   :16.1   Mean   :1990   Mean   :11.34  
##  3rd Qu.:26655   3rd Qu.:10.000   3rd Qu.:23.0   3rd Qu.:1997   3rd Qu.:17.00  
##  Max.   :35548   Max.   :12.000   Max.   :31.0   Max.   :2002   Max.   :24.00  
##                                                                                
##   species_id            sex            hindfoot_length     weight      
##  Length:34786       Length:34786       Min.   : 2.00   Min.   :  4.00  
##  Class :character   Class :character   1st Qu.:21.00   1st Qu.: 20.00  
##  Mode  :character   Mode  :character   Median :32.00   Median : 37.00  
##                                        Mean   :29.29   Mean   : 42.67  
##                                        3rd Qu.:36.00   3rd Qu.: 48.00  
##                                        Max.   :70.00   Max.   :280.00  
##                                        NA's   :3348    NA's   :2503    
##     genus             species              taxa            plot_type        
##  Length:34786       Length:34786       Length:34786       Length:34786      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
## 
# Select the species_id column (the 6th column) for the first 2 rows.
survey[1:2, "species_id"]
## # A tibble: 2 × 1
##   species_id
##   <chr>     
## 1 NL        
## 2 NL
# Select all the male animals surveyed in the year 1980.
# `sex` contains missing values, and `NA == "M"` evaluates to NA. An NA in a
# logical row index makes `[` return an all-NA row instead of skipping the
# record. which() keeps only the positions where the condition is TRUE.
# NOTE: the output printed below was captured from the unguarded expression,
# so its row count (770) may include such all-NA rows; re-run to refresh it.
survey[which(survey$sex == "M" & survey$year == 1980), ]
## # A tibble: 770 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1      2555     3     9  1980       2 DM         M                  NA     42
##  2      2622     3     9  1980       2 DM         M                  NA     43
##  3      2631     3     9  1980       2 DM         M                  NA     44
##  4      2643     3     9  1980       2 DM         M                  NA     43
##  5      2653     3     9  1980       2 DM         M                  NA     51
##  6      2662     3     9  1980       2 DM         M                  NA     47
##  7      2692     3     9  1980       2 DM         M                  NA     45
##  8      2695     3     9  1980       2 DM         M                  NA     45
##  9      2902     4    18  1980       2 DM         M                  NA     48
## 10      2906     4    18  1980       2 DM         M                  NA     43
## # ℹ 760 more rows
## # ℹ 4 more variables: genus <chr>, species <chr>, taxa <chr>, plot_type <chr>
# Select the last row of the data: nrow() gives the index of the final row.

survey[nrow(survey), ]
## # A tibble: 1 × 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1     30986     7     1  2000       7 PX         <NA>               NA     NA
## # ℹ 4 more variables: genus <chr>, species <chr>, taxa <chr>, plot_type <chr>
# Select the middle row of the data.
# nrow(survey) / 2 is a whole number only when the row count is even; with an
# odd count the index would be fractional (e.g. 2.5), which is not a valid row
# position. ceiling() rounds up to a valid index and leaves the even case
# (34786 / 2 = 17393) unchanged.
survey[ceiling(nrow(survey) / 2), ]
## # A tibble: 1 × 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1      9828     1    19  1985      14 AB         <NA>               NA     NA
## # ℹ 4 more variables: genus <chr>, species <chr>, taxa <chr>, plot_type <chr>
# Observe the result of summary() on the survey data frame: character columns
# are reported only as Length / Class / Mode.
summary(survey)
##    record_id         month             day            year         plot_id     
##  Min.   :    1   Min.   : 1.000   Min.   : 1.0   Min.   :1977   Min.   : 1.00  
##  1st Qu.: 8964   1st Qu.: 4.000   1st Qu.: 9.0   1st Qu.:1984   1st Qu.: 5.00  
##  Median :17762   Median : 6.000   Median :16.0   Median :1990   Median :11.00  
##  Mean   :17804   Mean   : 6.474   Mean   :16.1   Mean   :1990   Mean   :11.34  
##  3rd Qu.:26655   3rd Qu.:10.000   3rd Qu.:23.0   3rd Qu.:1997   3rd Qu.:17.00  
##  Max.   :35548   Max.   :12.000   Max.   :31.0   Max.   :2002   Max.   :24.00  
##                                                                                
##   species_id            sex            hindfoot_length     weight      
##  Length:34786       Length:34786       Min.   : 2.00   Min.   :  4.00  
##  Class :character   Class :character   1st Qu.:21.00   1st Qu.: 20.00  
##  Mode  :character   Mode  :character   Median :32.00   Median : 37.00  
##                                        Mean   :29.29   Mean   : 42.67  
##                                        3rd Qu.:36.00   3rd Qu.: 48.00  
##                                        Max.   :70.00   Max.   :280.00  
##                                        NA's   :3348    NA's   :2503    
##     genus             species              taxa            plot_type        
##  Length:34786       Length:34786       Length:34786       Length:34786      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
## 
# Convert the taxa and genus columns to factors.
survey[["taxa"]] <- as.factor(survey[["taxa"]])
survey[["genus"]] <- as.factor(survey[["genus"]])
# Run summary() again and compare: the factor columns are now tabulated by
# level instead of being reported as Length / Class / Mode.
summary(survey)
##    record_id         month             day            year         plot_id     
##  Min.   :    1   Min.   : 1.000   Min.   : 1.0   Min.   :1977   Min.   : 1.00  
##  1st Qu.: 8964   1st Qu.: 4.000   1st Qu.: 9.0   1st Qu.:1984   1st Qu.: 5.00  
##  Median :17762   Median : 6.000   Median :16.0   Median :1990   Median :11.00  
##  Mean   :17804   Mean   : 6.474   Mean   :16.1   Mean   :1990   Mean   :11.34  
##  3rd Qu.:26655   3rd Qu.:10.000   3rd Qu.:23.0   3rd Qu.:1997   3rd Qu.:17.00  
##  Max.   :35548   Max.   :12.000   Max.   :31.0   Max.   :2002   Max.   :24.00  
##                                                                                
##   species_id            sex            hindfoot_length     weight      
##  Length:34786       Length:34786       Min.   : 2.00   Min.   :  4.00  
##  Class :character   Class :character   1st Qu.:21.00   1st Qu.: 20.00  
##  Mode  :character   Mode  :character   Median :32.00   Median : 37.00  
##                                        Mean   :29.29   Mean   : 42.67  
##                                        3rd Qu.:36.00   3rd Qu.: 48.00  
##                                        Max.   :70.00   Max.   :280.00  
##                                        NA's   :3348    NA's   :2503    
##              genus         species               taxa        plot_type        
##  Dipodomys      :16167   Length:34786       Bird   :  450   Length:34786      
##  Chaetodipus    : 6029   Class :character   Rabbit :   75   Class :character  
##  Onychomys      : 3267   Mode  :character   Reptile:   14   Mode  :character  
##  Reithrodontomys: 2694                      Rodent :34247                     
##  Peromyscus     : 2234                                                        
##  Perognathus    : 1629                                                        
##  (Other)        : 2766
# Create a data frame survey_200 containing only row 200 of the survey dataset,
# then print it.
survey_200 <- survey[200, ]
survey_200
## # A tibble: 1 × 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1     35212    12     7  2002       2 NL         M                  33    248
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# Combine nrow() and exclusion (-) notation to reproduce the behavior of
# head(survey): exclude rows 7 through the last row, leaving the first six.
survey_head <- survey[-seq(from = 7, to = nrow(survey)), ]
survey_head
## # A tibble: 6 × 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1         1     7    16  1977       2 NL         M                  32     NA
## 2        72     8    19  1977       2 NL         M                  31     NA
## 3       224     9    13  1977       2 NL         <NA>               NA     NA
## 4       266    10    16  1977       2 NL         <NA>               NA     NA
## 5       349    11    12  1977       2 NL         <NA>               NA     NA
## 6       363    11    12  1977       2 NL         <NA>               NA     NA
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# head() prints the first six rows, for comparison with the manual subset.
head(survey)
## # A tibble: 6 × 13
##   record_id month   day  year plot_id species_id sex   hindfoot_length weight
##       <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
## 1         1     7    16  1977       2 NL         M                  32     NA
## 2        72     8    19  1977       2 NL         M                  31     NA
## 3       224     9    13  1977       2 NL         <NA>               NA     NA
## 4       266    10    16  1977       2 NL         <NA>               NA     NA
## 5       349    11    12  1977       2 NL         <NA>               NA     NA
## 6       363    11    12  1977       2 NL         <NA>               NA     NA
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# How many rabbits were observed?
# Count the TRUE values of the comparison directly instead of building the
# subset. na.rm = TRUE keeps a missing taxa value from being counted (row
# subsetting with an NA index would add an all-NA row to the result).
sum(survey$taxa == "Rabbit", na.rm = TRUE)
## [1] 75
# How many different genera are there in the genus column?
# length(unique()) returns the number of distinct values.
length(unique(survey$genus))
## [1] 26
# unique() on its own lists the distinct values themselves.
unique(survey$genus)
##  [1] Neotoma          Dipodomys        Perognathus      Peromyscus      
##  [5] Chaetodipus      Sigmodon         Onychomys        Spermophilus    
##  [9] Reithrodontomys  Sylvilagus       Ammospermophilus Amphispiza      
## [13] Calamospiza      Callipepla       Rodent           Pipilo          
## [17] Lizard           Baiomys          Campylorhynchus  Crotalus        
## [21] Sparrow          Pooecetes        Ammodramus       Zonotrichia     
## [25] Sceloporus       Cnemidophorus   
## 26 Levels: Ammodramus Ammospermophilus Amphispiza Baiomys ... Zonotrichia
# Open the data in the viewer again, then attach dplyr, which provides the
# select, filter, mutate, arrange, group_by and summarize verbs used below.

View(survey)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# select verb: select(df, column1, column2, ...) keeps only the named columns.
select(.data = survey, year, species_id)
## # A tibble: 34,786 × 2
##     year species_id
##    <dbl> <chr>     
##  1  1977 NL        
##  2  1977 NL        
##  3  1977 NL        
##  4  1977 NL        
##  5  1977 NL        
##  6  1977 NL        
##  7  1977 NL        
##  8  1978 NL        
##  9  1978 NL        
## 10  1978 NL        
## # ℹ 34,776 more rows
# filter verb: filter(df, condition) keeps the rows where the condition is TRUE.
filter(survey, sex == "M")
## # A tibble: 17,348 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1         1     7    16  1977       2 NL         M                  32     NA
##  2        72     8    19  1977       2 NL         M                  31     NA
##  3       588     2    18  1978       2 NL         M                  NA    218
##  4       845     5     6  1978       2 NL         M                  32    204
##  5       990     6     9  1978       2 NL         M                  NA    200
##  6      1164     8     5  1978       2 NL         M                  34    199
##  7      1261     9     4  1978       2 NL         M                  32    197
##  8      1453    11     5  1978       2 NL         M                  NA    218
##  9      1756     4    29  1979       2 NL         M                  33    166
## 10      1818     5    30  1979       2 NL         M                  32    184
## # ℹ 17,338 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# is.na() returns TRUE for each value that is NA and FALSE otherwise.
# is.na(select(survey, sex))
# Create data frame survey1 from the columns species_id, weight,
# hindfoot_length and year.
survey1 <- select(survey, species_id, weight, hindfoot_length, year)
# Create data frame survey2 with the rows of survey1 where year > 1995.
survey2 <- filter(survey1, year > 1995)
View(survey2)
# The same survey2 in a single step, nesting the select() call inside filter().
survey2 <- filter(
  select(survey, species_id, weight, hindfoot_length, year),
  year > 1995
)
View(survey2)
# The pipe operator (%>%) expresses the same steps in reading order.
survey2 <- survey %>%
  select(species_id, weight, hindfoot_length, year) %>%
  filter(year > 1995)
View(survey2)
# drop_na() removes every row that contains an NA in any column.
# drop_na() is defined in the tidyr package.
library(tidyr)
drop_na(data = survey)
## # A tibble: 30,676 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1       845     5     6  1978       2 NL         M                  32    204
##  2      1164     8     5  1978       2 NL         M                  34    199
##  3      1261     9     4  1978       2 NL         M                  32    197
##  4      1756     4    29  1979       2 NL         M                  33    166
##  5      1818     5    30  1979       2 NL         M                  32    184
##  6      1882     7     4  1979       2 NL         M                  32    206
##  7      2133    10    25  1979       2 NL         F                  33    274
##  8      2184    11    17  1979       2 NL         F                  30    186
##  9      2406     1    16  1980       2 NL         F                  33    184
## 10      3000     5    18  1980       2 NL         F                  31     87
## # ℹ 30,666 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# Create a new data frame surveyCln holding only the rows of survey that have
# no NA in any column.
surveyCln <- survey %>%
  drop_na()
# mutate verb: add or overwrite columns. Here weight is divided by 1000; the
# result is only printed, surveyCln itself is not modified.
surveyCln %>%
  mutate(weight = weight / 1000)
## # A tibble: 30,676 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1       845     5     6  1978       2 NL         M                  32  0.204
##  2      1164     8     5  1978       2 NL         M                  34  0.199
##  3      1261     9     4  1978       2 NL         M                  32  0.197
##  4      1756     4    29  1979       2 NL         M                  33  0.166
##  5      1818     5    30  1979       2 NL         M                  32  0.184
##  6      1882     7     4  1979       2 NL         M                  32  0.206
##  7      2133    10    25  1979       2 NL         F                  33  0.274
##  8      2184    11    17  1979       2 NL         F                  30  0.186
##  9      2406     1    16  1980       2 NL         F                  33  0.184
## 10      3000     5    18  1980       2 NL         F                  31  0.087
## # ℹ 30,666 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# arrange verb: sort the rows by year in ascending order.
surveyCln %>%
  arrange(year)
## # A tibble: 30,676 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1       226     9    13  1977       2 DM         M                  37     51
##  2       233     9    13  1977       2 DM         M                  25     44
##  3       245    10    16  1977       2 DM         M                  37     39
##  4       251    10    16  1977       2 DM         M                  36     49
##  5       257    10    16  1977       2 DM         M                  37     47
##  6       259    10    16  1977       2 DM         M                  36     41
##  7       268    10    16  1977       2 DM         F                  36     55
##  8       346    11    12  1977       2 DM         F                  37     36
##  9       350    11    12  1977       2 DM         M                  37     47
## 10       354    11    12  1977       2 DM         M                  38     44
## # ℹ 30,666 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# desc() reverses the sort order: rows sorted by year, latest first.
surveyCln %>%
  arrange(desc(year))
## # A tibble: 30,676 × 13
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1     33415     2     9  2002       2 NL         M                  33    120
##  2     33583     3    13  2002       2 NL         M                  32    136
##  3     33586     3    13  2002       2 NL         F                  32    196
##  4     33847     4    17  2002       2 NL         F                  30    149
##  5     33966     5    15  2002       2 NL         F                  31    188
##  6     34198     6    15  2002       2 NL         F                  31    139
##  7     34783    10     5  2002       2 NL         M                  36    226
##  8     34991    11     9  2002       2 NL         M                  33    238
##  9     35212    12     7  2002       2 NL         M                  33    248
## 10     33329     1    12  2002       2 DM         M                  37     47
## # ℹ 30,666 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# group_by verb: mark the rows as grouped by sex. The rows themselves are
# unchanged; the printout gains a "Groups:" header.
surveyCln %>%
  group_by(sex)
## # A tibble: 30,676 × 13
## # Groups:   sex [2]
##    record_id month   day  year plot_id species_id sex   hindfoot_length weight
##        <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>      <chr>           <dbl>  <dbl>
##  1       845     5     6  1978       2 NL         M                  32    204
##  2      1164     8     5  1978       2 NL         M                  34    199
##  3      1261     9     4  1978       2 NL         M                  32    197
##  4      1756     4    29  1979       2 NL         M                  33    166
##  5      1818     5    30  1979       2 NL         M                  32    184
##  6      1882     7     4  1979       2 NL         M                  32    206
##  7      2133    10    25  1979       2 NL         F                  33    274
##  8      2184    11    17  1979       2 NL         F                  30    186
##  9      2406     1    16  1980       2 NL         F                  33    184
## 10      3000     5    18  1980       2 NL         F                  31     87
## # ℹ 30,666 more rows
## # ℹ 4 more variables: genus <fct>, species <chr>, taxa <fct>, plot_type <chr>
# summarize verb: collapse each group to one row of summary statistics.
# Here: mean (meanw) and standard deviation (stdw) of weight for each sex.
surveyCln %>%
  group_by(sex) %>%
  summarize(
    meanw = mean(weight),
    stdw = sd(weight)
  )
## # A tibble: 2 × 3
##   sex   meanw  stdw
##   <chr> <dbl> <dbl>
## 1 F      41.5  36.2
## 2 M      42.1  35.2
# Read legal_weed_age_GSS2016_ch1.csv (relative to the working directory) into
# the data frame `mardf`.
mardf <- read_csv(file = "legal_weed_age_GSS2016_ch1.csv")
## Rows: 2867 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): grass, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean the data frame by applying the following rules.
# Convert the grass column to a factor so that summary() tabulates its values.
mardf[["grass"]] <- as.factor(mardf[["grass"]])
summary(mardf)
##        grass          age           
##  DK       : 110   Length:2867       
##  IAP      : 911   Class :character  
##  LEGAL    :1126   Mode  :character  
##  NOT LEGAL: 717                     
##  NA's     :   3
# Replace the grass values "DK" and "IAP" with NA.
#   Hint: use if_else() to update the values.
#   Syntax: if_else(condition, value_if_true, value_if_false)
mardf_cln <- mardf %>%
  mutate(grass = if_else(grass %in% c("DK", "IAP"), NA, grass))
# The factor keeps "DK" and "IAP" as levels, now with a count of 0.
summary(mardf_cln)
##        grass          age           
##  DK       :   0   Length:2867       
##  IAP      :   0   Class :character  
##  LEGAL    :1126   Mode  :character  
##  NOT LEGAL: 717                     
##  NA's     :1024
# Replace the age value "89 OR OLDER" with "89" so that it can be converted
# to a number.
mardf_cln <- mardf_cln %>%
  mutate(age = if_else(age == "89 OR OLDER", "89", age))
# Convert the age column from character to numeric.
mardf_cln$age <- as.numeric(mardf_cln$age)
# Create a column age_cat containing 4 categories <30, 30-59, 60-74, 75+ based
# on the values of the age column.
#   Hint: use the cut() function to split the age values into levels.
#   Syntax: cut(x, breaks, labels, right = TRUE)
# cut() builds right-closed intervals by default, i.e. (-Inf,30], (30,60],
# (60,75], (75,Inf], which would put age 30 in "<30", age 60 in "30-59" and
# age 75 in "60-74". right = FALSE makes the intervals left-closed,
# [-Inf,30), [30,60), [60,75), [75,Inf), so each bin matches its label.
mardf_cln <- mardf_cln %>%
  mutate(age_cat = cut(age,
                       breaks = c(-Inf, 30, 60, 75, Inf),
                       labels = c("<30", "30-59", "60-74", "75+"),
                       right = FALSE))
mardf_cln
## # A tibble: 2,867 × 3
##    grass       age age_cat
##    <fct>     <dbl> <fct>  
##  1 <NA>         47 30-59  
##  2 LEGAL        61 60-74  
##  3 NOT LEGAL    72 60-74  
##  4 <NA>         43 30-59  
##  5 LEGAL        55 30-59  
##  6 LEGAL        53 30-59  
##  7 <NA>         50 30-59  
##  8 NOT LEGAL    23 <30    
##  9 <NA>         45 30-59  
## 10 NOT LEGAL    71 60-74  
## # ℹ 2,857 more rows
# Execute the summary function on the cleaned data.
# NOTE: the age_cat counts printed below were captured with the old
# right-closed bins; respondents aged exactly 30, 60 or 75 now fall in the
# next category up, so re-run summary() to refresh those counts.
summary(mardf_cln)
##        grass           age         age_cat    
##  DK       :   0   Min.   :18.00   <30  : 535  
##  IAP      :   0   1st Qu.:34.00   30-59:1516  
##  LEGAL    :1126   Median :49.00   60-74: 564  
##  NOT LEGAL: 717   Mean   :49.16   75+  : 242  
##  NA's     :1024   3rd Qu.:62.00   NA's :  10  
##                   Max.   :89.00               
##                   NA's   :10