# Load in packages ----
# tidyverse attaches dplyr (select, filter, distinct, slice_head) and the
# %>% pipe used later in this document; readr supplies read_csv().
library(tidyverse)
library(readr)

# Read the workshop CSV into a data frame named `forest_data`.
# NOTE(review): this is an absolute path on one specific machine; anyone else
# running the document needs to point it at their own copy of the file.
forest_data <- read_csv(
  "C:/Users/djledbet/OneDrive - North Carolina State University/North Carolina State University PhD/Summer Position with Dr. RB and USFS - Summer 2026/Data analysis workshop/data/CnrForestArchiveTree20260210.csv"
)
Introduction to R Workshop
What is R?
R is a free, open-source programming language specifically designed for statistical computing, data analysis, and graphics.
Workshop Agenda
Over the next 30 minutes, we are going to go over how to install R and RStudio on:
- Windows devices
- Apple devices (MacBooks).
Installation Instructions
Step 1: Install R
Go to the link below. After clicking it, you will see the steps for installing R on your computer.
Step 2: Install RStudio
Notice section A.1 (below); it explains how to go about downloading and installing R.
From there, click on Download R and choose the download for your specific device.
Once R is downloaded and installed on your PC, go back to the Hands-On Programming with R link and scroll down until you see section A.3, RStudio. From there, click on "download RStudio for free" (being free is one of R's largest benefits and a reason it has grown in popularity over the last two decades). This will take you to a page — scroll down, click Download RStudio, and under RStudio IDE find your device and click download.
Now that we have R and RStudio on our computers, we can start working with data.
Folder Organization
We are going to make a folder on our computers that will house the code, data, figures, etc. This is probably the most important step: once you start working on multiple projects and time goes by, it can be hard to remember where you placed your code and data, so naming your folders clearly and keeping track of them is crucial.
Opening R
Once you open R you will see this page or something similar.
Things you should take note of on this screen
- The Console — where you will see the history of your code and what you have run
- The Environment — where you will see your data
- The Files — where you will be able to see what folder you are in and if your data is inside those folders
Now Let's Load in Some Data
Inside of Quarto, whenever you want to write you can just write normally, and whenever you want to code you have to make a “Code Chunk” which is done by going to the top bar > Code > Insert Chunk (or use the keyboard shortcut Ctrl + Alt + I on Windows, Cmd + Option + I on Mac).
After loading it in, notice how the Environment section populated with that data frame!
head(forest_data)
# A tibble: 6 × 42
GlobalID PlotGUID Tally Species Species_Au Product_Ty Product__1 DBH
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl>
1 {47fe8b98-27d7-… {78fbb3… 1 Loblol… <NA> PPW <NA> 17
2 {6922af46-85ba-… {78fbb3… 1 Loblol… <NA> PST <NA> 11
3 {ead5574b-0114-… {78fbb3… 1 Loblol… <NA> PST <NA> 16
4 {606ca9d3-237f-… {78fbb3… 1 Loblol… <NA> PST <NA> 14
5 {fa9311f6-a176-… {78fbb3… 1 Loblol… <NA> PST <NA> 17
6 {93b6596f-713d-… {78fbb3… 1 Loblol… <NA> PST <NA> 17
# ℹ 34 more variables: DBH_Audit <dbl>, Height <dbl>, Height_Aud <dbl>,
# Tot_Height <dbl>, Tot_Heig_1 <dbl>, Species_Co <lgl>, Species__1 <lgl>,
# Species_Gr <chr>, Species__2 <lgl>, Comment <chr>, Comment_Au <chr>,
# Tree_Quali <lgl>, Tree_Qua_1 <lgl>, Tree_Healt <lgl>, Tree_Hea_1 <lgl>,
# Crown_Leng <dbl>, Crown_Le_1 <dbl>, Bole_Heigh <dbl>, Bole_Hei_1 <dbl>,
# Tally_Audi <dbl>, Height_Tre <dbl>, Height_T_1 <dbl>, CreationDa <chr>,
# Creator <chr>, EditDate <chr>, Editor <chr>, Lean <dbl>, LeanAudit <dbl>, …
tail(forest_data)
# A tibble: 6 × 42
GlobalID PlotGUID Tally Species Species_Au Product_Ty Product__1 DBH
<chr> <chr> <dbl> <chr> <chr> <chr> <chr> <dbl>
1 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PCNS <NA> 9.10
2 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PST <NA> 13.9
3 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PST <NA> 15.4
4 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PCNS <NA> 9.70
5 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PPW <NA> 8.20
6 <NA> {b863d3c9-1770-… 0 Loblol… <NA> PCNS <NA> 10.7
# ℹ 34 more variables: DBH_Audit <dbl>, Height <dbl>, Height_Aud <dbl>,
# Tot_Height <dbl>, Tot_Heig_1 <dbl>, Species_Co <lgl>, Species__1 <lgl>,
# Species_Gr <chr>, Species__2 <lgl>, Comment <chr>, Comment_Au <chr>,
# Tree_Quali <lgl>, Tree_Qua_1 <lgl>, Tree_Healt <lgl>, Tree_Hea_1 <lgl>,
# Crown_Leng <dbl>, Crown_Le_1 <dbl>, Bole_Heigh <dbl>, Bole_Hei_1 <dbl>,
# Tally_Audi <dbl>, Height_Tre <dbl>, Height_T_1 <dbl>, CreationDa <chr>,
# Creator <chr>, EditDate <chr>, Editor <chr>, Lean <dbl>, LeanAudit <dbl>, …
summary(forest_data)
GlobalID PlotGUID Tally Species
Length:2739 Length:2739 Min. :0.0000 Length:2739
Class :character Class :character 1st Qu.:0.0000 Class :character
Mode :character Mode :character Median :0.0000 Mode :character
Mean :0.3005
3rd Qu.:1.0000
Max. :5.0000
Species_Au Product_Ty Product__1 DBH
Length:2739 Length:2739 Length:2739 Min. : 0.00
Class :character Class :character Class :character 1st Qu.:10.30
Mode :character Mode :character Mode :character Median :12.70
Mean :12.47
3rd Qu.:14.60
Max. :33.00
DBH_Audit Height Height_Aud Tot_Height
Min. : 0.0000 Min. :0.000 Min. :0.0000 Min. : 0.00
1st Qu.: 0.0000 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.: 0.00
Median : 0.0000 Median :2.500 Median :0.0000 Median : 46.00
Mean : 0.5991 Mean :2.525 Mean :0.1314 Mean : 36.48
3rd Qu.: 0.0000 3rd Qu.:3.000 3rd Qu.:0.0000 3rd Qu.: 71.00
Max. :17.2000 Max. :6.000 Max. :4.0000 Max. :109.00
Tot_Heig_1 Species_Co Species__1 Species_Gr
Min. : 0.000 Mode:logical Mode:logical Length:2739
1st Qu.: 0.000 NA's:2739 NA's:2739 Class :character
Median : 0.000 Mode :character
Mean : 1.544
3rd Qu.: 0.000
Max. :87.000
Species__2 Comment Comment_Au Tree_Quali
Mode:logical Length:2739 Length:2739 Mode:logical
NA's:2739 Class :character Class :character NA's:2739
Mode :character Mode :character
Tree_Qua_1 Tree_Healt Tree_Hea_1 Crown_Leng Crown_Le_1
Mode:logical Mode:logical Mode:logical Min. :0 Min. :0
NA's:2739 NA's:2739 NA's:2739 1st Qu.:0 1st Qu.:0
Median :0 Median :0
Mean :0 Mean :0
3rd Qu.:0 3rd Qu.:0
Max. :0 Max. :0
Bole_Heigh Bole_Hei_1 Tally_Audi Height_Tre Height_T_1
Min. :0 Min. :0 Min. :0 Min. :0 Min. : 0.00
1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.:0 1st Qu.: 0.00
Median :0 Median :0 Median :0 Median :0 Median : 0.00
Mean :0 Mean :0 Mean :0 Mean :0 Mean : 31.29
3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0 3rd Qu.: 68.00
Max. :0 Max. :0 Max. :0 Max. :0 Max. :109.00
CreationDa Creator EditDate Editor
Length:2739 Length:2739 Length:2739 Length:2739
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
Lean LeanAudit Fusiform FusiformAu LimitingDi
Min. :0 Min. :0 Mode:logical Mode:logical Min. :19.00
1st Qu.:0 1st Qu.:0 NA's:2739 NA's:2739 1st Qu.:27.10
Median :0 Median :0 Median :31.10
Mean :0 Mean :0 Mean :30.23
3rd Qu.:0 3rd Qu.:0 3rd Qu.:33.30
Max. :0 Max. :0 Max. :42.00
NA's :2617
PlotTypeId JobGUID JobName
Mode:logical Length:2739 Length:2739
NA's:2739 Class :character Class :character
Mode :character Mode :character
colnames(forest_data)
[1] "GlobalID" "PlotGUID" "Tally" "Species" "Species_Au"
[6] "Product_Ty" "Product__1" "DBH" "DBH_Audit" "Height"
[11] "Height_Aud" "Tot_Height" "Tot_Heig_1" "Species_Co" "Species__1"
[16] "Species_Gr" "Species__2" "Comment" "Comment_Au" "Tree_Quali"
[21] "Tree_Qua_1" "Tree_Healt" "Tree_Hea_1" "Crown_Leng" "Crown_Le_1"
[26] "Bole_Heigh" "Bole_Hei_1" "Tally_Audi" "Height_Tre" "Height_T_1"
[31] "CreationDa" "Creator" "EditDate" "Editor" "Lean"
[36] "LeanAudit" "Fusiform" "FusiformAu" "LimitingDi" "PlotTypeId"
[41] "JobGUID" "JobName"
# Drop a single column: `!` inside select() keeps everything except the
# named column. The result overwrites `forest_data`.
forest_data <- forest_data %>%
  select(!PlotGUID)

# Build a smaller data frame holding only the columns of interest, then keep
# only the rows whose Tot_Height is not 0.
clean_forest_data <- forest_data %>%
  select(DBH, Tot_Height, Species) %>%
  filter(Tot_Height != 0)

# One row per distinct value of Species in the cleaned data.
unique_species <- clean_forest_data %>%
  distinct(Species)

# The first 100 rows of the cleaned data.
first_100_rows <- clean_forest_data %>%
  slice_head(n = 100)