# excel file
data <- read_excel("../00_data/myData.xlsx")
data
## # A tibble: 1,222 × 10
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2015 January-… Alab… 7000 7000 1800 26 2800 250 4
## 2 2015 January-… Ariz… 35000 35000 4600 13 3400 2100 6
## 3 2015 January-… Arka… 13000 14000 1500 11 1200 90 1
## 4 2015 January-… Cali… 1440000 1690000 255000 15 250000 124000 7
## 5 2015 January-… Colo… 3500 12500 1500 12 200 140 1
## 6 2015 January-… Conn… 3900 3900 870 22 290 NA NA
## 7 2015 January-… Flor… 305000 315000 42000 13 54000 25000 8
## 8 2015 January-… Geor… 104000 105000 14500 14 47000 9500 9
## 9 2015 January-… Hawa… 10500 10500 380 4 3400 760 7
## 10 2015 January-… Idaho 81000 88000 3700 4 2600 8000 9
## # … with 1,212 more rows, and abbreviated variable names ¹colony_n,
## # ²colony_max, ³colony_lost, ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno,
## # ⁷colony_reno_pct
What is the effect of diseases on bee colonies?
#Filter
filter(data, year == "2021")
## # A tibble: 94 × 10
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2021 January-… Alab… 10000 10000 1200 12 670 200 2
## 2 2021 January-… Ariz… 29000 29000 7000 24 3500 180 1
## 3 2021 January-… Arka… 21000 21000 2200 10 790 130 1
## 4 2021 January-… Cali… 1240000 1550000 193000 12 76000 84000 5
## 5 2021 January-… Colo… 3000 16500 640 4 850 30 NA
## 6 2021 January-… Conn… 4200 4200 780 19 350 50 1
## 7 2021 January-… Flor… 300000 305000 31000 10 42000 17000 6
## 8 2021 January-… Geor… 120000 120000 20000 17 34000 14500 12
## 9 2021 January-… Hawa… 19500 19500 900 5 2400 1600 8
## 10 2021 January-… Idaho 168000 171000 15000 9 9000 8500 5
## # … with 84 more rows, and abbreviated variable names ¹colony_n, ²colony_max,
## # ³colony_lost, ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno,
## # ⁷colony_reno_pct
#Arrange
arrange(data, colony_lost)
## # A tibble: 1,222 × 10
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2016 April-Ju… Verm… 5000 6500 20 NA 480 30 NA
## 2 2020 April-Ju… Conn… 3900 4300 30 1 1200 480 11
## 3 2015 July-Sep… Verm… 6000 6500 40 1 30 20 NA
## 4 2020 January-… New … 6000 6000 40 1 120 NA NA
## 5 2020 April-Ju… Hawa… 15500 15500 40 NA 240 2300 15
## 6 2021 April-Ju… Verm… 7000 7000 40 1 770 570 8
## 7 2015 July-Sep… Conn… 3700 3700 50 1 190 440 12
## 8 2018 April-Ju… New … 1900 4300 50 1 290 450 10
## 9 2019 January-… Okla… 21000 55000 50 NA 20 NA NA
## 10 2020 April-Ju… New … 3100 6000 50 1 250 60 1
## # … with 1,212 more rows, and abbreviated variable names ¹colony_n,
## # ²colony_max, ³colony_lost, ⁴colony_lost_pct, ⁵colony_added, ⁶colony_reno,
## # ⁷colony_reno_pct
#Select
select(data, year, months, state)
## # A tibble: 1,222 × 3
## year months state
## <dbl> <chr> <chr>
## 1 2015 January-March Alabama
## 2 2015 January-March Arizona
## 3 2015 January-March Arkansas
## 4 2015 January-March California
## 5 2015 January-March Colorado
## 6 2015 January-March Connecticut
## 7 2015 January-March Florida
## 8 2015 January-March Georgia
## 9 2015 January-March Hawaii
## 10 2015 January-March Idaho
## # … with 1,212 more rows
#Add a new column
mutate(data,
total = colony_n - colony_lost)
## # A tibble: 1,222 × 11
## year months state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 2015 January-… Alab… 7000 7000 1800 26 2800 250 4
## 2 2015 January-… Ariz… 35000 35000 4600 13 3400 2100 6
## 3 2015 January-… Arka… 13000 14000 1500 11 1200 90 1
## 4 2015 January-… Cali… 1440000 1690000 255000 15 250000 124000 7
## 5 2015 January-… Colo… 3500 12500 1500 12 200 140 1
## 6 2015 January-… Conn… 3900 3900 870 22 290 NA NA
## 7 2015 January-… Flor… 305000 315000 42000 13 54000 25000 8
## 8 2015 January-… Geor… 104000 105000 14500 14 47000 9500 9
## 9 2015 January-… Hawa… 10500 10500 380 4 3400 760 7
## 10 2015 January-… Idaho 81000 88000 3700 4 2600 8000 9
## # … with 1,212 more rows, 1 more variable: total <dbl>, and abbreviated
## # variable names ¹colony_n, ²colony_max, ³colony_lost, ⁴colony_lost_pct,
## # ⁵colony_added, ⁶colony_reno, ⁷colony_reno_pct
#Summarize
summarise(data, total = mean(colony_lost, na.rm = TRUE))
## # A tibble: 1 × 1
## total
## <dbl>
## 1 16551.