Import data

# excel file
data <- read_excel("../00_data/myData.xlsx")
data
## # A tibble: 1,222 × 10
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2015 January-… Alab…    7000 7000       1800      26 2800    250     4      
##  2  2015 January-… Ariz…   35000 35000      4600      13 3400    2100    6      
##  3  2015 January-… Arka…   13000 14000      1500      11 1200    90      1      
##  4  2015 January-… Cali… 1440000 1690000  255000      15 250000  124000  7      
##  5  2015 January-… Colo…    3500 12500      1500      12 200     140     1      
##  6  2015 January-… Conn…    3900 3900        870      22 290     NA      NA     
##  7  2015 January-… Flor…  305000 315000    42000      13 54000   25000   8      
##  8  2015 January-… Geor…  104000 105000    14500      14 47000   9500    9      
##  9  2015 January-… Hawa…   10500 10500       380       4 3400    760     7      
## 10  2015 January-… Idaho   81000 88000      3700       4 2600    8000    9      
## # … with 1,212 more rows, and abbreviated variable names ¹​colony_n,
## #   ²​colony_max, ³​colony_lost, ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno,
## #   ⁷​colony_reno_pct

State one Question

What is the effect of diseases on bee colonies?

#Filter

filter(data, year == "2021")
## # A tibble: 94 × 10
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2021 January-… Alab…   10000 10000      1200      12 670     200     2      
##  2  2021 January-… Ariz…   29000 29000      7000      24 3500    180     1      
##  3  2021 January-… Arka…   21000 21000      2200      10 790     130     1      
##  4  2021 January-… Cali… 1240000 1550000  193000      12 76000   84000   5      
##  5  2021 January-… Colo…    3000 16500       640       4 850     30      NA     
##  6  2021 January-… Conn…    4200 4200        780      19 350     50      1      
##  7  2021 January-… Flor…  300000 305000    31000      10 42000   17000   6      
##  8  2021 January-… Geor…  120000 120000    20000      17 34000   14500   12     
##  9  2021 January-… Hawa…   19500 19500       900       5 2400    1600    8      
## 10  2021 January-… Idaho  168000 171000    15000       9 9000    8500    5      
## # … with 84 more rows, and abbreviated variable names ¹​colony_n, ²​colony_max,
## #   ³​colony_lost, ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno,
## #   ⁷​colony_reno_pct

#Arrange

arrange(data, colony_lost)
## # A tibble: 1,222 × 10
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2016 April-Ju… Verm…    5000 6500         20      NA 480     30      NA     
##  2  2020 April-Ju… Conn…    3900 4300         30       1 1200    480     11     
##  3  2015 July-Sep… Verm…    6000 6500         40       1 30      20      NA     
##  4  2020 January-… New …    6000 6000         40       1 120     NA      NA     
##  5  2020 April-Ju… Hawa…   15500 15500        40      NA 240     2300    15     
##  6  2021 April-Ju… Verm…    7000 7000         40       1 770     570     8      
##  7  2015 July-Sep… Conn…    3700 3700         50       1 190     440     12     
##  8  2018 April-Ju… New …    1900 4300         50       1 290     450     10     
##  9  2019 January-… Okla…   21000 55000        50      NA 20      NA      NA     
## 10  2020 April-Ju… New …    3100 6000         50       1 250     60      1      
## # … with 1,212 more rows, and abbreviated variable names ¹​colony_n,
## #   ²​colony_max, ³​colony_lost, ⁴​colony_lost_pct, ⁵​colony_added, ⁶​colony_reno,
## #   ⁷​colony_reno_pct

#Select

select(data, year, months, state)
## # A tibble: 1,222 × 3
##     year months        state      
##    <dbl> <chr>         <chr>      
##  1  2015 January-March Alabama    
##  2  2015 January-March Arizona    
##  3  2015 January-March Arkansas   
##  4  2015 January-March California 
##  5  2015 January-March Colorado   
##  6  2015 January-March Connecticut
##  7  2015 January-March Florida    
##  8  2015 January-March Georgia    
##  9  2015 January-March Hawaii     
## 10  2015 January-March Idaho      
## # … with 1,212 more rows

#Add a new column

mutate(data,
  total = colony_n - colony_lost)
## # A tibble: 1,222 × 11
##     year months    state colon…¹ colon…² colon…³ colon…⁴ colon…⁵ colon…⁶ colon…⁷
##    <dbl> <chr>     <chr>   <dbl> <chr>     <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1  2015 January-… Alab…    7000 7000       1800      26 2800    250     4      
##  2  2015 January-… Ariz…   35000 35000      4600      13 3400    2100    6      
##  3  2015 January-… Arka…   13000 14000      1500      11 1200    90      1      
##  4  2015 January-… Cali… 1440000 1690000  255000      15 250000  124000  7      
##  5  2015 January-… Colo…    3500 12500      1500      12 200     140     1      
##  6  2015 January-… Conn…    3900 3900        870      22 290     NA      NA     
##  7  2015 January-… Flor…  305000 315000    42000      13 54000   25000   8      
##  8  2015 January-… Geor…  104000 105000    14500      14 47000   9500    9      
##  9  2015 January-… Hawa…   10500 10500       380       4 3400    760     7      
## 10  2015 January-… Idaho   81000 88000      3700       4 2600    8000    9      
## # … with 1,212 more rows, 1 more variable: total <dbl>, and abbreviated
## #   variable names ¹​colony_n, ²​colony_max, ³​colony_lost, ⁴​colony_lost_pct,
## #   ⁵​colony_added, ⁶​colony_reno, ⁷​colony_reno_pct

#Summarize

summarise(data, total = mean(colony_lost, na.rm = TRUE))
## # A tibble: 1 × 1
##    total
##    <dbl>
## 1 16551.