1.2.1 Selecting Variables

library (readr)
## Warning: package 'readr' was built under R version 4.0.3
# Load the economic data set into a tibble. The parsing warnings reported
# below come from non-numeric entries (e.g. "D", "i") found in columns that
# readr guessed to be numeric.
Data1 <- read_csv(file = "EconData.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   meaning_code = col_character(),
##   code = col_character(),
##   Year = col_double(),
##   Numbers = col_number(),
##   sales = col_number(),
##   payroll = col_number(),
##   paid_employee = col_number()
## )
## Warning: 109 parsing failures.
##  row           col expected actual           file
## 1475 sales         a number      D 'EconData.csv'
## 1475 payroll       a number      D 'EconData.csv'
## 1475 paid_employee a number      i 'EconData.csv'
## 1477 sales         a number      D 'EconData.csv'
## 1477 payroll       a number      D 'EconData.csv'
## .... ............. ........ ...... ..............
## See problems(...) for more details.
library (dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the contiguous run of columns from meaning_code through Numbers.
Data2 <- Data1 %>%
  select(meaning_code:Numbers)
Data2
## # A tibble: 4,141 x 4
##    meaning_code                                  code    Year Numbers
##    <chr>                                         <chr>  <dbl>   <dbl>
##  1 Mining, quarrying, and oil and gas extraction 21      2012   25417
##  2 Mining, quarrying, and oil and gas extraction 21      2007   22667
##  3 Oil and gas extraction                        211     2012    6735
##  4 Oil and gas extraction                        211     2007    6260
##  5 Oil and gas extraction                        2111    2012    6735
##  6 Oil and gas extraction                        2111    2007    6260
##  7 Oil and gas extraction                        21111   2012    6735
##  8 Oil and gas extraction                        21111   2007    6260
##  9 Crude petroleum and natural gas extraction    211111  2012    6398
## 10 Crude petroleum and natural gas extraction    211111  2007    5964
## # ... with 4,131 more rows

1.2.2.1 Selecting Observations

library(dplyr)
# Keep only the 2007 observations. Year is read as a double column, so it is
# compared with the number 2007 rather than the string "2007"; the string form
# only works through an implicit numeric-to-character coercion of every row.
Data2 <- filter(Data1, Year == 2007)
Data2
## # A tibble: 2,068 x 7
##    meaning_code               code    Year Numbers   sales payroll paid_employee
##    <chr>                      <chr>  <dbl>   <dbl>   <dbl>   <dbl>         <dbl>
##  1 Mining, quarrying, and oi~ 21      2007   22667  4.14e8  4.07e7        730433
##  2 Oil and gas extraction     211     2007    6260  2.55e8  9.59e6        150443
##  3 Oil and gas extraction     2111    2007    6260  2.55e8  9.59e6        150443
##  4 Oil and gas extraction     21111   2007    6260  2.55e8  9.59e6        150443
##  5 Crude petroleum and natur~ 211111  2007    5964  2.13e8  9.09e6        143054
##  6 Natural gas liquid extrac~ 211112  2007     296  4.23e7  5.03e5          7389
##  7 Mining (except oil and ga~ 212     2007    6472  8.59e7  1.16e7        211377
##  8 Coal mining                2121    2007    1034  3.43e7  4.77e6         77435
##  9 Coal mining                21211   2007    1034  3.43e7  4.77e6         77435
## 10 Bituminous coal and ligni~ 212111  2007     587  1.85e7  2.30e6         37766
## # ... with 2,058 more rows

1.2.2.2 Selecting Observations by Text Value

# Keep the rows whose industry description is exactly "Phosphate rock mining".
Data2 <- Data1 %>%
  filter(meaning_code == "Phosphate rock mining")
Data2
## # A tibble: 2 x 7
##   meaning_code          code    Year Numbers   sales payroll paid_employee
##   <chr>                 <chr>  <dbl>   <dbl>   <dbl>   <dbl>         <dbl>
## 1 Phosphate rock mining 212392  2012      11 2486447  159073          2510
## 2 Phosphate rock mining 212392  2007       7  997838  140696          1695

1.2.3 Creating/Recoding Data

# Add a Sales column holding sales scaled by 0.001. The original lower-case
# sales column is kept next to it, so the two names differ only by case.
Data2 <- Data1 %>%
  mutate(Sales = sales * 0.001)
Data2
## # A tibble: 4,141 x 8
##    meaning_code         code    Year Numbers  sales payroll paid_employee  Sales
##    <chr>                <chr>  <dbl>   <dbl>  <dbl>   <dbl>         <dbl>  <dbl>
##  1 Mining, quarrying, ~ 21      2012   25417 5.29e8  5.95e7        848189 5.29e5
##  2 Mining, quarrying, ~ 21      2007   22667 4.14e8  4.07e7        730433 4.14e5
##  3 Oil and gas extract~ 211     2012    6735 3.11e8  1.51e7        176222 3.11e5
##  4 Oil and gas extract~ 211     2007    6260 2.55e8  9.59e6        150443 2.55e5
##  5 Oil and gas extract~ 2111    2012    6735 3.11e8  1.51e7        176222 3.11e5
##  6 Oil and gas extract~ 2111    2007    6260 2.55e8  9.59e6        150443 2.55e5
##  7 Oil and gas extract~ 21111   2012    6735 3.11e8  1.51e7        176222 3.11e5
##  8 Oil and gas extract~ 21111   2007    6260 2.55e8  9.59e6        150443 2.55e5
##  9 Crude petroleum and~ 211111  2012    6398 2.71e8  1.39e7        161685 2.71e5
## 10 Crude petroleum and~ 211111  2007    5964 2.13e8  9.09e6        143054 2.13e5
## # ... with 4,131 more rows
# Recode sales into a text label: values above 300,000,000 become
# "Good_sales" and all other non-missing values become "Bad_sales".
Data2 <- Data1 %>%
  mutate(Sales = ifelse(sales > 300000000, "Good_sales", "Bad_sales"))
Data2
## # A tibble: 4,141 x 8
##    meaning_code         code    Year Numbers  sales payroll paid_employee Sales 
##    <chr>                <chr>  <dbl>   <dbl>  <dbl>   <dbl>         <dbl> <chr> 
##  1 Mining, quarrying, ~ 21      2012   25417 5.29e8  5.95e7        848189 Good_~
##  2 Mining, quarrying, ~ 21      2007   22667 4.14e8  4.07e7        730433 Good_~
##  3 Oil and gas extract~ 211     2012    6735 3.11e8  1.51e7        176222 Good_~
##  4 Oil and gas extract~ 211     2007    6260 2.55e8  9.59e6        150443 Bad_s~
##  5 Oil and gas extract~ 2111    2012    6735 3.11e8  1.51e7        176222 Good_~
##  6 Oil and gas extract~ 2111    2007    6260 2.55e8  9.59e6        150443 Bad_s~
##  7 Oil and gas extract~ 21111   2012    6735 3.11e8  1.51e7        176222 Good_~
##  8 Oil and gas extract~ 21111   2007    6260 2.55e8  9.59e6        150443 Bad_s~
##  9 Crude petroleum and~ 211111  2012    6398 2.71e8  1.39e7        161685 Bad_s~
## 10 Crude petroleum and~ 211111  2007    5964 2.13e8  9.09e6        143054 Bad_s~
## # ... with 4,131 more rows

1.2.4 Summarizing Data

# Average sales and payroll for each Year, skipping missing values inside
# each group. Rows with a missing Year form a group of their own.
Data2 <- Data1 %>%
  group_by(Year) %>%
  summarize(
    mean_sale = mean(sales, na.rm = TRUE),
    mean_pay = mean(payroll, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
Data2
## # A tibble: 3 x 3
##    Year mean_sale  mean_pay
##   <dbl>     <dbl>     <dbl>
## 1  2007 69886815. 11337461.
## 2  2012 76661260. 12355697.
## 3    NA      NaN       NaN

1.2.5 Using Pipes

# Mean 2007 sales per industry description, built as one pipeline:
# filter the rows, group them, then collapse each group to a single mean.
# Year is a double column, so it is matched against the number 2007 instead
# of the string "2007" (which relied on implicit type coercion).
Data2 <- Data1 %>%
  filter(Year == 2007) %>%
  group_by(meaning_code) %>%
  summarize(mean = mean(sales, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
Data2
## # A tibble: 1,529 x 2
##    meaning_code                                                             mean
##    <chr>                                                                   <dbl>
##  1 Abrasive product manufacturing                                         4.54e6
##  2 Accommodation                                                          1.80e8
##  3 Accommodation and food services                                        6.14e8
##  4 Accounting, tax preparation, bookkeeping, and payroll services         1.14e8
##  5 Activities related to credit intermediation                            7.03e7
##  6 Activities related to real estate                                      6.54e7
##  7 Adhesive manufacturing                                                 1.05e7
##  8 Administrative and support and waste management and remediation ser~   6.31e8
##  9 Administrative and support services                                    5.56e8
## 10 Administrative management and general management consulting services   6.80e7
## # ... with 1,519 more rows

1.2.6 Reshaping Data

library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.3
# Reshape to long format: the columns code through payroll are stacked into
# variable/value pairs, while meaning_code and paid_employee stay as
# identifier columns. The stacked columns mix text (code) with numbers, so
# value ends up as a character column.
# NOTE(review): gather() is superseded in tidyr; pivot_longer() is the
# recommended replacement but returns rows in a different order.
long_data <- Data1 %>%
  gather(key = "variable", value = "value", code:payroll)
long_data
## # A tibble: 20,705 x 4
##    meaning_code                                  paid_employee variable value 
##    <chr>                                                 <dbl> <chr>    <chr> 
##  1 Mining, quarrying, and oil and gas extraction        848189 code     21    
##  2 Mining, quarrying, and oil and gas extraction        730433 code     21    
##  3 Oil and gas extraction                               176222 code     211   
##  4 Oil and gas extraction                               150443 code     211   
##  5 Oil and gas extraction                               176222 code     2111  
##  6 Oil and gas extraction                               150443 code     2111  
##  7 Oil and gas extraction                               176222 code     21111 
##  8 Oil and gas extraction                               150443 code     21111 
##  9 Crude petroleum and natural gas extraction           161685 code     211111
## 10 Crude petroleum and natural gas extraction           143054 code     211111
## # ... with 20,695 more rows

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print summary statistics for each column of the built-in cars data set.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.