library (readr)
## Warning: package 'readr' was built under R version 4.0.3
# Import the raw economic table; column types are left to readr's guesser.
# NOTE(review): the knit log below reports 109 parsing failures (e.g. "D" in
# sales/payroll) -- confirm how those cells are meant to be treated.
Data1 <- read_csv(file = "EconData.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## meaning_code = col_character(),
## code = col_character(),
## Year = col_double(),
## Numbers = col_number(),
## sales = col_number(),
## payroll = col_number(),
## paid_employee = col_number()
## )
## Warning: 109 parsing failures.
## row col expected actual file
## 1475 sales a number D 'EconData.csv'
## 1475 payroll a number D 'EconData.csv'
## 1475 paid_employee a number i 'EconData.csv'
## 1477 sales a number D 'EconData.csv'
## 1477 payroll a number D 'EconData.csv'
## .... ............. ........ ...... ..............
## See problems(...) for more details.
library (dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the contiguous run of columns from meaning_code through Numbers.
Data2 <- Data1 %>%
  select(meaning_code:Numbers)
Data2
## # A tibble: 4,141 x 4
## meaning_code code Year Numbers
## <chr> <chr> <dbl> <dbl>
## 1 Mining, quarrying, and oil and gas extraction 21 2012 25417
## 2 Mining, quarrying, and oil and gas extraction 21 2007 22667
## 3 Oil and gas extraction 211 2012 6735
## 4 Oil and gas extraction 211 2007 6260
## 5 Oil and gas extraction 2111 2012 6735
## 6 Oil and gas extraction 2111 2007 6260
## 7 Oil and gas extraction 21111 2012 6735
## 8 Oil and gas extraction 21111 2007 6260
## 9 Crude petroleum and natural gas extraction 211111 2012 6398
## 10 Crude petroleum and natural gas extraction 211111 2007 5964
## # ... with 4,131 more rows
library(dplyr)
# Keep only the 2007 observations.
# Year is parsed as a double, so it is compared with the number 2007; the
# earlier string literal "2007" matched only through implicit
# number-to-character coercion. Rows whose Year is NA are dropped by filter().
Data2 <- filter(Data1, Year == 2007)
Data2
## # A tibble: 2,068 x 7
## meaning_code code Year Numbers sales payroll paid_employee
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mining, quarrying, and oi~ 21 2007 22667 4.14e8 4.07e7 730433
## 2 Oil and gas extraction 211 2007 6260 2.55e8 9.59e6 150443
## 3 Oil and gas extraction 2111 2007 6260 2.55e8 9.59e6 150443
## 4 Oil and gas extraction 21111 2007 6260 2.55e8 9.59e6 150443
## 5 Crude petroleum and natur~ 211111 2007 5964 2.13e8 9.09e6 143054
## 6 Natural gas liquid extrac~ 211112 2007 296 4.23e7 5.03e5 7389
## 7 Mining (except oil and ga~ 212 2007 6472 8.59e7 1.16e7 211377
## 8 Coal mining 2121 2007 1034 3.43e7 4.77e6 77435
## 9 Coal mining 21211 2007 1034 3.43e7 4.77e6 77435
## 10 Bituminous coal and ligni~ 212111 2007 587 1.85e7 2.30e6 37766
## # ... with 2,058 more rows
# Pull out the rows whose industry label is exactly "Phosphate rock mining".
Data2 <- Data1 %>%
  filter(meaning_code == "Phosphate rock mining")
Data2
## # A tibble: 2 x 7
## meaning_code code Year Numbers sales payroll paid_employee
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Phosphate rock mining 212392 2012 11 2486447 159073 2510
## 2 Phosphate rock mining 212392 2007 7 997838 140696 1695
# Append a column `Sales` holding `sales` multiplied by 0.001.
# Note the new name differs from the source column only by its capital S.
Data2 <- Data1 %>%
  mutate(Sales = sales * 0.001)
Data2
## # A tibble: 4,141 x 8
## meaning_code code Year Numbers sales payroll paid_employee Sales
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mining, quarrying, ~ 21 2012 25417 5.29e8 5.95e7 848189 5.29e5
## 2 Mining, quarrying, ~ 21 2007 22667 4.14e8 4.07e7 730433 4.14e5
## 3 Oil and gas extract~ 211 2012 6735 3.11e8 1.51e7 176222 3.11e5
## 4 Oil and gas extract~ 211 2007 6260 2.55e8 9.59e6 150443 2.55e5
## 5 Oil and gas extract~ 2111 2012 6735 3.11e8 1.51e7 176222 3.11e5
## 6 Oil and gas extract~ 2111 2007 6260 2.55e8 9.59e6 150443 2.55e5
## 7 Oil and gas extract~ 21111 2012 6735 3.11e8 1.51e7 176222 3.11e5
## 8 Oil and gas extract~ 21111 2007 6260 2.55e8 9.59e6 150443 2.55e5
## 9 Crude petroleum and~ 211111 2012 6398 2.71e8 1.39e7 161685 2.71e5
## 10 Crude petroleum and~ 211111 2007 5964 2.13e8 9.09e6 143054 2.13e5
## # ... with 4,131 more rows
# Append a text column `Sales` that labels each row by whether `sales`
# exceeds 3e8 (300,000,000); rows with missing `sales` get NA.
Data2 <- Data1 %>%
  mutate(Sales = ifelse(sales > 3e8, "Good_sales", "Bad_sales"))
Data2
## # A tibble: 4,141 x 8
## meaning_code code Year Numbers sales payroll paid_employee Sales
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Mining, quarrying, ~ 21 2012 25417 5.29e8 5.95e7 848189 Good_~
## 2 Mining, quarrying, ~ 21 2007 22667 4.14e8 4.07e7 730433 Good_~
## 3 Oil and gas extract~ 211 2012 6735 3.11e8 1.51e7 176222 Good_~
## 4 Oil and gas extract~ 211 2007 6260 2.55e8 9.59e6 150443 Bad_s~
## 5 Oil and gas extract~ 2111 2012 6735 3.11e8 1.51e7 176222 Good_~
## 6 Oil and gas extract~ 2111 2007 6260 2.55e8 9.59e6 150443 Bad_s~
## 7 Oil and gas extract~ 21111 2012 6735 3.11e8 1.51e7 176222 Good_~
## 8 Oil and gas extract~ 21111 2007 6260 2.55e8 9.59e6 150443 Bad_s~
## 9 Crude petroleum and~ 211111 2012 6398 2.71e8 1.39e7 161685 Bad_s~
## 10 Crude petroleum and~ 211111 2007 5964 2.13e8 9.09e6 143054 Bad_s~
## # ... with 4,131 more rows
# Per-year averages of sales and payroll, skipping missing values.
# Rows with a missing Year form their own NA group in the result.
Data2 <- Data1 %>%
  group_by(Year) %>%
  summarize(
    mean_sale = mean(sales, na.rm = TRUE),
    mean_pay = mean(payroll, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
Data2
## # A tibble: 3 x 3
## Year mean_sale mean_pay
## <dbl> <dbl> <dbl>
## 1 2007 69886815. 11337461.
## 2 2012 76661260. 12355697.
## 3 NA NaN NaN
# Mean sales per industry label, restricted to 2007.
# Year is a double column, so it is compared with the number 2007 instead of
# the string "2007", which matched only via implicit coercion.
Data2 <- Data1 %>%
  filter(Year == 2007) %>%
  group_by(meaning_code) %>%
  summarize(mean = mean(sales, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)
Data2
## # A tibble: 1,529 x 2
## meaning_code mean
## <chr> <dbl>
## 1 Abrasive product manufacturing 4.54e6
## 2 Accommodation 1.80e8
## 3 Accommodation and food services 6.14e8
## 4 Accounting, tax preparation, bookkeeping, and payroll services 1.14e8
## 5 Activities related to credit intermediation 7.03e7
## 6 Activities related to real estate 6.54e7
## 7 Adhesive manufacturing 1.05e7
## 8 Administrative and support and waste management and remediation ser~ 6.31e8
## 9 Administrative and support services 5.56e8
## 10 Administrative management and general management consulting services 6.80e7
## # ... with 1,519 more rows
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.3
# Reshape to long form: the columns code through payroll are stacked into
# variable/value pairs, while meaning_code and paid_employee stay as they are.
# Because `code` is character, the stacked `value` column is character too.
# gather() is superseded by pivot_longer() but is kept here so the row order
# and type coercion of the result stay as they are.
long_data <- Data1 %>%
  gather(code:payroll, key = "variable", value = "value")
long_data
## # A tibble: 20,705 x 4
## meaning_code paid_employee variable value
## <chr> <dbl> <chr> <chr>
## 1 Mining, quarrying, and oil and gas extraction 848189 code 21
## 2 Mining, quarrying, and oil and gas extraction 730433 code 21
## 3 Oil and gas extraction 176222 code 211
## 4 Oil and gas extraction 150443 code 211
## 5 Oil and gas extraction 176222 code 2111
## 6 Oil and gas extraction 150443 code 2111
## 7 Oil and gas extraction 176222 code 21111
## 8 Oil and gas extraction 150443 code 21111
## 9 Crude petroleum and natural gas extraction 161685 code 211111
## 10 Crude petroleum and natural gas extraction 143054 code 211111
## # ... with 20,695 more rows
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Five-number summary plus mean for each column of the built-in cars data.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.