INTRODUCTION

The data consists of employment information for the United States: private firms, establishments, employment, annual payroll, and receipts by firm size (based on number of employees), 1988-2014.
Source: U.S. Small Business Administration

PREPARE

library(knitr)
library(stringr)
library(ggplot2)
library(DT)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

LOAD

Read the raw data file from CSV into R

# Load the raw employment table from the CSV export.
# - stringsAsFactors = FALSE keeps text columns as character vectors.
# - check.names = FALSE keeps the CSV headers verbatim (e.g. "0 *") instead of
#   rewriting them into syntactic R names.
employment_raw <- read.csv(
  file = "~/Desktop/employmentraw.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# Preview the first 10 rows and the first 5 columns.
employment_raw[seq_len(10), seq_len(5)]
##     Item Year Non-employers    Totals     0 *
## 1  Firms 2014    23,836,937 5,825,458 N.A.   
## 2        2013    23,005,620 5,775,055 N.A.   
## 3        2012    22,735,915 5,726,160 N.A.   
## 4        2011    22,491,080 5,684,424 N.A.   
## 5        2010    22,110,628 5,734,538 N.A.   
## 6        2009    21,695,828 5,767,306 N.A.   
## 7        2008    21,351,320 5,930,132 N.A.   
## 8        2007    21,708,021 6,049,655 N.A.   
## 9        2006    20,768,555 6,022,127 794,622
## 10       2005    20,392,068 5,983,546 823,832

TIDY

Eliminate rows without data

# Keep only the rows in which no column is NA.
# Note: textual placeholders such as "N.A." are ordinary strings, not NA, so
# rows containing them are retained by this filter.
employment_data <- employment_raw[complete.cases(employment_raw), ]
datatable(employment_data)

Fill in empty values in the Item column with the value from the row above

# Forward-fill the Item label: only the first row of each group carries a
# label, the rows below it hold an empty string.
employment_fill <- employment_data

# Mark blank labels as NA so that na.locf() treats them as gaps to fill.
employment_fill$Item[employment_fill$Item == ""] <- NA

# Carry the last non-missing label forward. na.rm = FALSE keeps any leading
# NAs in place instead of dropping them; with the default (na.rm = TRUE) a
# blank Item in the first row would shorten the result and make this
# assignment fail with a length mismatch.
employment_fill$Item <- na.locf(employment_fill$Item, na.rm = FALSE)
datatable(employment_fill)

ANALYZE

Subset data to just look at one year to compare the contributions to totals from different business sizes.

# Restrict the filled table to the rows whose Year equals 2012 so that the
# different business sizes can be compared within a single year.
is_year_2012 <- employment_fill$Year == 2012
employment_2012 <- employment_fill[is_year_2012, ]
datatable(employment_2012)