Packages Used

library(readr)
library(dplyr)
library(tibble)

Data Import

county_spending <- read_csv("County_Spending.csv")

Data Cleaning

county_spending <- county_spending %>% 
mutate(Department = factor(Department,levels = unique(Department)))
as_tibble(county_spending)

Data Description

ncol(county_spending) # gives no of columns in the data 
## [1] 25
names(county_spending) # gives names of columns in the data
##  [1] "Fiscal Year Period"        "Fiscal Year"              
##  [3] "Service"                   "Department"               
##  [5] "Program"                   "Fund"                     
##  [7] "Category"                  "Expense Category"         
##  [9] "Account Code"              "Invoice Description"      
## [11] "Vendor"                    "Vendor Number"            
## [13] "Zip"                       "Contract Number"          
## [15] "PO Number"                 "PO Line"                  
## [17] "Invoice Number"            "Invoice Line"             
## [19] "Invoice Distribution Line" "Amount"                   
## [21] "Invoice Date"              "Payment Method"           
## [23] "Payment Date"              "Payment Number"           
## [25] "Payment Status"
nrow(county_spending) # gives number of rows in the data
## [1] 837606
dim(county_spending) # gives number of rows and columns in the data
## [1] 837606     25
str(county_spending) # gives the structure of the data
## Classes 'tbl_df', 'tbl' and 'data.frame':    837606 obs. of  25 variables:
##  $ Fiscal Year Period       : int  6 11 8 12 6 6 10 7 6 5 ...
##  $ Fiscal Year              : int  2014 2014 2015 2014 2016 2016 2015 2014 2014 2014 ...
##  $ Service                  : chr  "General Government" "General Government" "General Government" "General Government" ...
##  $ Department               : Factor w/ 47 levels "Human Resources",..: 1 1 2 1 3 3 2 4 5 2 ...
##  $ Program                  : chr  "Health & Employee Welfare" "Health & Employee Welfare" "Warehouse Operations" "Health & Employee Welfare" ...
##  $ Fund                     : chr  "Employee Health Self Insurance" "Employee Health Self Insurance" "Liquor" "Employee Health Self Insurance" ...
##  $ Category                 : chr  "Operating Expenses" "Operating Expenses" "Not Defined" "Operating Expenses" ...
##  $ Expense Category         : chr  "Insurance (Non-Fringe Benefits)" "Insurance (Non-Fringe Benefits)" "Liquor Purchases (DLC)" "Training and Education" ...
##  $ Account Code             : int  65624 65628 66010 64100 62016 63634 66010 60537 60076 66010 ...
##  $ Invoice Description      : chr  "Actives - Claims" "Outside Agencies - Claims" "Beer Purchases Of Inventory" "Local Conference Related" ...
##  $ Vendor                   : chr  "GROUP HOSPITALIZATION AND MEDICAL SERVICES INC" "UNITED HEALTHCARE SERVICES INC" "BOSTON BEER CORPORATION" "ALLIANCE FOR WORKPLACE EXCELLENCE" ...
##  $ Vendor Number            : num  10759 54288 5645 40341 34954 ...
##  $ Zip                      : chr  "21117" "06103" "02210" "20884" ...
##  $ Contract Number          : chr  NA NA NA NA ...
##  $ PO Number                : chr  "1037952" "1042182" NA NA ...
##  $ PO Line                  : num  1 3 NA NA 1 NA NA 1 NA NA ...
##  $ Invoice Number           : chr  "121613PG52" "2014-05-19" "90722428-429" "7" ...
##  $ Invoice Line             : num  1 3 1 2 1 1 3 1 1 1 ...
##  $ Invoice Distribution Line: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Amount                   : num  1576281 25616 27394 195 148 ...
##  $ Invoice Date             : chr  "12/16/2013" "05/19/2014" "02/11/2015" "06/13/2014" ...
##  $ Payment Method           : chr  "EFT" "EFT" "EFT" "CHECK" ...
##  $ Payment Date             : chr  "12/20/2013" "05/22/2014" "02/19/2015" "06/17/2014" ...
##  $ Payment Number           : num  368883 380202 403394 2053265 6009472 ...
##  $ Payment Status           : chr  "RECONCILED" "RECONCILED" "RECONCILED" "RECONCILED" ...
head(county_spending) # gives the first 6 rows in the data
tail(county_spending) # gives the first 6 columns in the data
sum(is.na(county_spending)) # gives the number of NAs present in the data
## [1] 1862510