Packages Used
library(readr)
library(dplyr)
library(tibble)
Data Import
county_spending <- read_csv("County_Spending.csv")
Data Cleaning
county_spending <- county_spending %>%
mutate(Department = factor(Department,levels = unique(Department)))
as_tibble(county_spending)
Data Description
ncol(county_spending) # gives no of columns in the data
## [1] 25
names(county_spending) # gives names of columns in the data
## [1] "Fiscal Year Period" "Fiscal Year"
## [3] "Service" "Department"
## [5] "Program" "Fund"
## [7] "Category" "Expense Category"
## [9] "Account Code" "Invoice Description"
## [11] "Vendor" "Vendor Number"
## [13] "Zip" "Contract Number"
## [15] "PO Number" "PO Line"
## [17] "Invoice Number" "Invoice Line"
## [19] "Invoice Distribution Line" "Amount"
## [21] "Invoice Date" "Payment Method"
## [23] "Payment Date" "Payment Number"
## [25] "Payment Status"
nrow(county_spending) # gives number of rows in the data
## [1] 837606
dim(county_spending) # gives number of rows and columns in the data
## [1] 837606 25
str(county_spending) # gives the structure of the data
## Classes 'tbl_df', 'tbl' and 'data.frame': 837606 obs. of 25 variables:
## $ Fiscal Year Period : int 6 11 8 12 6 6 10 7 6 5 ...
## $ Fiscal Year : int 2014 2014 2015 2014 2016 2016 2015 2014 2014 2014 ...
## $ Service : chr "General Government" "General Government" "General Government" "General Government" ...
## $ Department : Factor w/ 47 levels "Human Resources",..: 1 1 2 1 3 3 2 4 5 2 ...
## $ Program : chr "Health & Employee Welfare" "Health & Employee Welfare" "Warehouse Operations" "Health & Employee Welfare" ...
## $ Fund : chr "Employee Health Self Insurance" "Employee Health Self Insurance" "Liquor" "Employee Health Self Insurance" ...
## $ Category : chr "Operating Expenses" "Operating Expenses" "Not Defined" "Operating Expenses" ...
## $ Expense Category : chr "Insurance (Non-Fringe Benefits)" "Insurance (Non-Fringe Benefits)" "Liquor Purchases (DLC)" "Training and Education" ...
## $ Account Code : int 65624 65628 66010 64100 62016 63634 66010 60537 60076 66010 ...
## $ Invoice Description : chr "Actives - Claims" "Outside Agencies - Claims" "Beer Purchases Of Inventory" "Local Conference Related" ...
## $ Vendor : chr "GROUP HOSPITALIZATION AND MEDICAL SERVICES INC" "UNITED HEALTHCARE SERVICES INC" "BOSTON BEER CORPORATION" "ALLIANCE FOR WORKPLACE EXCELLENCE" ...
## $ Vendor Number : num 10759 54288 5645 40341 34954 ...
## $ Zip : chr "21117" "06103" "02210" "20884" ...
## $ Contract Number : chr NA NA NA NA ...
## $ PO Number : chr "1037952" "1042182" NA NA ...
## $ PO Line : num 1 3 NA NA 1 NA NA 1 NA NA ...
## $ Invoice Number : chr "121613PG52" "2014-05-19" "90722428-429" "7" ...
## $ Invoice Line : num 1 3 1 2 1 1 3 1 1 1 ...
## $ Invoice Distribution Line: num 1 1 1 1 1 1 1 1 1 1 ...
## $ Amount : num 1576281 25616 27394 195 148 ...
## $ Invoice Date : chr "12/16/2013" "05/19/2014" "02/11/2015" "06/13/2014" ...
## $ Payment Method : chr "EFT" "EFT" "EFT" "CHECK" ...
## $ Payment Date : chr "12/20/2013" "05/22/2014" "02/19/2015" "06/17/2014" ...
## $ Payment Number : num 368883 380202 403394 2053265 6009472 ...
## $ Payment Status : chr "RECONCILED" "RECONCILED" "RECONCILED" "RECONCILED" ...
head(county_spending) # gives the first 6 rows in the data
tail(county_spending) # gives the first 6 columns in the data
sum(is.na(county_spending)) # gives the number of NAs present in the data
## [1] 1862510