Environment setup
library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.3
library(knitr)
library(tibble)
## Warning: package 'tibble' was built under R version 3.4.3
library(ggplot2)

For the first data set I picked the world population data from Steven's discussion post; it shows the population of the world's countries from 1980 to 2010.

1 Data - World Population

Loading Data set for World population
# Read the 1980-2010 world population table. header = TRUE and sep = ","
# are already the read.csv defaults, so they are not repeated here.
countryData <- read.csv(
  "https://raw.githubusercontent.com/Harpreet1984/DATA607/master/populationbycountry19802010millions.csv"
)
# Reshape wide -> long: columns 2 to 32 (the per-year columns) are stacked
# into Year/population pairs; the first column (X) stays as the identifier.
countryDataTidy <- gather(
  countryData,
  key = "Year",
  value = "population",
  2:32
)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Preview the first rows of the long-format table.
head(countryDataTidy)
##                           X  Year population
## 1             North America X1980  320.27638
## 2                   Bermuda X1980    0.05473
## 3                    Canada X1980    24.5933
## 4                 Greenland X1980    0.05021
## 5                    Mexico X1980   68.34748
## 6 Saint Pierre and Miquelon X1980    0.00599
Tidying Data

Remove all rows whose population is NA or "--", then group the data by country and calculate the relative change in population over the entire period for each country.

# Drop rows whose population is missing (NA) or the "--" placeholder, then
# compute each country's relative change between its first and last
# remaining observation: (last - first) / first.
countryDataFilteredGrouped <- countryDataTidy %>%
  filter(!population %in% c("--", NA)) %>%
  mutate(population = as.numeric(population)) %>%
  group_by(X) %>%
  summarise(
    populationChange =
      (last(population) - first(population)) / first(population)
  )
countryDataFilteredGrouped
## # A tibble: 229 x 2
##    X                   populationChange
##    <fct>                          <dbl>
##  1 Afghanistan                    0.936
##  2 Africa                         1.12 
##  3 Albania                        0.118
##  4 Algeria                        0.839
##  5 American Samoa                 1.05 
##  6 Angola                         0.938
##  7 Antigua and Barbuda            0.265
##  8 Argentina                      0.457
##  9 Armenia                       -0.122
## 10 Aruba                          0.749
## # ... with 219 more rows
Data Analysis

Find the country that has the maximum population growth during this duration

# Keep the row(s) whose relative change equals the overall maximum.
countryWithMaxPopulationGrowth <- filter(
  countryDataFilteredGrouped,
  populationChange == max(populationChange)
)
countryWithMaxPopulationGrowth
## # A tibble: 1 x 2
##   X                    populationChange
##   <fct>                           <dbl>
## 1 United Arab Emirates             3.97

So the country with the maximum population growth over the entire period is the United Arab Emirates.

Find the country that has the minimum population growth during this duration

# Keep the row(s) whose relative change equals the overall minimum.
countryWithMinPopulationGrowth <- filter(
  countryDataFilteredGrouped,
  populationChange == min(populationChange)
)
countryWithMinPopulationGrowth
## # A tibble: 1 x 2
##   X          populationChange
##   <fct>                 <dbl>
## 1 Montserrat           -0.565

From the above result we can conclude that the population of Montserrat decreased by about 56.5% over the period (a relative change of -0.565).

2 Data - Time Spent (Male vs Female)

The second data set is the time-use data discussed by Nicholas: time use by gender and by country, with variables including eating, sleeping, employment, travel, school, study, walking the dog, etc.

Loading Data
# Fetch the time-use table. Strings are kept as character (not factors) so
# the "h:mm" values can be split later; header and sep use the read.csv
# defaults (TRUE and ",").
myurl <- "https://raw.githubusercontent.com/Harpreet1984/DATA607/master/TimeUse%20(1).csv"
time_info <- read.csv(myurl, stringsAsFactors = FALSE)
kable(time_info)
SEX GEO.ACL00 Total Personal.care Sleep Eating Other.and.or.unspecified.personal.care Employment..related.activities.and.travel.as.part.of.during.main.and.second.job Main.and.second.job.and.related.travel Activities.related.to.employment.and.unspecified.employment Study School.and.university.except.homework Homework Free.time.study Household.and.family.care Food.management.except.dish.washing Dish.washing Cleaning.dwelling Household.upkeep.except.cleaning.dwelling Laundry Ironing Handicraft.and.producing.textiles.and.other.care.for.textiles Gardening..other.pet.care Tending.domestic.animals Caring.for.pets Walking.the.dog Construction.and.repairs Shopping.and.services Childcare..except.teaching..reading.and.talking Teaching..reading.and.talking.with.child Household.management.and.help.family.member Leisure..social.and.associative.life Organisational.work Informal.help.to.other.households Participatory.activities Visiting.and.feasts Other.social.life Entertainment.and.culture Resting Walking.and.hiking Sports.and.outdoor.activities.except.walking.and.hiking Computer.games Computing Hobbies.and.games.except.computing.and.computer.games Reading.books Reading..except.books TV.and.video Radio.and.music Unspecified.leisure Travel.except.travel.related.to.jobs Travel.to.from.work Travel.related.to.study Travel.related.to.shopping.and.services Transporting.a.child Travel.related.to.other.household.purposes Travel.related.to.leisure..social.and.associative.life Unspecified.travel Unspecified.time.use
Males Belgium 24:00 10:45 8:15 1:49 0:42 3:07 3:05 0:02 0:11 0:05 0:03 0:03 2:28 0:22 0:10 0:08 0:18 0:01 0:01 0:00 0:19 0:00 0:03 0:05 0:19 0:24 0:05 0:04 0:08 5:58 0:07 0:00 0:03 0:32 0:23 0:10 0:27 0:12 0:15 0:05 0:22 0:13 0:06 0:22 2:35 0:05 0:01 1:30 0:25 0:02 0:16 0:03 0:00 0:15 0:30 0:01
Males Bulgaria 24:00 11:54 9:08 2:07 0:39 3:32 3:27 0:04 0:03 0:02 0:01 0:00 2:37 0:15 0:05 0:06 0:22 0:01 0:00 0:00 0:36 0:32 0:01 0:02 0:16 0:13 0:02 0:05 0:01 4:46 0:00 0:09 0:01 0:04 0:37 0:01 0:10 0:16 0:10 0:00 0:01 0:11 0:06 0:15 2:41 0:06 0:01 1:07 0:23 0:00 0:12 0:01 0:06 0:21 0:03 0:02
Males Germany (including former GDR from 1991) 24:00 10:40 8:08 1:43 0:49 3:27 3:21 0:06 0:15 0:06 0:05 0:04 2:22 0:16 0:08 0:11 0:14 0:02 0:01 0:00 0:17 0:01 0:03 0:03 0:19 0:29 0:05 0:05 0:09 5:42 0:09 0:08 0:04 0:17 0:45 0:14 0:16 0:13 0:15 0:05 0:16 0:18 0:06 0:31 1:58 0:05 0:00 1:29 0:27 0:02 0:16 0:02 0:05 0:34 0:03 0:05
Males Estonia 24:00 10:35 8:24 1:19 0:52 4:27 4:20 0:07 0:06 0:03 0:02 0:02 2:33 0:21 0:06 0:09 0:22 0:01 0:00 0:01 0:16 0:04 0:01 0:05 0:29 0:20 0:06 0:04 0:06 5:02 0:02 0:15 0:01 0:04 0:26 0:05 0:21 0:10 0:13 0:01 0:02 0:05 0:14 0:23 2:29 0:11 0:00 1:12 0:28 0:01 0:13 0:01 0:07 0:22 0:01 0:04
Males Spain 24:00 11:11 8:36 1:47 0:48 4:21 4:17 0:03 0:18 0:06 0:07 0:04 1:37 0:19 0:04 0:07 0:06 0:01 0:00 0:00 0:09 0:03 0:01 0:03 0:06 0:19 0:07 0:04 0:06 5:16 0:01 0:07 0:03 0:12 0:45 0:07 0:24 0:39 0:14 0:02 0:09 0:10 0:04 0:13 2:00 0:05 0:00 1:16 0:31 0:02 0:07 0:02 0:03 0:28 0:02 0:02
Males France 24:00 11:44 8:45 2:18 0:41 3:48 3:46 0:02 0:15 0:09 0:05 0:01 2:24 0:16 0:08 0:11 0:08 0:01 0:01 0:00 0:18 0:03 0:05 : 0:32 0:30 0:05 0:04 0:04 4:44 0:01 0:10 0:07 0:21 0:20 0:05 0:06 0:20 0:17 : 0:07 0:14 0:01 0:22 2:08 0:04 0:00 1:03 0:24 0:02 : 0:02 : : 0:35 0:02
Males Italy 24:00 11:16 8:17 1:57 1:02 4:15 4:11 0:04 0:11 0:04 0:06 0:01 1:35 0:11 0:05 0:09 0:06 0:00 0:00 0:00 0:16 0:01 0:01 0:02 0:06 0:22 0:04 0:07 0:04 5:05 0:02 0:07 0:04 0:17 0:42 0:06 0:32 0:23 0:15 0:02 0:07 0:12 0:04 0:17 1:52 0:04 0:00 1:35 0:32 0:02 0:12 0:02 0:03 0:36 0:08 0:03
Males Latvia 24:00 10:46 8:35 1:33 0:37 5:00 4:55 0:06 0:09 0:05 0:02 0:02 1:50 0:16 0:04 0:06 0:21 0:01 0:00 0:00 0:17 0:03 0:01 0:03 0:17 0:12 0:02 0:02 0:06 4:45 0:00 0:11 0:01 0:20 0:14 0:05 0:23 0:12 0:19 0:02 0:03 0:05 0:09 0:17 2:18 0:06 0:00 1:28 0:37 0:02 0:12 0:01 0:07 0:26 0:02 0:02
Males Lithuania 24:00 10:53 8:28 1:32 0:53 4:45 4:43 0:03 0:09 0:07 0:02 0:01 2:09 0:20 0:04 0:10 0:29 0:01 0:00 0:01 0:11 0:12 0:01 0:02 0:17 0:13 0:03 0:04 0:01 4:47 0:00 0:15 0:02 0:21 0:12 0:01 0:15 0:08 0:13 0:03 0:05 0:04 0:05 0:18 2:36 0:09 0:00 1:13 0:28 0:01 0:13 0:01 0:06 0:23 0:02 0:03
Males Poland 24:00 10:44 8:21 1:33 0:50 4:01 3:58 0:03 0:14 0:07 0:06 0:01 2:22 0:25 0:06 0:09 0:20 0:01 0:01 0:00 0:12 0:02 0:02 0:07 0:19 0:21 0:05 0:10 0:01 5:20 0:01 0:16 0:09 0:22 0:28 0:02 0:15 0:13 0:12 0:06 0:05 0:07 0:07 0:14 2:34 0:10 0:00 1:13 0:23 0:02 0:14 0:01 0:04 0:27 0:01 0:05
Males Slovenia 24:00 10:31 8:18 1:33 0:40 3:53 3:49 0:04 0:15 0:05 0:09 0:01 2:38 0:16 0:04 0:08 0:24 0:00 0:00 0:01 0:32 0:15 0:02 0:04 0:24 0:16 0:05 0:07 0:02 5:31 0:02 0:10 0:03 0:06 0:53 0:05 0:38 0:19 0:17 0:01 0:06 0:09 0:04 0:19 2:12 0:07 0:00 1:10 0:21 0:02 0:11 0:02 0:05 0:28 0:02 0:02
Males Finland 24:00 10:23 8:22 1:23 0:38 3:48 3:46 0:02 0:13 0:07 0:03 0:03 2:16 0:21 0:04 0:08 0:26 0:02 0:00 0:01 0:06 : 0:02 0:05 0:21 0:26 0:07 0:05 0:04 5:56 0:06 0:11 0:02 0:26 0:23 0:06 0:24 0:12 0:25 0:04 0:06 0:10 0:09 0:35 2:25 0:11 0:00 1:12 0:18 0:02 0:11 0:02 0:03 0:33 0:03 0:12
Males United Kingdom 24:00 10:22 8:18 1:24 0:41 4:10 4:06 0:05 0:08 0:05 0:02 0:02 2:18 0:26 0:09 0:11 0:09 0:02 0:02 0:00 0:12 0:01 0:03 0:06 0:17 0:24 0:07 0:05 0:04 5:22 0:03 0:08 0:04 0:20 0:30 0:07 0:17 0:04 0:14 0:04 0:10 0:11 0:05 0:21 2:37 0:07 0:00 1:30 0:29 0:01 0:16 0:03 0:03 0:36 0:02 0:08
Males Norway 24:00 10:06 7:56 1:25 0:45 4:04 4:03 0:01 0:12 0:08 0:04 : 2:21 0:25 0:08 0:14 0:04 0:02 0:00 0:01 0:10 : 0:01 0:04 0:23 0:21 0:12 0:05 0:12 5:52 0:03 0:07 0:05 0:48 0:44 0:07 0:11 0:13 0:20 0:03 0:10 0:12 0:07 0:26 2:06 0:08 0:02 1:21 0:26 0:02 0:13 0:02 0:02 0:36 : 0:03
Females Belgium 24:00 11:11 8:34 1:50 0:47 1:53 1:52 0:01 0:16 0:06 0:06 0:04 4:10 0:57 0:20 0:26 0:28 0:09 0:19 0:06 0:10 0:00 0:03 0:03 0:04 0:33 0:16 0:07 0:10 5:06 0:03 0:00 0:03 0:37 0:24 0:11 0:31 0:11 0:07 0:02 0:09 0:09 0:08 0:16 2:13 0:03 0:01 1:22 0:15 0:02 0:18 0:04 0:00 0:16 0:27 0:02
Females Bulgaria 24:00 11:38 9:07 1:55 0:36 2:34 2:33 0:02 0:06 0:02 0:04 0:00 5:01 1:37 0:36 0:31 0:13 0:19 0:06 0:16 0:24 0:16 0:01 0:01 0:02 0:16 0:13 0:10 0:02 3:47 0:00 0:08 0:01 0:05 0:32 0:01 0:08 0:14 0:03 0:00 0:00 0:02 0:10 0:06 2:14 0:03 0:00 0:52 0:17 0:01 0:14 0:01 0:03 0:13 0:02 0:02
Females Germany (including former GDR from 1991) 24:00 10:58 8:15 1:46 0:56 1:56 1:53 0:03 0:13 0:04 0:04 0:04 4:14 0:49 0:21 0:39 0:15 0:13 0:10 0:07 0:13 0:01 0:04 0:04 0:03 0:38 0:16 0:09 0:12 5:15 0:07 0:09 0:04 0:19 0:49 0:14 0:20 0:15 0:12 0:02 0:06 0:16 0:08 0:30 1:40 0:04 0:00 1:19 0:13 0:02 0:19 0:04 0:04 0:33 0:03 0:05
Females Estonia 24:00 10:30 8:26 1:12 0:53 3:05 3:02 0:03 0:07 0:02 0:03 0:02 4:53 1:19 0:25 0:34 0:14 0:14 0:07 0:14 0:19 0:06 0:02 0:05 0:03 0:29 0:25 0:10 0:06 4:18 0:01 0:12 0:02 0:04 0:27 0:04 0:17 0:10 0:05 0:00 0:01 0:05 0:17 0:20 2:06 0:06 0:00 1:02 0:20 0:01 0:16 0:03 0:03 0:18 0:01 0:05
Females Spain 24:00 11:05 8:32 1:44 0:49 2:06 2:05 0:01 0:20 0:06 0:07 0:06 4:55 1:20 0:29 0:50 0:11 0:11 0:12 0:10 0:03 0:01 0:01 0:02 0:01 0:35 0:25 0:05 0:19 4:26 0:01 0:12 0:05 0:16 0:34 0:06 0:26 0:32 0:06 0:00 0:03 0:05 0:04 0:08 1:46 0:02 0:00 1:05 0:18 0:03 0:12 0:06 0:01 0:24 0:01 0:02
Females France 24:00 11:53 8:55 2:11 0:46 2:17 2:16 0:01 0:14 0:08 0:05 0:01 4:34 1:01 0:25 0:58 0:07 0:07 0:15 0:08 0:09 0:02 0:05 : 0:04 0:41 0:20 0:08 0:04 4:05 0:01 0:09 0:05 0:24 0:20 0:05 0:06 0:17 0:06 : 0:02 0:11 0:01 0:22 1:55 0:02 0:00 0:54 0:15 0:01 : 0:05 : : 0:32 0:03
Females Italy 24:00 11:12 8:19 1:52 1:01 1:52 1:50 0:02 0:14 0:04 0:09 0:01 5:20 1:19 0:35 1:24 0:06 0:10 0:20 0:08 0:06 0:01 0:01 0:02 0:01 0:36 0:18 0:10 0:05 4:06 0:01 0:13 0:08 0:26 0:23 0:04 0:31 0:17 0:06 0:00 0:02 0:05 0:06 0:10 1:29 0:03 0:00 1:14 0:15 0:02 0:17 0:06 0:01 0:27 0:06 0:03
Females Latvia 24:00 10:53 8:44 1:26 0:43 3:29 3:26 0:04 0:12 0:07 0:04 0:01 3:56 1:06 0:22 0:27 0:15 0:09 0:03 0:09 0:20 0:06 0:02 0:04 0:01 0:21 0:14 0:07 0:08 4:08 0:00 0:11 0:03 0:24 0:11 0:04 0:20 0:15 0:06 0:00 0:01 0:04 0:13 0:16 1:55 0:04 0:00 1:20 0:24 0:03 0:20 0:03 0:06 0:23 0:01 0:03
Females Lithuania 24:00 10:56 8:35 1:26 0:56 3:31 3:29 0:02 0:10 0:05 0:03 0:02 4:29 1:18 0:22 0:38 0:21 0:11 0:04 0:11 0:15 0:17 0:02 0:02 0:02 0:21 0:16 0:09 0:01 3:45 0:00 0:11 0:04 0:22 0:10 0:02 0:13 0:08 0:05 0:00 0:02 0:02 0:10 0:13 1:59 0:04 0:00 1:05 0:20 0:02 0:19 0:01 0:03 0:19 0:01 0:04
Females Poland 24:00 11:03 8:35 1:34 0:54 2:15 2:14 0:01 0:14 0:06 0:06 0:01 4:45 1:30 0:29 0:34 0:14 0:14 0:07 0:05 0:10 0:01 0:02 0:06 0:02 0:30 0:22 0:17 0:02 4:32 0:00 0:14 0:15 0:23 0:26 0:02 0:12 0:12 0:04 0:01 0:02 0:06 0:13 0:12 2:03 0:06 0:00 1:06 0:14 0:03 0:19 0:03 0:02 0:25 0:01 0:05
Females Slovenia 24:00 10:32 8:25 1:26 0:41 2:42 2:39 0:03 0:19 0:06 0:11 0:01 4:56 1:24 0:28 0:39 0:16 0:09 0:16 0:07 0:25 0:11 0:02 0:03 0:02 0:21 0:19 0:11 0:03 4:27 0:00 0:06 0:05 0:07 0:50 0:04 0:30 0:18 0:08 0:00 0:02 0:05 0:09 0:14 1:44 0:05 0:00 1:02 0:16 0:03 0:14 0:03 0:03 0:23 0:01 0:02
Females Finland 24:00 10:38 8:32 1:19 0:47 2:33 2:32 0:02 0:16 0:09 0:05 0:02 3:56 0:55 0:15 0:26 0:23 0:13 0:05 0:10 0:08 : 0:03 0:07 0:04 0:32 0:21 0:08 0:05 5:17 0:04 0:12 0:03 0:30 0:25 0:05 0:20 0:13 0:15 0:01 0:02 0:10 0:14 0:33 2:02 0:08 0:00 1:07 0:14 0:02 0:14 0:03 0:02 0:31 0:02 0:12
Females United Kingdom 24:00 10:43 8:27 1:26 0:50 2:24 2:21 0:03 0:09 0:04 0:03 0:02 4:15 0:59 0:18 0:38 0:12 0:11 0:11 0:05 0:07 0:01 0:05 0:06 0:04 0:39 0:23 0:10 0:05 4:55 0:03 0:11 0:05 0:30 0:31 0:06 0:23 0:03 0:07 0:01 0:04 0:11 0:07 0:18 2:09 0:05 0:00 1:25 0:17 0:01 0:22 0:09 0:02 0:34 0:01 0:10
Females Norway 24:00 10:27 8:10 1:20 0:56 2:38 2:37 0:01 0:15 0:09 0:06 : 3:47 0:51 0:21 0:33 0:02 0:11 0:04 0:12 0:09 : 0:01 0:03 0:04 0:27 0:25 0:09 0:14 5:40 0:01 0:08 0:05 1:01 0:56 0:06 0:12 0:13 0:14 0:01 0:05 0:10 0:11 0:28 1:39 0:07 0:02 1:11 0:18 0:02 0:14 0:03 0:01 0:33 : 0:03
Tidying and analysis
# Work on a tibble copy of the raw table so time_info stays untouched, and
# give the country column a readable name.
# as_tibble() replaces as_data_frame(), which tibble has deprecated.
library(tibble)
timedata <- as_tibble(time_info) %>%
  rename(Country = GEO.ACL00)

Converting the time spent on personal care (recorded as hours:minutes) into minutes by splitting at the colon, then calculating the mean of the total time (personal care) per sex

# Personal.care is stored as "h:mm". Split it at the colon, convert to total
# minutes (hours * 60 + minutes), and average the result per sex.
timedata_PC <- timedata %>%
  separate("Personal.care", c("PC_Hours", "PC_Minutes"), sep = ":") %>%
  mutate(
    PC_TotalMinutes = (as.numeric(PC_Hours) * 60) + as.numeric(PC_Minutes)
  ) %>%
  group_by(SEX) %>%
  summarise(mean = mean(PC_TotalMinutes))
kable(timedata_PC)
SEX mean
Females 658.5000
Males 650.7143
Analysis:- Here from the above results we see that Females spent more time in Personal care compared to Males

Converting the time on eating in minutes by removing the colon and calculating mean of total time(eating)

# Eating is stored as "h:mm". Split it at the colon, convert to total minutes
# (hours * 60 + minutes), and average per country and sex.
timedata_Eat <- timedata %>%
  separate("Eating", c("E_Hours", "E_Minutes"), sep = ":") %>%
  mutate(
    E_TotalMinutes = (as.numeric(E_Hours) * 60) + as.numeric(E_Minutes)
  ) %>%
  group_by(Country, SEX) %>%
  summarise(mean = mean(E_TotalMinutes))
timedata_Eat
## # A tibble: 28 x 3
## # Groups:   Country [?]
##    Country  SEX      mean
##    <chr>    <chr>   <dbl>
##  1 Belgium  Females 110  
##  2 Belgium  Males   109  
##  3 Bulgaria Females 115  
##  4 Bulgaria Males   127  
##  5 Estonia  Females  72.0
##  6 Estonia  Males    79.0
##  7 Finland  Females  79.0
##  8 Finland  Males    83.0
##  9 France   Females 131  
## 10 France   Males   138  
## # ... with 18 more rows
Analysis :- From the above subset we can infer that Males spend more time in eating compared to females for most of the countries.
# Sleep is stored as "h:mm". Split it at the colon, convert to total minutes
# (hours * 60 + minutes), and average per country and sex.
timedata_Sleep <- timedata %>%
  separate("Sleep", c("S_Hours", "S_Minutes"), sep = ":") %>%
  mutate(
    S_TotalMinutes = (as.numeric(S_Hours) * 60) + as.numeric(S_Minutes)
  ) %>%
  group_by(Country, SEX) %>%
  summarise(mean = mean(S_TotalMinutes))
timedata_Sleep
## # A tibble: 28 x 3
## # Groups:   Country [?]
##    Country  SEX      mean
##    <chr>    <chr>   <dbl>
##  1 Belgium  Females   514
##  2 Belgium  Males     495
##  3 Bulgaria Females   547
##  4 Bulgaria Males     548
##  5 Estonia  Females   506
##  6 Estonia  Males     504
##  7 Finland  Females   512
##  8 Finland  Males     502
##  9 France   Females   535
## 10 France   Males     525
## # ... with 18 more rows
Analysis :- From the above subset we can infer that Males spend less time in sleeping compared to females for most of the countries.

3. Data -sales-tax credits from the government of Canada

Loading data
# Read the CRA credit/benefit table (one row per province, with *_freq and
# *_amount columns per income bracket). header = TRUE and sep = "," are the
# read.csv defaults.
craCreditBenefit <- read.csv(
  "https://raw.githubusercontent.com/Harpreet1984/DATA607/master/CRA_Credit_Benefit.csv"
)
kable(craCreditBenefit)
Province GT_freq GT_amount X5K_freq X5K_amount X5K_10K_freq X5K_10K_amount X10K_15K_freq X10K_15K_amount X15K_20K_freq X15K_20K_amount X20K_25K_freq X20K_25K_amount X25K_30K_freq X25K_30K_amount X30K_35K_freq X30K_35K_amount X35K_40K_freq X35K_40K_amount X40K_45K_freq X40K_45K_amount X45K_50K_freq X45K_50K_amount X50K_55K_freq X50K_55K_amount X55K_60K_freq X55K_60K_amount X60K_freq X60K_amount
Newfoundland_and_Labrador 160300 $65,756 26410 $7,044 16150 $5,278 19530 $8,527 28520 $13,023 18290 $8,540 18820 $9,207 15370 $7,518 12620 $5,157 3980 $1,274 550 $169 60 $18 NA NA
Prince_Edward_Island 45850 $18,648 8000 $2,115 4220 $1,363 4900 $2,003 7240 $3,332 5820 $2,776 5250 $2,520 4790 $2,321 4020 $1,676 1260 $422 290 $102 50 $14 10 $4 NA
Nova_Scotia 302210 $122,873 59800 $17,694 38340 $13,202 29460 $12,610 44640 $20,540 35870 $16,942 29900 $14,249 29700 $14,352 25480 $10,312 7530 $2,501 1280 $406 190 $57 20 $7 NA
New_Brunswick 249780 $102,431 43160 $11,986 29500 $9,489 24660 $11,372 39740 $18,130 29700 $13,994 27650 $13,379 25490 $12,383 21760 $8,996 6670 $2,243 1240 $395 200 $59 20 $4 NA
Quebec 2698620 $1,082,380 495090 $132,486 288830 $101,067 353700 $138,867 429720 $194,299 291000 $135,812 256360 $124,147 258750 $127,158 234200 $97,567 73060 $25,216 15330 $5,039 2390 $662 170 $51 30 $8
Ontario 3982840 $1,647,108 852320 $249,084 426320 $151,182 573760 $243,898 563100 $270,404 440530 $217,190 362380 $179,196 328470 $162,303 308840 $129,493 99930 $35,557 23370 $7,656 3460 $1,028 300 $97 50 $18
Manitoba 385370 $162,874 101660 $35,032 35830 $12,788 44610 $19,386 48710 $23,418 40510 $19,740 34860 $17,185 33840 $16,759 31830 $13,517 10040 $3,816 2780 $999 620 $202 70 $23 20 $10
Saskatchewan 291210 $125,501 72980 $26,349 23750 $8,961 29560 $12,701 43070 $20,642 33870 $16,671 27420 $13,623 26620 $13,072 24050 $9,945 7540 $2,728 1890 $662 410 $126 50 $16 10 $5
Alberta 921590 $384,471 208080 $65,556 80350 $28,722 91240 $39,264 124420 $59,999 121870 $57,588 91450 $43,595 87090 $42,645 83290 $34,831 25190 $9,331 6970 $2,429 1380 $433 200 $62 50 $14
British_Columbia 1390640 $561,578 304940 $89,447 124170 $42,769 195460 $79,598 199330 $93,329 159180 $75,739 134750 $64,798 118750 $56,608 111800 $45,130 34040 $11,603 7160 $2,253 970 $270 80 $26 20 $7
Northwest_Territories 10740 $4,478 2890 $998 1420 $541 1350 $571 1370 $664 1160 $558 860 $420 690 $337 720 $289 200 $71 60 $21 20 $7 NA NA
Yukon 9110 $3,788 1600 $442 730 $259 1030 $423 1470 $710 1290 $620 970 $462 920 $441 840 $337 230 $75 40 $15 NA NA NA
Nunavut 8530 $4,383 1680 $567 1740 $727 1220 $724 1060 $694 800 $508 620 $397 540 $328 490 $265 220 $107 110 $48 40 $16 NA NA
Outside_Canada 920 $436 350 $159 120 $44 130 $63 90 $46 80 $44 50 $28 50 $27 30 $15 NA NA NA NA NA
tidying data set
# Keep the province plus the thirteen per-bracket amount columns (the grand
# total GT_amount is deliberately left out).
craCreditBenefitAmountAnalysis <- select(
  craCreditBenefit,
  Province,
  X5K_amount, X5K_10K_amount, X10K_15K_amount, X15K_20K_amount,
  X20K_25K_amount, X25K_30K_amount, X30K_35K_amount, X35K_40K_amount,
  X40K_45K_amount, X45K_50K_amount, X50K_55K_amount, X55K_60K_amount,
  X60K_amount
)

# Reshape wide -> long: every column except Province (i.e. columns 2 to 14)
# becomes a Category/Amount pair.
craCreditBenefitAmountAnalysisTidy <- gather(
  craCreditBenefitAmountAnalysis,
  key = "Category",
  value = "Amount",
  -Province
)
## Warning: attributes are not identical across measure variables;
## they will be dropped
# Turn currency text such as "$1,082,380" into numbers: strip "$" and ","
# in one pass, map empty cells to "0", then convert to numeric.
craCreditBenefitAmountAnalysisTidy$Amount <- as.numeric(
  gsub(
    "^$", "0",
    gsub("[$,]", "", craCreditBenefitAmountAnalysisTidy$Amount)
  )
)
Analyzing data set

Here we are calculating Maximum Credit Benefit Per province

# Largest amount found in any income bracket, per province.
maxCreditBenefitByProvince <- summarise(
  group_by(craCreditBenefitAmountAnalysisTidy, Province),
  Amount = max(Amount)
)
kable(maxCreditBenefitByProvince)
Province Amount
Alberta 65556
British_Columbia 93329
Manitoba 35032
New_Brunswick 18130
Newfoundland_and_Labrador 13023
Northwest_Territories 998
Nova_Scotia 20540
Nunavut 727
Ontario 270404
Outside_Canada 159
Prince_Edward_Island 3332
Quebec 194299
Saskatchewan 26349
Yukon 710
# Bar chart of the per-province maximum amount; x-axis labels are rotated
# 90 degrees so the long province names stay readable.
myggplot <- ggplot(maxCreditBenefitByProvince, aes(x = Province, y = Amount)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(
      face = "bold",
      color = "#993333",
      size = 14,
      angle = 90
    )
  )
myggplot

# Largest amount found in any province, per income bracket.
maxCreditBenefitByCategory <- summarise(
  group_by(craCreditBenefitAmountAnalysisTidy, Category),
  Amount = max(Amount)
)
kable(maxCreditBenefitByCategory)
Category Amount
X10K_15K_amount 243898
X15K_20K_amount 270404
X20K_25K_amount 217190
X25K_30K_amount 179196
X30K_35K_amount 162303
X35K_40K_amount 129493
X40K_45K_amount 35557
X45K_50K_amount 7656
X50K_55K_amount 1028
X55K_60K_amount 97
X5K_10K_amount 151182
X5K_amount 249084
X60K_amount 18
library(ggplot2)
# Bar chart of the per-bracket maximum amount; x-axis labels are rotated
# 90 degrees so the bracket names stay readable.
myggplot1 <- ggplot(maxCreditBenefitByCategory, aes(x = Category, y = Amount)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(
      face = "bold",
      color = "#993333",
      size = 14,
      angle = 90
    )
  )
myggplot1

# Keep the province plus the thirteen per-bracket frequency columns (the
# grand total GT_freq is deliberately left out).
craCreditBenefitFreqAnalysis <- craCreditBenefit %>%
  select(
    Province,
    X5K_freq, X5K_10K_freq, X10K_15K_freq, X15K_20K_freq,
    X20K_25K_freq, X25K_30K_freq, X30K_35K_freq, X35K_40K_freq,
    X40K_45K_freq, X45K_50K_freq, X50K_55K_freq, X55K_60K_freq,
    X60K_freq
  )

# Reshape wide -> long: columns 2 to 14 become Category/Amount pairs.
craCreditBenefitFreqAnalysisTidy <- craCreditBenefitFreqAnalysis %>%
  gather("Category", "Amount", 2:14)

# Defensive clean-up mirroring the amount columns: strip thousands separators
# and map empty strings to "0" before converting to numeric. Cells that were
# read as NA stay NA here (presumably the freq columns are already numeric,
# which would make this a no-op -- confirm against the raw CSV).
craCreditBenefitFreqAnalysisTidy$Amount <- gsub(
  ",", "", craCreditBenefitFreqAnalysisTidy$Amount
)
craCreditBenefitFreqAnalysisTidy$Amount <- as.numeric(
  gsub("^$", "0", craCreditBenefitFreqAnalysisTidy$Amount)
)

# Highest frequency in any income bracket, per province.
# na.rm = TRUE: several provinces have no value in the top brackets, and a
# single NA would otherwise make that province's maximum NA. The result
# column keeps its original name, `max(Amount)`.
maxCreditFreqByProvince <- craCreditBenefitFreqAnalysisTidy %>%
  group_by(Province) %>%
  summarise(`max(Amount)` = max(Amount, na.rm = TRUE))
kable(maxCreditFreqByProvince)
Province max(Amount)
Alberta 208080
British_Columbia 304940
Manitoba 101660
New_Brunswick NA
Newfoundland_and_Labrador NA
Northwest_Territories NA
Nova_Scotia NA
Nunavut NA
Ontario 852320
Outside_Canada NA
Prince_Edward_Island NA
Quebec 495090
Saskatchewan 72980
Yukon NA
# Highest frequency in any province, per income bracket.
# na.rm = TRUE: the upper brackets are missing for some provinces, and a
# single NA would otherwise make the whole bracket's maximum NA. The result
# column keeps its original name, `max(Amount)`.
maxCreditFreqByCategory <- craCreditBenefitFreqAnalysisTidy %>%
  group_by(Category) %>%
  summarise(`max(Amount)` = max(Amount, na.rm = TRUE))
kable(maxCreditFreqByCategory)
Category max(Amount)
X10K_15K_freq 573760
X15K_20K_freq 563100
X20K_25K_freq 440530
X25K_30K_freq 362380
X30K_35K_freq 328470
X35K_40K_freq 308840
X40K_45K_freq NA
X45K_50K_freq NA
X50K_55K_freq NA
X55K_60K_freq NA
X5K_10K_freq 426320
X5K_freq 852320
X60K_freq NA

Based on the above graphs, Ontario and Quebec received the maximum benefit credits and have the highest frequencies. This makes sense, as these two provinces have the highest number of working professionals.

As a general rule, tax credit benefits decrease as the income bracket goes up. The graphs confirm this: there are substantial drops in the tax credit once the category goes above 40K.