The first dataset is from Emilie Bolduc's post.

The untidy data is tidied and sorted to answer the following questions:

1) Where in the US has there been the most interest in yoga?

2) When were people most interested in yoga over the last 10 years?

3) An interactive map showing the popularity of yoga by state.

library(stringr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggmap)
## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
library(leaflet)
data <- read.csv("yoga.csv")
data <- data[-1,]   # drop the stray first row
head(data)
##         X Alabama..us.al. Alaska..us.ak. Arizona..us.az. Arkansas..us.ar.
## 2 2004-01              20             23              21               24
## 3 2004-02               8             26              25               16
## 4 2004-03              10             26              22               26
## 5 2004-04              15             34              24               18
## 6 2004-05              15             14              24               11
## 7 2004-06              12             21              23               14
##   California..us.ca. Colorado..us.co. Connecticut..us.ct. Delaware..us.de.
## 2                 32               33                  27               47
## 3                 27               30                  26               28
## 4                 28               29                  30               51
## 5                 25               27                  22               27
## 6                 25               24                  25               20
## 7                 27               27                  26               22
##   District.of.Columbia..us.dc. Florida..us.fl. Georgia..us.ga.
## 2                           32              21              21
## 3                           36              17              20
## 4                           29              17              20
## 5                           29              19              15
## 6                           24              20              16
## 7                           26              18              18
##   Hawaii..us.hi. Idaho..us.id. Illinois..us.il. Indiana..us.in.
## 2             36            21               25              24
## 3             24            22               23              14
## 4             36            21               25              17
## 5             30            18               17              14
## 6             39            14               17               9
## 7             44            17               20              18
##   Iowa..us.ia. Kansas..us.ks. Kentucky..us.ky. Louisiana..us.la.
## 2           14             20               17                20
## 3           16             12               19                15
## 4           18             13               18                17
## 5           19             19               12                20
## 6           14             15               15                14
## 7           17             18               17                16
##   Maine..us.me. Maryland..us.md. Massachusetts..us.ma. Michigan..us.mi.
## 2            29               26                    41               19
## 3            29               23                    33               18
## 4            26               22                    32               17
## 5            21               17                    31               15
## 6            25               21                    27               15
## 7            22               23                    29               17
##   Minnesota..us.mn. Mississippi..us.ms. Missouri..us.mo. Montana..us.mt.
## 2                26                  16               19              44
## 3                22                  20               18              26
## 4                21                  18               15              41
## 5                17                  18               12              25
## 6                22                  13               15              24
## 7                18                  18               20              21
##   Nebraska..us.ne. Nevada..us.nv. New.Hampshire..us.nh. New.Jersey..us.nj.
## 2               15             20                    45                 27
## 3               21             25                    20                 22
## 4               16             24                    22                 23
## 5               16             17                    26                 20
## 6               14             25                    23                 22
## 7               15             23                    27                 21
##   New.Mexico..us.nm. New.York..us.ny. North.Carolina..us.nc.
## 2                 33               35                     23
## 3                 25               28                     22
## 4                 26               29                     20
## 5                 18               26                     18
## 6                 21               28                     16
## 7                 32               28                     22
##   North.Dakota..us.nd. Ohio..us.oh. Oklahoma..us.ok. Oregon..us.or.
## 2                   52           19               22             34
## 3                   45           16               19             30
## 4                   45           15               12             28
## 5                   45           13               11             22
## 6                   45           16               17             25
## 7                   45           16               17             27
##   Pennsylvania..us.pa. Rhode.Island..us.ri. South.Carolina..us.sc.
## 2                   19                   44                     24
## 3                   18                   26                     19
## 4                   20                   27                     18
## 5                   19                   31                     14
## 6                   16                   26                     11
## 7                   20                   33                     19
##   South.Dakota..us.sd. Tennessee..us.tn. Texas..us.tx. Utah..us.ut.
## 2                   25                21            24           26
## 3                   22                18            16           20
## 4                   21                16            17           10
## 5                   21                21            16           20
## 6                   28                15            17           19
## 7                   28                16            17           16
##   Vermont..us.vt. Virginia..us.va. Washington..us.wa.
## 2              42               22                 30
## 3              39               16                 29
## 4              41               19                 27
## 5              37               17                 25
## 6              31               18                 25
## 7              34               22                 27
##   West.Virginia..us.wv. Wisconsin..us.wi. Wyoming..us.wy.
## 2                    23                18               0
## 3                    17                17              37
## 4                    27                20              35
## 5                    26                17              37
## 6                    21                18              35
## 7                    28                19              35
#str(data)
colnames(data) <- c("Year","Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware","DC","Florida","Georgia","Hawaii","Idaho","Illinois","Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland","Massachusetts","Michigan","Minnesota","Mississippi","Missouri","Montana","Nebraska","Nevada","NH","New_Jersey","New_Mexico","New_York","North_Carolina","North_Dakota","Ohio","Oklahoma","Oregon","Pennsylvania","Rhode_Island","South_Carolina","South_Dakota","Tennessee","Texas","Utah","Vermont","Virginia","Washington","West_Virginia","Wisconsin","Wyoming")
data1 <- data %>% 
            separate(Year, into = c("Year","month"), sep = "-")
data1 <- data1[,-2]   # drop the month column; we aggregate by year
data2 <- data1 %>%
      group_by(Year) %>%
         summarise_all(funs(sum))   # funs() is deprecated in newer dplyr; summarise_all(sum) works there
data2 <- data.frame(data2)
data3 <- setNames(data.frame(t(data2[,-1])), data2[,1])   # transpose: states as rows, years as columns
for(i in 1:51) {
  data3$total[i] <- rowSums(data3[i,1:13],na.rm = TRUE)
}
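# The loop above works, but rowSums() is vectorised over rows, so the same
# total column can be built in a single call:
data3$total <- rowSums(data3[,1:13], na.rm = TRUE)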
head(data3)
##            2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
## Alabama     151  153  166  136  127  134  162  144  163  181  211  236
## Alaska      267  276  252  280  273  306  427  416  433  481  512  582
## Arizona     271  270  255  245  263  271  300  264  283  288  307  323
## Arkansas    204  185  176  163  139  159  209  168  189  240  259  256
## California  321  309  301  287  266  257  234  291  301  320  329  329
## Colorado    337  353  369  347  332  328  367  360  374  410  409  437
##            2016 total
## Alabama      78  2042
## Alaska      221  4726
## Arizona     113  3453
## Arkansas     77  2424
## California  114  3659
## Colorado    155  4578
max(data3$total)
## [1] 7529

Vermont (7529) showed the highest total interest in yoga over the last 10 years.
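
To confirm which state that maximum belongs to, the row name can be pulled out directly:

rownames(data3)[which.max(data3$total)]   # returns "Vermont" per the table above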

data3[52,] = c(colSums(data3[,1:14]))   # append a totals row across all states
max(data3[52,-14])                      # highest yearly total, excluding the grand total
## [1] 18475

The year 2015 recorded the highest total search interest in yoga.
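
Which year that maximum falls in can be checked the same way, using the totals row added above:

yearly <- unlist(data3[52,-14])
names(yearly)[which.max(yearly)]   # returns "2015" per the claim above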

data3$Regions <- c("Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware","DC","Florida","Georgia","Hawaii","Idaho","Illinois","Indiana","Iowa","Kansas","Kentucky","Louisiana","Maine","Maryland","Massachusetts","Michigan","Minnesota","Mississippi","Missouri","Montana","Nebraska","Nevada","NH","New_Jersey","New_Mexico","New_York","North_Carolina","North_Dakota","Ohio","Oklahoma","Oregon","Pennsylvania","Rhode_Island","South_Carolina","South_Dakota","Tennessee","Texas","Utah","Vermont","Virginia","Washington","West_Virginia","Wisconsin","Wyoming","Total1")
data3$total <- as.numeric(data3$total)
data3$Regions <- as.character(data3$Regions)

Getting the latitude and longitude of each state into the data frame.

for (i in 1:nrow(data3)) {
  latlon = geocode(data3[i,15])   # column 15 is Regions
  data3$lon[i] = as.numeric(latlon[1])
  data3$lat[i] = as.numeric(latlon[2])
}
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Alabama
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Alaska
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Arizona
## ... (one geocoding request per region; intermediate output omitted)
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Total1
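
Note for reproducibility: newer versions of ggmap (2.7+) require a registered Google API key before geocode() will run; the key below is a placeholder, not a real credential.

register_google(key = "YOUR_API_KEY")   # hypothetical placeholder key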

Popularity of yoga by state

data4 <- data.frame(data3$total, data3$lon, data3$lat,data3$Regions)
colnames(data4) = c('total','lon','lat','Region')
m <- leaflet() %>%
  addTiles() %>%
  addMarkers(lng = data4$lon, lat = data4$lat,
             popup = paste("Region:", data4$Region, "- Total:", data4$total))
m

#### Clicking on a pin shows the total interest score for yoga in that state.
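
As a variant, circle markers scaled by each state's total make relative popularity visible at a glance; a sketch using the same data4, with an arbitrary radius scaling:

leaflet(data4) %>%
  addTiles() %>%
  addCircleMarkers(lng = ~lon, lat = ~lat,
                   radius = ~total / 1000,   # arbitrary scaling for visibility
                   popup = ~paste("Region:", Region, "- Total:", total))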

II) My second dataset is from Chad Smith's post, based on a 'catfish' survey. I calculated the totals by year and by size category, starting with crunching and tidying the data.
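
(The original post does not show the import step; a minimal assumed read, with a hypothetical filename:)

CFData <- read.csv("catfish.csv", check.names = FALSE)   # hypothetical filename; keeps the `Size category` column name intact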

Gathering the data from 'wide' to 'long' format.

TCFData <- CFData %>%
                gather(Year,Budget,c(2:26))
## Warning: attributes are not identical across measure variables;
## they will be dropped
TCFData$Budget <- as.numeric(gsub(",","",TCFData$Budget))
TCFData$Year <- as.numeric(TCFData$Year)
head(TCFData)
##        Size category Year Budget
## 1       Broodfish 1/ 1992   1491
## 2  Fingerling/fry 2/ 1992 849412
## 3        Stockers 3/ 1992 634353
## 4  Small foodsize 4/ 1992 166731
## 5 Medium foodsize 5/ 1992  70495
## 6  Large foodsize 6/ 1992   6769
ggplot(TCFData, aes(x = reorder(Year, Budget), y = Budget,
                    color = `Size category`, group = `Size category`)) +
  geom_line(linetype = "dotted") +
  geom_point(shape = 20, size = 4) +
  labs(x = "Year", y = "Budget") +
  coord_flip()

The graph makes it clear that the "Fingerling/fry" size category varies considerably over the years, while the values for Broodfish and Large foodsize fish stay roughly constant.

To analyze the data by year, we use the spread() function.

TSCFdata <- spread(TCFData,`Size category`,`Budget`)
TSCFdata <- data.frame(TSCFdata)
head(TSCFdata)
##   Year Broodfish.1. Fingerling.fry.2. Large.foodsize.6. Medium.foodsize.5.
## 1 1992         1491            849412              6769              70495
## 2 1993         1169            669491              6698              61894
## 3 1994         1183            648628              5196              48851
## 4 1995         1301            724693              4536              59159
## 5 1996         1171            823397              6644              64858
## 6 1997         1163            873457              7810              84725
##   Small.foodsize.4. Stockers.3.
## 1            166731      634353
## 2            153600      571254
## 3            134314      548207
## 4            138160      554342
## 5            156297      627834
## 6            178448      754816
TSCFdata <- na.omit(TSCFdata)
# Budget was already converted to numeric above, so these comma-stripping
# conversions are redundant safeguards; done here in one vectorised call:
TSCFdata[,-1] <- lapply(TSCFdata[,-1], function(x) as.numeric(gsub(",", "", x)))
TSCFData1 <- mutate(TSCFdata,Total = rowSums(TSCFdata[,2:7]))
ggplot(TSCFData1, aes(x = reorder(Year, Total), y = Total, group = 1)) +
  geom_line(linetype = "dotted") +
  geom_point(shape = 20, size = 4) +
  ggtitle("Year wise comparison") +
  labs(x = "Year", y = "Total Inventory Amount") +
  coord_flip()

The plot shows that 2016 had a much lower total inventory amount for catfish.

III) The third dataset is from Albina Gallyavova's post on Fed commercial paper (CP) rates.

The attached file has Federal Reserve Board commercial paper rates with essentially several header rows. The columns should be split and organized by type (financial/non-financial/asset-backed/etc.) and tenor (overnight/7-day/15-day, etc.), depending on the intent of the analysis. It could be useful to look at the patterns in the different buckets, perform basic statistical analysis, and estimate future rates.

Loading the data and getting it in order.

library(stringr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
dataset <- read.csv("FRB_CP.csv", stringsAsFactors = FALSE)
dataset[dataset == 'ND'] <- 0   # 'ND' marks dates with no data; treated as 0 here
dataset <- na.omit(dataset)
dataset <- dataset[-1:-4,]      # drop the leftover header rows

Renaming data columns.

names(dataset) <- c("Date","0D-AA-NFC","7D-AA-NFC","15D-AA-NFC","30D-AA-NFC","60D-AA-NFC","90D-AA-NFC","0D-A2-P2-NFC","7D-A2-P2-NFC","15D-A2-P2-NFC","30D-A2-P2-NFC","60D-A2-P2-NFC","90D-A2-P2-NFC","0D-AA-FC","7D-AA-FC","15D-AA-FC","30D-AA-FC","60D-AA-FC","90D-AA-FC","0D-AA-ABC","7D-AA-ABC","15D-AA-ABC","30D-AA-ABC","60D-AA-ABC","90D-AA-ABC")
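
The new names encode tenor and series in a single label. If the split by tenor and type described above is wanted, the label can be pulled apart after gathering; a sketch, assuming the renaming above:

longCP <- dataset %>%
  gather(tenor_series, rate, -Date) %>%
  separate(tenor_series, into = c("tenor", "series"), sep = "-", extra = "merge") %>%
  mutate(rate = as.numeric(rate))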

Separating the date to analyze on the basis of year.

dataset1 <- dataset %>% 
            separate(Date, into = c("month","day","Year"), sep = "/")
dataset1 <- dataset1[,-1:-2]   # keep only the Year
dataset1$Year <- as.numeric(dataset1$Year)
head(dataset1)
##    Year 0D-AA-NFC 7D-AA-NFC 15D-AA-NFC 30D-AA-NFC 60D-AA-NFC 90D-AA-NFC
## 6    16      0.36      0.35       0.38       0.42       0.48       0.54
## 7    16      0.35      0.35       0.37       0.42       0.47       0.53
## 8    16      0.36      0.38       0.41       0.43       0.49       0.54
## 9    16      0.37      0.35       0.37        0.4       0.46       0.52
## 10   16      0.37      0.38       0.39       0.41       0.47       0.53
## 11   16      0.36      0.35       0.52       0.42       0.46       0.52
##    0D-A2-P2-NFC 7D-A2-P2-NFC 15D-A2-P2-NFC 30D-A2-P2-NFC 60D-A2-P2-NFC
## 6          0.58         0.69          0.66          0.71          0.86
## 7          0.59         0.66          0.67          0.85          0.83
## 8          0.59         0.65          0.64           0.8          0.85
## 9          0.59         0.64          0.68          0.76           0.8
## 10         0.59         0.64          0.69          0.77          0.92
## 11         0.58         0.63          0.71          0.81          0.94
##    90D-A2-P2-NFC 0D-AA-FC 7D-AA-FC 15D-AA-FC 30D-AA-FC 60D-AA-FC 90D-AA-FC
## 6              0      0.4        0         0         0      0.62      0.87
## 7              0      0.4     0.39       0.4      0.42      0.59      0.76
## 8              1      0.4     0.42      0.45      0.51      0.64      0.77
## 9              0      0.4     0.41      0.42      0.45      0.58      0.73
## 10          1.07      0.4     0.41      0.41      0.43      0.61      0.79
## 11             0      0.4     0.42         0      0.49      0.62      0.84
##    0D-AA-ABC 7D-AA-ABC 15D-AA-ABC 30D-AA-ABC 60D-AA-ABC 90D-AA-ABC
## 6       0.46      0.48       0.52       0.61       0.79       0.95
## 7       0.46      0.48       0.49        0.6        0.7       0.91
## 8       0.46      0.51       0.52       0.63       0.76       0.89
## 9       0.47      0.48       0.53       0.63       0.77       0.92
## 10      0.46      0.51       0.56       0.66       0.73        0.9
## 11      0.47      0.48       0.56       0.57       0.76       0.94

Grouping with respect to year.

dataset1 <- data.frame(sapply(dataset1, function(x) as.numeric(as.character(x))))
dataset2 <- dataset1 %>%
      group_by(Year)%>%
         summarise_all(funs(sum))
head(dataset2)
## # A tibble: 2 x 25
##    Year X0D.AA.NFC X7D.AA.NFC X15D.AA.NFC X30D.AA.NFC X60D.AA.NFC
##   <dbl>      <dbl>      <dbl>       <dbl>       <dbl>       <dbl>
## 1    16      29.11      29.44       30.51       34.20       35.54
## 2    17     159.85     158.46      159.23      165.28      167.01
## # ... with 19 more variables: X90D.AA.NFC <dbl>, X0D.A2.P2.NFC <dbl>,
## #   X7D.A2.P2.NFC <dbl>, X15D.A2.P2.NFC <dbl>, X30D.A2.P2.NFC <dbl>,
## #   X60D.A2.P2.NFC <dbl>, X90D.A2.P2.NFC <dbl>, X0D.AA.FC <dbl>,
## #   X7D.AA.FC <dbl>, X15D.AA.FC <dbl>, X30D.AA.FC <dbl>, X60D.AA.FC <dbl>,
## #   X90D.AA.FC <dbl>, X0D.AA.ABC <dbl>, X7D.AA.ABC <dbl>,
## #   X15D.AA.ABC <dbl>, X30D.AA.ABC <dbl>, X60D.AA.ABC <dbl>,
## #   X90D.AA.ABC <dbl>
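
Summing daily rates makes each year's figure depend on how many observations that year contributes; a yearly average may be easier to compare. An alternative sketch:

dataset2_mean <- dataset1 %>%
  group_by(Year) %>%
  summarise_all(mean)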

Graph depicting the year-wise change in paper rates.

dataset3 <- melt(dataset2, id.vars = "Year",
                 measure.vars = grep("^X", names(dataset2), value = TRUE))
ggplot(dataset3, aes(x = variable, y = value, color = as.factor(Year))) +
  geom_point() +
  geom_line(aes(group = Year)) +
  coord_flip(ylim = c(1, 100))   # a separate coord_cartesian() would be discarded by coord_flip()

dataset2$total <- rowSums(dataset2[,2:25], na.rm = TRUE)   # vectorised; avoids the uninitialised-column warning

Percentage increase from 2016 to 2017

percent_increase <- (671.13 - 208.02) / 208.02   # hard-coded yearly totals for 2016 and 2017
percent_increase * 100
## [1] 222.6276
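
If the hard-coded values correspond to the total column computed above, the same figure can be derived without retyping them:

with(dataset2, (total[2] - total[1]) / total[1] * 100)
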
dataset2[3,] = c("Total", colSums(dataset2[,2:26]))
head(dataset2)
## # A tibble: 3 x 26
##    Year X0D.AA.NFC X7D.AA.NFC X15D.AA.NFC X30D.AA.NFC X60D.AA.NFC
##   <chr>      <chr>      <chr>       <chr>       <chr>       <chr>
## 1    16      29.11      29.44       30.51        34.2       35.54
## 2    17     159.85     158.46      159.23      165.28      167.01
## 3 Total     188.96      187.9      189.74      199.48      202.55
## # ... with 20 more variables: X90D.AA.NFC <chr>, X0D.A2.P2.NFC <chr>,
## #   X7D.A2.P2.NFC <chr>, X15D.A2.P2.NFC <chr>, X30D.A2.P2.NFC <chr>,
## #   X60D.A2.P2.NFC <chr>, X90D.A2.P2.NFC <chr>, X0D.AA.FC <chr>,
## #   X7D.AA.FC <chr>, X15D.AA.FC <chr>, X30D.AA.FC <chr>, X60D.AA.FC <chr>,
## #   X90D.AA.FC <chr>, X0D.AA.ABC <chr>, X7D.AA.ABC <chr>,
## #   X15D.AA.ABC <chr>, X30D.AA.ABC <chr>, X60D.AA.ABC <chr>,
## #   X90D.AA.ABC <chr>, total <chr>

I added row and column totals so that the financial, non-financial, and asset-backed rates can be calculated separately; depending on which rates you want, you can simply add up the corresponding columns and take the sum. I primarily focused on how the rates changed between the two years, which is represented in the graph above.
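
For example, to total only the AA financial (FC) block, the relevant columns can be picked out by their suffix; a sketch, where the grep pattern assumes the X-prefixed names shown in the output above:

fc_cols <- grep("AA\\.FC$", names(dataset2), value = TRUE)
rowSums(sapply(dataset2[1:2, fc_cols], as.numeric))   # FC totals for the 2016 and 2017 rows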