# Load the state-level yoga search data set from GitHub.
yoga_url <- "https://raw.githubusercontent.com/emrahakin1985/DATA607/master/datasets/yoga.csv"
yoga <- read.csv(yoga_url, stringsAsFactors = FALSE)

# Drop the first row. Negative indexing is used instead of 2:nrow(yoga),
# which would count backwards (2, 1) if only one row had been read.
yoga <- yoga[-1, ]

# Renumber the rows from 1 after the drop; seq_len() stays empty for zero
# rows, whereas 1:nrow(yoga) would produce c(1, 0).
row.names(yoga) <- seq_len(nrow(yoga))

# The first column holds the period label (e.g. "2004-01").
colnames(yoga)[1] <- "date"
knitr::kable(head(yoga, 15))
date Alabama..us.al. Alaska..us.ak. Arizona..us.az. Arkansas..us.ar. California..us.ca. Colorado..us.co. Connecticut..us.ct. Delaware..us.de. District.of.Columbia..us.dc. Florida..us.fl. Georgia..us.ga. Hawaii..us.hi. Idaho..us.id. Illinois..us.il. Indiana..us.in. Iowa..us.ia. Kansas..us.ks. Kentucky..us.ky. Louisiana..us.la. Maine..us.me. Maryland..us.md. Massachusetts..us.ma. Michigan..us.mi. Minnesota..us.mn. Mississippi..us.ms. Missouri..us.mo. Montana..us.mt. Nebraska..us.ne. Nevada..us.nv. New.Hampshire..us.nh. New.Jersey..us.nj. New.Mexico..us.nm. New.York..us.ny. North.Carolina..us.nc. North.Dakota..us.nd. Ohio..us.oh. Oklahoma..us.ok. Oregon..us.or. Pennsylvania..us.pa. Rhode.Island..us.ri. South.Carolina..us.sc. South.Dakota..us.sd. Tennessee..us.tn. Texas..us.tx. Utah..us.ut. Vermont..us.vt. Virginia..us.va. Washington..us.wa. West.Virginia..us.wv. Wisconsin..us.wi. Wyoming..us.wy.
2004-01 20 23 21 24 32 33 27 47 32 21 21 36 21 25 24 14 20 17 20 29 26 41 19 26 16 19 44 15 20 45 27 33 35 23 52 19 22 34 19 44 24 25 21 24 26 42 22 30 23 18 0
2004-02 8 26 25 16 27 30 26 28 36 17 20 24 22 23 14 16 12 19 15 29 23 33 18 22 20 18 26 21 25 20 22 25 28 22 45 16 19 30 18 26 19 22 18 16 20 39 16 29 17 17 37
2004-03 10 26 22 26 28 29 30 51 29 17 20 36 21 25 17 18 13 18 17 26 22 32 17 21 18 15 41 16 24 22 23 26 29 20 45 15 12 28 20 27 18 21 16 17 10 41 19 27 27 20 35
2004-04 15 34 24 18 25 27 22 27 29 19 15 30 18 17 14 19 19 12 20 21 17 31 15 17 18 12 25 16 17 26 20 18 26 18 45 13 11 22 19 31 14 21 21 16 20 37 17 25 26 17 37
2004-05 15 14 24 11 25 24 25 20 24 20 16 39 14 17 9 14 15 15 14 25 21 27 15 22 13 15 24 14 25 23 22 21 28 16 45 16 17 25 16 26 11 28 15 17 19 31 18 25 21 18 35
2004-06 12 21 23 14 27 27 26 22 26 18 18 44 17 20 18 17 18 17 16 22 23 29 17 18 18 20 21 15 23 27 21 32 28 22 45 16 17 27 20 33 19 28 16 17 16 34 22 27 28 19 35
2004-07 13 17 25 15 28 30 22 34 28 18 20 29 19 20 19 15 19 15 13 32 29 36 16 18 12 16 21 17 15 20 23 25 31 22 28 14 15 30 19 34 19 29 19 19 21 32 21 28 23 18 34
2004-08 11 17 20 15 28 27 27 26 31 17 19 31 26 21 17 14 17 20 16 30 21 36 17 18 12 21 22 19 24 28 22 31 31 20 31 14 11 24 18 23 18 29 13 17 18 35 23 27 32 17 26
2004-09 10 23 20 10 26 25 24 18 25 16 17 32 21 19 18 13 15 15 14 25 21 32 18 17 13 15 22 13 20 21 23 19 30 21 33 13 13 27 18 37 14 28 13 15 25 28 20 26 41 15 23
2004-10 13 23 22 18 25 30 19 35 20 16 18 36 15 16 13 19 15 12 12 26 24 29 16 15 15 15 35 15 14 22 20 14 26 17 20 13 11 28 16 32 16 28 18 15 20 49 15 25 14 20 23
2004-11 15 19 26 16 25 26 24 25 23 15 16 37 26 18 14 15 13 18 12 22 22 26 16 18 11 17 27 16 23 28 21 26 28 17 21 17 9 25 18 32 16 28 17 13 20 39 16 25 14 15 32
2004-12 9 24 19 21 25 29 25 23 23 16 17 34 24 18 16 18 15 12 14 26 21 32 15 21 16 15 23 12 22 28 20 23 27 18 19 15 13 24 17 30 13 28 18 13 21 34 16 24 14 16 25
2005-01 12 19 29 25 30 32 32 37 28 19 23 43 23 20 17 20 19 15 19 36 27 36 22 20 21 19 29 16 26 35 22 36 32 24 17 18 15 24 20 39 20 21 21 17 20 49 23 27 21 24 35
2005-02 12 12 20 19 27 28 23 19 26 16 17 28 18 20 20 18 15 15 16 29 16 31 19 20 16 15 21 12 26 34 23 26 28 20 21 17 18 28 18 32 17 16 16 16 21 35 20 26 22 15 29
2005-03 13 20 21 16 26 29 31 17 28 16 17 27 12 19 17 17 16 15 16 31 21 29 15 17 12 15 16 13 14 37 22 34 30 15 16 16 12 27 19 30 17 14 17 16 19 44 20 25 21 19 32


Date column separated into year and month:

# Split the "date" column at the hyphen into separate "year" and "month"
# columns; the remaining columns are carried over unchanged.
yoga2 <- separate(yoga, date, into = c("year", "month"), sep = "-")

# Preview the first ten rows of the first five columns.
yoga2 %>%
  select(1:5) %>%
  head(10)
##    year month Alabama..us.al. Alaska..us.ak. Arizona..us.az.
## 1  2004    01              20             23              21
## 2  2004    02               8             26              25
## 3  2004    03              10             26              22
## 4  2004    04              15             34              24
## 5  2004    05              15             14              24
## 6  2004    06              12             21              23
## 7  2004    07              13             17              25
## 8  2004    08              11             17              20
## 9  2004    09              10             23              20
## 10 2004    10              13             23              22


Extracting state names:

# Snapshot of the column names of `yoga`. The variable is called `c`, like
# base R's c(); calls such as c(...) still reach the function because R skips
# non-function bindings when it looks up a call.
c <- names(yoga)
print(c)
##  [1] "date"                         "Alabama..us.al."             
##  [3] "Alaska..us.ak."               "Arizona..us.az."             
##  [5] "Arkansas..us.ar."             "California..us.ca."          
##  [7] "Colorado..us.co."             "Connecticut..us.ct."         
##  [9] "Delaware..us.de."             "District.of.Columbia..us.dc."
## [11] "Florida..us.fl."              "Georgia..us.ga."             
## [13] "Hawaii..us.hi."               "Idaho..us.id."               
## [15] "Illinois..us.il."             "Indiana..us.in."             
## [17] "Iowa..us.ia."                 "Kansas..us.ks."              
## [19] "Kentucky..us.ky."             "Louisiana..us.la."           
## [21] "Maine..us.me."                "Maryland..us.md."            
## [23] "Massachusetts..us.ma."        "Michigan..us.mi."            
## [25] "Minnesota..us.mn."            "Mississippi..us.ms."         
## [27] "Missouri..us.mo."             "Montana..us.mt."             
## [29] "Nebraska..us.ne."             "Nevada..us.nv."              
## [31] "New.Hampshire..us.nh."        "New.Jersey..us.nj."          
## [33] "New.Mexico..us.nm."           "New.York..us.ny."            
## [35] "North.Carolina..us.nc."       "North.Dakota..us.nd."        
## [37] "Ohio..us.oh."                 "Oklahoma..us.ok."            
## [39] "Oregon..us.or."               "Pennsylvania..us.pa."        
## [41] "Rhode.Island..us.ri."         "South.Carolina..us.sc."      
## [43] "South.Dakota..us.sd."         "Tennessee..us.tn."           
## [45] "Texas..us.tx."                "Utah..us.ut."                
## [47] "Vermont..us.vt."              "Virginia..us.va."            
## [49] "Washington..us.wa."           "West.Virginia..us.wv."       
## [51] "Wisconsin..us.wi."            "Wyoming..us.wy."


# For each column name keep everything up to and including the ".."
# separator. Names without a ".." produce no match and therefore vanish when
# the per-name results are flattened into one character vector.
c2 <- c %>%
  str_extract_all(".+\\.{2}") %>%
  unlist()
print(c2)
##  [1] "Alabama.."              "Alaska.."              
##  [3] "Arizona.."              "Arkansas.."            
##  [5] "California.."           "Colorado.."            
##  [7] "Connecticut.."          "Delaware.."            
##  [9] "District.of.Columbia.." "Florida.."             
## [11] "Georgia.."              "Hawaii.."              
## [13] "Idaho.."                "Illinois.."            
## [15] "Indiana.."              "Iowa.."                
## [17] "Kansas.."               "Kentucky.."            
## [19] "Louisiana.."            "Maine.."               
## [21] "Maryland.."             "Massachusetts.."       
## [23] "Michigan.."             "Minnesota.."           
## [25] "Mississippi.."          "Missouri.."            
## [27] "Montana.."              "Nebraska.."            
## [29] "Nevada.."               "New.Hampshire.."       
## [31] "New.Jersey.."           "New.Mexico.."          
## [33] "New.York.."             "North.Carolina.."      
## [35] "North.Dakota.."         "Ohio.."                
## [37] "Oklahoma.."             "Oregon.."              
## [39] "Pennsylvania.."         "Rhode.Island.."        
## [41] "South.Carolina.."       "South.Dakota.."        
## [43] "Tennessee.."            "Texas.."               
## [45] "Utah.."                 "Vermont.."             
## [47] "Virginia.."             "Washington.."          
## [49] "West.Virginia.."        "Wisconsin.."           
## [51] "Wyoming.."


# Remove the first ".." from each entry. str_replace() already returns a
# character vector, so no flattening step is needed.
c3 <- c2 %>%
  str_replace("\\.\\.", "")
print(c3)
##  [1] "Alabama"              "Alaska"               "Arizona"             
##  [4] "Arkansas"             "California"           "Colorado"            
##  [7] "Connecticut"          "Delaware"             "District.of.Columbia"
## [10] "Florida"              "Georgia"              "Hawaii"              
## [13] "Idaho"                "Illinois"             "Indiana"             
## [16] "Iowa"                 "Kansas"               "Kentucky"            
## [19] "Louisiana"            "Maine"                "Maryland"            
## [22] "Massachusetts"        "Michigan"             "Minnesota"           
## [25] "Mississippi"          "Missouri"             "Montana"             
## [28] "Nebraska"             "Nevada"               "New.Hampshire"       
## [31] "New.Jersey"           "New.Mexico"           "New.York"            
## [34] "North.Carolina"       "North.Dakota"         "Ohio"                
## [37] "Oklahoma"             "Oregon"               "Pennsylvania"        
## [40] "Rhode.Island"         "South.Carolina"       "South.Dakota"        
## [43] "Tennessee"            "Texas"                "Utah"                
## [46] "Vermont"              "Virginia"             "Washington"          
## [49] "West.Virginia"        "Wisconsin"            "Wyoming"


# Turn every remaining dot into a space so multi-word names read naturally
# (e.g. "New.York" becomes "New York").
c4 <- c3 %>%
  str_replace_all("\\.", " ")
print(c4)
##  [1] "Alabama"              "Alaska"               "Arizona"             
##  [4] "Arkansas"             "California"           "Colorado"            
##  [7] "Connecticut"          "Delaware"             "District of Columbia"
## [10] "Florida"              "Georgia"              "Hawaii"              
## [13] "Idaho"                "Illinois"             "Indiana"             
## [16] "Iowa"                 "Kansas"               "Kentucky"            
## [19] "Louisiana"            "Maine"                "Maryland"            
## [22] "Massachusetts"        "Michigan"             "Minnesota"           
## [25] "Mississippi"          "Missouri"             "Montana"             
## [28] "Nebraska"             "Nevada"               "New Hampshire"       
## [31] "New Jersey"           "New Mexico"           "New York"            
## [34] "North Carolina"       "North Dakota"         "Ohio"                
## [37] "Oklahoma"             "Oregon"               "Pennsylvania"        
## [40] "Rhode Island"         "South Carolina"       "South Dakota"        
## [43] "Tennessee"            "Texas"                "Utah"                
## [46] "Vermont"              "Virginia"             "Washington"          
## [49] "West Virginia"        "Wisconsin"            "Wyoming"


Renaming the state columns with the cleaned state names:

# Keep the first two column names as they are and relabel every remaining
# column with the cleaned state names held in c4.
colnames(yoga2) <- c(colnames(yoga2)[seq_len(2)], c4)


Gathering all state columns:

# Reshape from wide to long: every column except year and month is stacked
# into a State / Yoga_Search pair.
yoga3 <- gather(yoga2, key = State, value = Yoga_Search, -year, -month)
yoga3 %>%
  head(15)
##    year month   State Yoga_Search
## 1  2004    01 Alabama          20
## 2  2004    02 Alabama           8
## 3  2004    03 Alabama          10
## 4  2004    04 Alabama          15
## 5  2004    05 Alabama          15
## 6  2004    06 Alabama          12
## 7  2004    07 Alabama          13
## 8  2004    08 Alabama          11
## 9  2004    09 Alabama          10
## 10 2004    10 Alabama          13
## 11 2004    11 Alabama          15
## 12 2004    12 Alabama           9
## 13 2005    01 Alabama          12
## 14 2005    02 Alabama          12
## 15 2005    03 Alabama          13


Yoga searches grouped by state:

# Sum the search values per state and order the states from the largest
# total to the smallest. local() keeps the intermediate table out of the
# global workspace.
yoga_search <- local({
  per_state <- summarise(group_by(yoga3, State), Yoga_Search = sum(Yoga_Search))
  arrange(per_state, desc(Yoga_Search))
})

# Top 10: the ten states with the highest totals.
yoga_search %>%
  head(10)
## # A tibble: 10 x 2
##            State Yoga_Search
##            <chr>       <int>
##  1       Vermont        7529
##  2        Hawaii        5699
##  3  Rhode Island        5546
##  4 New Hampshire        5292
##  5         Maine        5251
##  6       Montana        4976
##  7       Wyoming        4815
##  8 Massachusetts        4763
##  9        Alaska        4726
## 10      Colorado        4578
# Bottom 10: the last ten rows of the descending table, i.e. the states with
# the lowest totals.
yoga_search %>%
  tail(10)
## # A tibble: 10 x 2
##          State Yoga_Search
##          <chr>       <int>
##  1      Kansas        2532
##  2   Tennessee        2507
##  3       Texas        2466
##  4    Kentucky        2446
##  5   Louisiana        2440
##  6    Arkansas        2424
##  7    Oklahoma        2421
##  8     Georgia        2383
##  9 Mississippi        2241
## 10     Alabama        2042


Yoga searches by year. The data for 2016 appear to be wrong or incomplete, so that year is excluded:

# Sum the search values per year, sort the years in ascending order, and drop
# 2016. `year` is a character column, so the comparison with 2016 relies on
# R coercing the number to the string "2016".
yoga_search2 <- local({
  per_year <- summarise(group_by(yoga3, year), Yoga_Search = sum(Yoga_Search))
  filter(arrange(per_year, year), year != 2016)
})
print(yoga_search2)
## # A tibble: 12 x 2
##     year Yoga_Search
##    <chr>       <int>
##  1  2004       13467
##  2  2005       13173
##  3  2006       12952
##  4  2007       12353
##  5  2008       12085
##  6  2009       12671
##  7  2010       14874
##  8  2011       14197
##  9  2012       14875
## 10  2013       16350
## 11  2014       17442
## 12  2015       18475


Scatter plot showing the association between year and total yoga searches:

# One blue point per year: year on the x axis, summed searches on the y axis.
ggplot(data = yoga_search2, mapping = aes(x = year, y = Yoga_Search)) +
  geom_point(color = "blue")