library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Locations of the three raw CSV files (squirrel census, GDP growth, candy
# survey) that the rest of the script tidies and plots.
candy_url <- "https://raw.githubusercontent.com/schoolkidrich/R/main/DATA%20607/project2/candyhierarchy2017.csv"
gdp_url <- "https://raw.githubusercontent.com/schoolkidrich/R/main/DATA%20607/project2/gdp.csv"
squirrel_url <- "https://raw.githubusercontent.com/schoolkidrich/R/main/DATA%20607/project2/2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv"

# Read the squirrel data straight from its URL and preview the first six rows.
squirrel_df <- read.csv(file = squirrel_url)
head(squirrel_df, n = 6L)
## X Y Unique.Squirrel.ID Hectare Shift Date
## 1 -73.95613 40.79408 37F-PM-1014-03 37F PM 10142018
## 2 -73.96886 40.78378 21B-AM-1019-04 21B AM 10192018
## 3 -73.97428 40.77553 11B-PM-1014-08 11B PM 10142018
## 4 -73.95964 40.79031 32E-PM-1017-14 32E PM 10172018
## 5 -73.97027 40.77621 13E-AM-1017-05 13E AM 10172018
## 6 -73.96836 40.77259 11H-AM-1010-03 11H AM 10102018
## Hectare.Squirrel.Number Age Primary.Fur.Color Highlight.Fur.Color
## 1 3
## 2 4
## 3 8 Gray
## 4 14 Adult Gray
## 5 5 Adult Gray Cinnamon
## 6 3 Adult Cinnamon White
## Combination.of.Primary.and.Highlight.Color
## 1 +
## 2 +
## 3 Gray+
## 4 Gray+
## 5 Gray+Cinnamon
## 6 Cinnamon+White
## Color.notes
## 1
## 2
## 3
## 4 Nothing selected as Primary. Gray selected as Highlights. Made executive adjustments.
## 5
## 6
## Location Above.Ground.Sighter.Measurement Specific.Location Running
## 1 FALSE
## 2 FALSE
## 3 Above Ground 10 FALSE
## 4 FALSE
## 5 Above Ground on tree stump FALSE
## 6 FALSE
## Chasing Climbing Eating Foraging Other.Activities Kuks Quaas Moans
## 1 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 3 TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE TRUE TRUE FALSE FALSE FALSE
## 5 FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## 6 FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## Tail.flags Tail.twitches Approaches Indifferent Runs.from Other.Interactions
## 1 FALSE FALSE FALSE FALSE FALSE
## 2 FALSE FALSE FALSE FALSE FALSE
## 3 FALSE FALSE FALSE FALSE FALSE
## 4 FALSE FALSE FALSE FALSE TRUE
## 5 FALSE FALSE FALSE FALSE FALSE
## 6 FALSE TRUE FALSE TRUE FALSE
## Lat.Long
## 1 POINT (-73.9561344937861 40.7940823884086)
## 2 POINT (-73.9688574691102 40.7837825208444)
## 3 POINT (-73.97428114848522 40.775533619083)
## 4 POINT (-73.9596413903948 40.7903128889029)
## 5 POINT (-73.9702676472613 40.7762126854894)
## 6 POINT (-73.9683613516225 40.7725908847499)
# Keep only the identifying columns and the five activity flags.
sighting_id_cols <- c(
  "Unique.Squirrel.ID", "Hectare", "Shift", "Age", "Primary.Fur.Color"
)
activity_cols <- c("Running", "Chasing", "Climbing", "Eating", "Foraging")

# Reshape to long form: one row per squirrel and activity, with the flag stored
# in the default `value` column. The activity columns are selected by name
# rather than by position so the reshape does not depend on column order.
squirrel_sightings <- squirrel_df %>%
  select(all_of(c(sighting_id_cols, activity_cols))) %>%
  pivot_longer(cols = all_of(activity_cols), names_to = "Action")

# Keep only the activities that were actually observed. filter() works on the
# logical column directly; the previous matrix-style `[` index on a tibble is
# deprecated since tibble 3.0.0 and raised a warning.
squirrel_clean <- squirrel_sightings %>%
  filter(value)
## Warning: The `i` argument of ``[`()` can't be a matrix as of tibble 3.0.0.
## Convert to a vector.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Every remaining row has value == TRUE, so the flag column carries no
# information; drop it by name. (The earlier `1:dim(x)[2]-1` index evaluated as
# `(1:n) - 1`, i.e. 0:(n - 1), and only worked because index 0 is ignored.)
squirrel_clean <- squirrel_clean %>%
  select(-value)
head(squirrel_clean)
## # A tibble: 6 x 6
## Unique.Squirrel.ID Hectare Shift Age Primary.Fur.Color Action
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 11B-PM-1014-08 11B PM "" Gray Chasing
## 2 32E-PM-1017-14 32E PM "Adult" Gray Eating
## 3 32E-PM-1017-14 32E PM "Adult" Gray Foraging
## 4 13E-AM-1017-05 13E AM "Adult" Gray Foraging
## 5 11H-AM-1010-03 11H AM "Adult" Cinnamon Foraging
## 6 36H-AM-1010-02 36H AM "Adult" Gray Foraging
# Bar chart of each action's share of all observed activity rows, with bars
# ordered from the most to the least common action.
squirrel_clean %>%
  group_by(Action) %>%
  summarise(Percentage = n() / nrow(squirrel_clean)) %>%
  ggplot(aes(x = reorder(Action, -Percentage), y = Percentage)) +
  geom_col() +
  labs(x = "Actions", title = "What are Squirrels at Central Park up to?")
# Bar chart of the ten hectares with the most activity rows, largest first.
squirrel_clean %>%
  count(Hectare, name = "Count", sort = TRUE) %>%
  head(10) %>%
  ggplot(aes(x = reorder(Hectare, -Count), y = Count)) +
  geom_col() +
  labs(x = "Location", title = "Locations with the Most Squirrel Sightings")
# Load the GDP growth table from its URL and preview the first six rows.
gdp_df <- read.csv(file = gdp_url)
head(gdp_df, n = 6L)
## Country.Name Country.Code Indicator.Name Indicator.Code X1960 X1961
## 1 Aruba ABW GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## 2 Afghanistan AFG GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## 3 Angola AGO GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## 4 Albania ALB GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## 5 Andorra AND GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## 6 Arab World ARB GDP growth (annual %) NY.GDP.MKTP.KD.ZG NA NA
## X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970 X1971 X1972
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA 4.649465 8.149743
## 6 NA NA NA NA NA NA NA NA NA NA NA
## X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980
## 1 NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA
## 5 7.788467 5.61879 0.5422057 3.303787 2.838576 1.4630002 0.04155719 2.208728
## 6 NA NA NA 15.740326 8.243803 -0.6817728 11.68988215 9.082873
## X1981 X1982 X1983 X1984 X1985 X1986 X1987
## 1 NA NA NA NA NA NA 16.0784314
## 2 NA NA NA NA NA NA NA
## 3 -4.4000012 0.000000 4.200001 6.000002 3.499999 2.900002 4.0827486
## 4 5.7456353 2.948597 1.104938 -1.251597 1.780644 5.637243 -0.7878427
## 5 -0.1324745 1.246461 1.770118 1.784687 2.321433 3.253322 5.5471222
## 6 3.0386015 -9.078660 -6.896081 1.326049 -2.366242 4.684686 -0.5397674
## X1988 X1989 X1990 X1991 X1992 X1993 X1994
## 1 18.648649 12.12984055 3.961402 7.9628725 5.882353 7.307692 8.203903
## 2 NA NA NA NA NA NA NA
## 3 6.128890 0.04162146 -3.450099 0.9913593 -5.838281 -23.983417 1.339363
## 4 -1.420040 9.83654897 -9.575640 -28.0021417 -7.187111 9.559412 8.302867
## 5 5.094326 4.82703424 3.781388 2.5460029 0.929212 -1.031484 2.383188
## 6 5.671389 2.37715021 13.126828 1.5030680 4.938746 3.267514 3.203914
## X1995 X1996 X1997 X1998 X1999 X2000 X2001
## 1 2.547144 1.185788 7.046874 1.991986 1.238042 7.616588 -2.971257
## 2 NA NA NA NA NA NA NA
## 3 15.000000 13.544370 7.274277 4.691146 2.181490 3.054624 4.205999
## 4 13.322333 9.099999 -10.919984 8.830088 12.889897 6.950036 8.290070
## 5 2.757499 4.649740 9.067672 3.194790 4.099081 3.528361 8.119358
## 6 2.748955 4.592264 4.224988 5.265799 1.800874 5.496228 1.563740
## X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1 -3.2736464 1.975547 7.911563 1.214349 1.050608 1.800226 -0.09070805
## 2 NA 8.832278 1.414118 11.229715 5.357403 13.826320 3.92498382
## 3 13.6656865 2.989850 10.952862 15.028915 11.547683 14.010018 11.16613833
## 4 4.5399606 5.530051 5.509999 5.529915 5.900084 5.979982 7.49996956
## 5 4.5463622 8.694204 8.135676 5.397796 4.808689 1.553188 -5.55918637
## 6 0.5699665 5.318673 9.373925 5.747402 6.501691 4.523806 5.81119882
## X2009 X2010 X2011 X2012 X2013 X2014 X2015
## 1 -10.5197485 -3.685029 3.446054750 -1.369863 4.198232 0.300000 5.7000009
## 2 21.3905284 14.362441 0.426354793 12.752287 5.600745 2.724543 1.4513147
## 3 0.8587126 4.403933 3.471976345 8.542188 4.954545 4.822628 0.9435716
## 4 3.3500665 3.706892 2.545321845 1.417526 1.001987 1.774487 2.2187522
## 5 -5.3028465 -1.974958 -0.008069753 -4.974444 -3.547597 2.504466 1.4341404
## 6 0.4199733 4.668194 3.748900536 6.662222 3.287271 2.505076 3.2433121
## X2016 X2017 X2018 X2019 X2020
## 1 2.100000 1.9999991 NA NA NA
## 2 2.260314 2.6470032 1.189228 3.9116034 NA
## 3 -2.580050 -0.1472129 -2.003630 -0.6246443 NA
## 4 3.314805 3.8021975 4.071301 2.2400703 NA
## 5 3.709678 0.3460719 1.588765 1.8492383 NA
## 6 3.507128 0.8208465 2.079469 1.4876330 NA
# The year columns appear in gdp_df as "X1960", "X1961", ...; strip the
# leading "X" from exactly those columns in one vectorised call instead of
# looping over the names and rescanning the vector each time.
cols <- names(gdp_df)
is_year <- grepl("^X[0-9]{4}$", cols)
cols[is_year] <- sub("^X", "", cols[is_year])
names(gdp_df) <- cols
year_cols <- cols[is_year]

# Pivot the year columns into a single `Year` column (kept as character) with
# the growth figure in `percent_growth`, then drop rows containing NA.
gdp_clean <- gdp_df %>%
  pivot_longer(
    cols = all_of(year_cols),
    names_to = "Year",
    values_to = "percent_growth"
  ) %>%
  drop_na()

# Separate lookup table of country names and their codes.
country_code <- gdp_clean %>%
  distinct(Country.Name, Country.Code)

# Keep only the columns used by the analysis below.
gdp_clean <- gdp_clean %>%
  select(Country.Name, Year, percent_growth)
head(gdp_clean)
## # A tibble: 6 x 3
## Country.Name Year percent_growth
## <chr> <chr> <dbl>
## 1 Aruba 1987 16.1
## 2 Aruba 1988 18.6
## 3 Aruba 1989 12.1
## 4 Aruba 1990 3.96
## 5 Aruba 1991 7.96
## 6 Aruba 1992 5.88
# Horizontal bar chart of the ten entries in Country.Name with the highest
# average yearly growth across the years that have data.
gdp_clean %>%
  group_by(Country.Name) %>%
  summarize(Avg_Growth = sum(percent_growth) / n()) %>%
  arrange(desc(Avg_Growth)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(Country.Name, Avg_Growth), y = Avg_Growth)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Countries",
    title = "Largest Average Change in GDP (1960-2020)"
  )
# Subset to United States rows from 1997 onwards. `Year` is stored as text, so
# it is converted to an integer for the comparison; comparing the character
# column to a number directly falls back to string ordering. filter() also
# replaces the matrix-style `[` index, which is deprecated for tibbles.
us_gdp <- gdp_clean %>%
  filter(Country.Name == "United States", as.integer(Year) >= 1997L)

# Line chart of yearly growth, with the year labels rotated to stay legible.
us_gdp %>%
  ggplot(aes(x = Year, y = percent_growth, group = Country.Name)) +
  geom_line() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Change in GDP since 1997")
# Load the candy survey from its URL and preview the first six rows.
# NOTE(review): some column names print with "Õ" where an apostrophe would be
# expected, which looks like a file-encoding mismatch; confirm the source
# encoding before relying on those names.
candy_df <- read.csv(file = candy_url)
head(candy_df, n = 6L)
## Internal.ID Q1..GOING.OUT. Q2..GENDER Q3..AGE Q4..COUNTRY
## 1 90258773
## 2 90272821 No Male 44 USA
## 3 90272829 Male 49 USA
## 4 90272840 No Male 40 us
## 5 90272841 No Male 23 usa
## 6 90272852 No Male
## Q5..STATE..PROVINCE..COUNTY..ETC Q6...100.Grand.Bar
## 1
## 2 NM MEH
## 3 Virginia
## 4 or MEH
## 5 exton pa JOY
## 6 JOY
## Q6...Anonymous.brown.globs.that.come.in.black.and.orange.wrappers..a.k.a..Mary.Janes.
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 DESPAIR
## Q6...Any.full.sized.candy.bar Q6...Black.Jacks Q6...Bonkers..the.candy.
## 1
## 2 JOY MEH DESPAIR
## 3
## 4 JOY MEH MEH
## 5 JOY DESPAIR MEH
## 6 JOY
## Q6...Bonkers..the.board.game. Q6...Bottle.Caps Q6...Box.o.Raisins
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 DESPAIR MEH DESPAIR
## 5 DESPAIR MEH DESPAIR
## 6 MEH MEH
## Q6...Broken.glow.stick Q6...Butterfinger Q6...Cadbury.Creme.Eggs
## 1
## 2 DESPAIR DESPAIR MEH
## 3
## 4 DESPAIR MEH MEH
## 5 DESPAIR MEH MEH
## 6 DESPAIR JOY DESPAIR
## Q6...Candy.Corn
## 1
## 2 MEH
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 MEH
## Q6...Candy.that.is.clearly.just.the.stuff.given.out.for.free.at.restaurants
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 MEH
## Q6...Caramellos Q6...Cash..or.other.forms.of.legal.tender Q6...Chardonnay
## 1
## 2 MEH JOY MEH
## 3
## 4 MEH JOY MEH
## 5 JOY MEH JOY
## 6 JOY JOY JOY
## Q6...Chick.o.Sticks..we.donÕt.know.what.that.is. Q6...Chiclets
## 1
## 2 DESPAIR DESPAIR
## 3
## 4 JOY MEH
## 5 DESPAIR DESPAIR
## 6 MEH MEH
## Q6...Coffee.Crisp Q6...Creepy.Religious.comics.Chick.Tracts
## 1
## 2 DESPAIR DESPAIR
## 3
## 4 DESPAIR DESPAIR
## 5 MEH DESPAIR
## 6 JOY DESPAIR
## Q6...Dental.paraphenalia Q6...Dots Q6...Dove.Bars Q6...Fuzzy.Peaches
## 1
## 2 DESPAIR MEH JOY DESPAIR
## 3
## 4 DESPAIR JOY JOY MEH
## 5 DESPAIR MEH JOY DESPAIR
## 6 MEH MEH JOY
## Q6...Generic.Brand.Acetaminophen Q6...Glow.sticks Q6...Goo.Goo.Clusters
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH JOY JOY
## 5 DESPAIR DESPAIR MEH
## 6 DESPAIR MEH JOY
## Q6...Good.N..Plenty Q6...Gum.from.baseball.cards Q6...Gummy.Bears.straight.up
## 1
## 2 MEH DESPAIR MEH
## 3
## 4 MEH DESPAIR JOY
## 5 MEH DESPAIR JOY
## 6 MEH DESPAIR MEH
## Q6...Hard.Candy Q6...Healthy.Fruit Q6...Heath.Bar
## 1
## 2 MEH DESPAIR MEH
## 3
## 4 MEH MEH DESPAIR
## 5 MEH DESPAIR DESPAIR
## 6 MEH MEH JOY
## Q6...Hershey.s.Dark.Chocolate Q6...HersheyÕs.Milk.Chocolate
## 1
## 2 JOY JOY
## 3
## 4 MEH MEH
## 5 JOY MEH
## 6 JOY MEH
## Q6...Hershey.s.Kisses Q6...Hugs..actual.physical.hugs.
## 1
## 2 MEH DESPAIR
## 3
## 4 MEH MEH
## 5 MEH DESPAIR
## 6 MEH MEH
## Q6...Jolly.Rancher..bad.flavor. Q6...Jolly.Ranchers..good.flavor.
## 1
## 2 DESPAIR MEH
## 3
## 4 MEH MEH
## 5 MEH JOY
## 6 MEH MEH
## Q6...JoyJoy..Mit.Iodine.. Q6...Junior.Mints Q6...Senior.Mints
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 DESPAIR DESPAIR DESPAIR
## 5 MEH JOY DESPAIR
## 6 JOY
## Q6...Kale.smoothie Q6...Kinder.Happy.Hippo Q6...Kit.Kat Q6...LaffyTaffy
## 1
## 2 DESPAIR DESPAIR JOY DESPAIR
## 3
## 4 DESPAIR MEH MEH JOY
## 5 DESPAIR JOY JOY JOY
## 6 DESPAIR JOY MEH
## Q6...LemonHeads Q6...Licorice..not.black. Q6...Licorice..yes.black.
## 1
## 2 MEH MEH JOY
## 3
## 4 JOY MEH JOY
## 5 JOY MEH DESPAIR
## 6 MEH MEH MEH
## Q6...Lindt.Truffle Q6...Lollipops Q6...Mars Q6...Maynards Q6...Mike.and.Ike
## 1
## 2 MEH DESPAIR DESPAIR DESPAIR MEH
## 3
## 4 JOY DESPAIR JOY MEH MEH
## 5 JOY MEH MEH DESPAIR MEH
## 6 JOY MEH JOY JOY
## Q6...Milk.Duds Q6...Milky.Way Q6...Regular.M.Ms Q6...Peanut.M.MÕs
## 1
## 2 MEH JOY JOY MEH
## 3
## 4 DESPAIR JOY MEH JOY
## 5 MEH JOY JOY JOY
## 6 JOY JOY JOY JOY
## Q6...Blue.M.M.s Q6...Red.M.M.s Q6...Green.Party.M.M.s Q6...Independent.M.M.s
## 1
## 2 JOY JOY JOY JOY
## 3
## 4 MEH MEH MEH MEH
## 5 JOY JOY JOY JOY
## 6 JOY JOY
## Q6...Abstained.from.M.M.ing. Q6...Minibags.of.chips Q6...Mint.Kisses
## 1
## 2 DESPAIR DESPAIR MEH
## 3
## 4 MEH DESPAIR DESPAIR
## 5 MEH DESPAIR JOY
## 6 JOY JOY
## Q6...Mint.Juleps Q6...Mr..Goodbar Q6...Necco.Wafers Q6...Nerds
## 1
## 2 DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH MEH DESPAIR MEH
## 5 JOY MEH DESPAIR JOY
## 6 JOY JOY MEH JOY
## Q6...Nestle.Crunch Q6...Now.n.Laters Q6...Peeps Q6...Pencils Q6...Pixy.Stix
## 1
## 2 JOY DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH MEH DESPAIR DESPAIR DESPAIR
## 5 JOY JOY MEH DESPAIR JOY
## 6 MEH MEH DESPAIR DESPAIR MEH
## Q6...Real.Housewives.of.Orange.County.Season.9.Blue.Ray
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 DESPAIR
## Q6...ReeseÕs.Peanut.Butter.Cups Q6...Reese.s.Pieces Q6...Reggie.Jackson.Bar
## 1
## 2 JOY JOY DESPAIR
## 3
## 4 JOY MEH MEH
## 5 JOY JOY MEH
## 6 JOY JOY DESPAIR
## Q6...Rolos Q6...Sandwich.sized.bags.filled.with.BooBerry.Crunch Q6...Skittles
## 1
## 2 JOY DESPAIR DESPAIR
## 3
## 4 MEH DESPAIR DESPAIR
## 5 JOY JOY
## 6 JOY DESPAIR JOY
## Q6...Smarties..American. Q6...Smarties..Commonwealth. Q6...Snickers
## 1
## 2 DESPAIR DESPAIR MEH
## 3
## 4 DESPAIR MEH JOY
## 5 JOY DESPAIR
## 6 JOY JOY JOY
## Q6...Sourpatch.Kids..i.e..abominations.of.nature. Q6...Spotted.Dick
## 1
## 2 DESPAIR DESPAIR
## 3
## 4 MEH DESPAIR
## 5 MEH DESPAIR
## 6 DESPAIR DESPAIR
## Q6...Starburst Q6...Sweet.Tarts Q6...Swedish.Fish
## 1
## 2 MEH DESPAIR MEH
## 3
## 4 MEH MEH JOY
## 5 JOY JOY MEH
## 6 MEH MEH MEH
## Q6...Sweetums..a.friend.to.diabetes. Q6...Take.5 Q6...Tic.Tacs
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH JOY MEH
## 5 DESPAIR MEH DESPAIR
## 6 MEH
## Q6...Those.odd.marshmallow.circus.peanut.things Q6...Three.Musketeers
## 1
## 2 DESPAIR JOY
## 3
## 4 DESPAIR DESPAIR
## 5 DESPAIR JOY
## 6 DESPAIR JOY
## Q6...Tolberone.something.or.other Q6...Trail.Mix Q6...Twix
## 1
## 2 JOY DESPAIR JOY
## 3
## 4 JOY MEH JOY
## 5 JOY DESPAIR JOY
## 6 JOY MEH JOY
## Q6...Vials.of.pure.high.fructose.corn.syrup..for.main.lining.into.your.vein
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 MEH
## 6 DESPAIR
## Q6...Vicodin Q6...Whatchamacallit.Bars Q6...White.Bread
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 JOY JOY DESPAIR
## 5 JOY JOY DESPAIR
## 6 DESPAIR JOY DESPAIR
## Q6...Whole.Wheat.anything Q6...York.Peppermint.Patties
## 1
## 2 DESPAIR DESPAIR
## 3
## 4 DESPAIR DESPAIR
## 5 DESPAIR JOY
## 6 DESPAIR JOY
## Q7..JOY.OTHER
## 1
## 2 Mounds
## 3
## 4 Reese's crispy crunchy bars, 5th avenue bars, ferrero rocher, dries fruit other than raisins
## 5
## 6
## Q8..DESPAIR.OTHER Q9..OTHER.COMMENTS
## 1
## 2 Bottom line is Twix is really the only candy worth eating.
## 3
## 4 Raisins can go to hell
## 5
## 6
## Q10..DRESS X Q11..DAY Q12..MEDIA..Daily.Dish. Q12..MEDIA..Science.
## 1 NA NA
## 2 White and gold Sunday NA 1
## 3 NA NA
## 4 White and gold Sunday NA 1
## 5 White and gold Friday NA 1
## 6 NA 1
## Q12..MEDIA..ESPN. Q12..MEDIA..Yahoo. Click.Coordinates..x..y.
## 1 NA NA
## 2 NA NA (84, 25)
## 3 NA NA
## 4 NA NA (75, 23)
## 5 NA NA (70, 10)
## 6 NA NA (75, 23)
# Column count of the survey table. The variable name shadows base::length as
# a variable; it is kept under this name because code further down the script
# may refer to it.
length <- ncol(candy_df)

# A dot-separated piece of a column name that is just a question tag such as
# "Q6" or "Q12". One or more digits are allowed so two-digit question numbers
# are stripped as well; the match is case-insensitive.
pattern <- "^Q[0-9]+$"

# Turn one raw column name into a readable label: split on dots, discard the
# question tag and the empty pieces produced by consecutive dots, and join
# what is left with single spaces.
strip_question_tag <- function(raw_name) {
  pieces <- strsplit(raw_name, ".", fixed = TRUE)[[1]]
  keep <- nzchar(pieces) & !grepl(pattern, pieces, ignore.case = TRUE)
  paste(pieces[keep], collapse = " ")
}

# Removing "Q#" from every column name.
columns <- vapply(
  names(candy_df),
  strip_question_tag,
  character(1),
  USE.NAMES = FALSE
)
names(candy_df) <- columns
head(candy_df)
## Internal ID GOING OUT GENDER AGE COUNTRY STATE PROVINCE COUNTY ETC
## 1 90258773
## 2 90272821 No Male 44 USA NM
## 3 90272829 Male 49 USA Virginia
## 4 90272840 No Male 40 us or
## 5 90272841 No Male 23 usa exton pa
## 6 90272852 No Male
## 100 Grand Bar
## 1
## 2 MEH
## 3
## 4 MEH
## 5 JOY
## 6 JOY
## Anonymous brown globs that come in black and orange wrappers a k a Mary Janes
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 DESPAIR
## Any full sized candy bar Black Jacks Bonkers the candy
## 1
## 2 JOY MEH DESPAIR
## 3
## 4 JOY MEH MEH
## 5 JOY DESPAIR MEH
## 6 JOY
## Bonkers the board game Bottle Caps Box o Raisins Broken glow stick
## 1
## 2 DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 DESPAIR MEH DESPAIR DESPAIR
## 5 DESPAIR MEH DESPAIR DESPAIR
## 6 MEH MEH DESPAIR
## Butterfinger Cadbury Creme Eggs Candy Corn
## 1
## 2 DESPAIR MEH MEH
## 3
## 4 MEH MEH DESPAIR
## 5 MEH MEH DESPAIR
## 6 JOY DESPAIR MEH
## Candy that is clearly just the stuff given out for free at restaurants
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 DESPAIR
## 6 MEH
## Caramellos Cash or other forms of legal tender Chardonnay
## 1
## 2 MEH JOY MEH
## 3
## 4 MEH JOY MEH
## 5 JOY MEH JOY
## 6 JOY JOY JOY
## Chick o Sticks we donÕt know what that is Chiclets Coffee Crisp
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 JOY MEH DESPAIR
## 5 DESPAIR DESPAIR MEH
## 6 MEH MEH JOY
## Creepy Religious comics Chick Tracts Dental paraphenalia Dots Dove Bars
## 1
## 2 DESPAIR DESPAIR MEH JOY
## 3
## 4 DESPAIR DESPAIR JOY JOY
## 5 DESPAIR DESPAIR MEH JOY
## 6 DESPAIR MEH MEH JOY
## Fuzzy Peaches Generic Brand Acetaminophen Glow sticks Goo Goo Clusters
## 1
## 2 DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH MEH JOY JOY
## 5 DESPAIR DESPAIR DESPAIR MEH
## 6 DESPAIR MEH JOY
## Good N Plenty Gum from baseball cards Gummy Bears straight up Hard Candy
## 1
## 2 MEH DESPAIR MEH MEH
## 3
## 4 MEH DESPAIR JOY MEH
## 5 MEH DESPAIR JOY MEH
## 6 MEH DESPAIR MEH MEH
## Healthy Fruit Heath Bar Hershey s Dark Chocolate HersheyÕs Milk Chocolate
## 1
## 2 DESPAIR MEH JOY JOY
## 3
## 4 MEH DESPAIR MEH MEH
## 5 DESPAIR DESPAIR JOY MEH
## 6 MEH JOY JOY MEH
## Hershey s Kisses Hugs actual physical hugs Jolly Rancher bad flavor
## 1
## 2 MEH DESPAIR DESPAIR
## 3
## 4 MEH MEH MEH
## 5 MEH DESPAIR MEH
## 6 MEH MEH MEH
## Jolly Ranchers good flavor JoyJoy Mit Iodine Junior Mints Senior Mints
## 1
## 2 MEH DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH DESPAIR DESPAIR DESPAIR
## 5 JOY MEH JOY DESPAIR
## 6 MEH JOY
## Kale smoothie Kinder Happy Hippo Kit Kat LaffyTaffy LemonHeads
## 1
## 2 DESPAIR DESPAIR JOY DESPAIR MEH
## 3
## 4 DESPAIR MEH MEH JOY JOY
## 5 DESPAIR JOY JOY JOY JOY
## 6 DESPAIR JOY MEH MEH
## Licorice not black Licorice yes black Lindt Truffle Lollipops Mars
## 1
## 2 MEH JOY MEH DESPAIR DESPAIR
## 3
## 4 MEH JOY JOY DESPAIR JOY
## 5 MEH DESPAIR JOY MEH MEH
## 6 MEH MEH JOY MEH JOY
## Maynards Mike and Ike Milk Duds Milky Way Regular M Ms Peanut M MÕs
## 1
## 2 DESPAIR MEH MEH JOY JOY MEH
## 3
## 4 MEH MEH DESPAIR JOY MEH JOY
## 5 DESPAIR MEH MEH JOY JOY JOY
## 6 JOY JOY JOY JOY JOY
## Blue M M s Red M M s Green Party M M s Independent M M s
## 1
## 2 JOY JOY JOY JOY
## 3
## 4 MEH MEH MEH MEH
## 5 JOY JOY JOY JOY
## 6 JOY JOY
## Abstained from M M ing Minibags of chips Mint Kisses Mint Juleps Mr Goodbar
## 1
## 2 DESPAIR DESPAIR MEH DESPAIR DESPAIR
## 3
## 4 MEH DESPAIR DESPAIR MEH MEH
## 5 MEH DESPAIR JOY JOY MEH
## 6 JOY JOY JOY JOY
## Necco Wafers Nerds Nestle Crunch Now n Laters Peeps Pencils Pixy Stix
## 1
## 2 DESPAIR DESPAIR JOY DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 DESPAIR MEH MEH MEH DESPAIR DESPAIR DESPAIR
## 5 DESPAIR JOY JOY JOY MEH DESPAIR JOY
## 6 MEH JOY MEH MEH DESPAIR DESPAIR MEH
## Real Housewives of Orange County Season 9 Blue Ray ReeseÕs Peanut Butter Cups
## 1
## 2 DESPAIR JOY
## 3
## 4 DESPAIR JOY
## 5 DESPAIR JOY
## 6 DESPAIR JOY
## Reese s Pieces Reggie Jackson Bar Rolos
## 1
## 2 JOY DESPAIR JOY
## 3
## 4 MEH MEH MEH
## 5 JOY MEH JOY
## 6 JOY DESPAIR JOY
## Sandwich sized bags filled with BooBerry Crunch Skittles Smarties American
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 DESPAIR DESPAIR DESPAIR
## 5 JOY JOY
## 6 DESPAIR JOY JOY
## Smarties Commonwealth Snickers Sourpatch Kids i e abominations of nature
## 1
## 2 DESPAIR MEH DESPAIR
## 3
## 4 MEH JOY MEH
## 5 DESPAIR MEH
## 6 JOY JOY DESPAIR
## Spotted Dick Starburst Sweet Tarts Swedish Fish
## 1
## 2 DESPAIR MEH DESPAIR MEH
## 3
## 4 DESPAIR MEH MEH JOY
## 5 DESPAIR JOY JOY MEH
## 6 DESPAIR MEH MEH MEH
## Sweetums a friend to diabetes Take 5 Tic Tacs
## 1
## 2 DESPAIR DESPAIR DESPAIR
## 3
## 4 MEH JOY MEH
## 5 DESPAIR MEH DESPAIR
## 6 MEH
## Those odd marshmallow circus peanut things Three Musketeers
## 1
## 2 DESPAIR JOY
## 3
## 4 DESPAIR DESPAIR
## 5 DESPAIR JOY
## 6 DESPAIR JOY
## Tolberone something or other Trail Mix Twix
## 1
## 2 JOY DESPAIR JOY
## 3
## 4 JOY MEH JOY
## 5 JOY DESPAIR JOY
## 6 JOY MEH JOY
## Vials of pure high fructose corn syrup for main lining into your vein
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 MEH
## 6 DESPAIR
## Vicodin Whatchamacallit Bars White Bread Whole Wheat anything
## 1
## 2 DESPAIR DESPAIR DESPAIR DESPAIR
## 3
## 4 JOY JOY DESPAIR DESPAIR
## 5 JOY JOY DESPAIR DESPAIR
## 6 DESPAIR JOY DESPAIR DESPAIR
## York Peppermint Patties
## 1
## 2 DESPAIR
## 3
## 4 DESPAIR
## 5 JOY
## 6 JOY
## JOY OTHER
## 1
## 2 Mounds
## 3
## 4 Reese's crispy crunchy bars, 5th avenue bars, ferrero rocher, dries fruit other than raisins
## 5
## 6
## DESPAIR OTHER OTHER COMMENTS
## 1
## 2 Bottom line is Twix is really the only candy worth eating.
## 3
## 4 Raisins can go to hell
## 5
## 6
## Q10 DRESS X Q11 DAY Q12 MEDIA Daily Dish Q12 MEDIA Science
## 1 NA NA
## 2 White and gold Sunday NA 1
## 3 NA NA
## 4 White and gold Sunday NA 1
## 5 White and gold Friday NA 1
## 6 NA 1
## Q12 MEDIA ESPN Q12 MEDIA Yahoo Click Coordinates x y
## 1 NA NA
## 2 NA NA (84, 25)
## 3 NA NA
## 4 NA NA (75, 23)
## 5 NA NA (70, 10)
## 6 NA NA (75, 23)
# Pivot the candy ratings to long form: one row per respondent and candy.
# The rating columns are selected by position: they start at column 7, after
# the six leading respondent columns, and stop before the last eleven columns.
candy_cols <- names(candy_df)[7:(ncol(candy_df) - 11)]
candy_clean <- candy_df %>%
  pivot_longer(
    cols = all_of(candy_cols),
    names_to = "Candy",
    values_to = "Mood"
  )

# Separate out the respondent information that repeats on every candy row.
candy_id <- candy_clean %>%
  distinct(`Internal ID`, `GOING OUT`, GENDER, AGE, COUNTRY)

# Keep only the columns needed for analysis and drop unanswered ratings.
# filter() replaces the matrix-style `[` index, which is deprecated for tibbles.
candy_clean <- candy_clean %>%
  select(`Internal ID`, Candy, Mood) %>%
  filter(Mood != "")
head(candy_clean)
## # A tibble: 6 x 3
## `Internal ID` Candy Mood
## <int> <chr> <chr>
## 1 90272821 100 Grand Bar MEH
## 2 90272821 Anonymous brown globs that come in black and orange wrap~ DESPA~
## 3 90272821 Any full sized candy bar JOY
## 4 90272821 Black Jacks MEH
## 5 90272821 Bonkers the candy DESPA~
## 6 90272821 Bonkers the board game DESPA~
# Bar chart of each mood's share of all answered candy ratings.
candy_clean %>%
  group_by(Mood) %>%
  summarise(Percent = n() / nrow(candy_clean)) %>%
  ggplot(aes(x = Mood, y = Percent)) +
  geom_col() +
  labs(title = "")
# Ratings marked JOY. filter() replaces the matrix-style `[` index, which is
# deprecated for tibbles.
joy <- candy_clean %>%
  filter(Mood == "JOY")

# Horizontal bar chart of the ten candies with the most JOY ratings.
joy %>%
  group_by(Candy) %>%
  summarize(Count = n()) %>%
  arrange(desc(Count)) %>%
  head(10) %>%
  ggplot(aes(x = reorder(Candy, Count), y = Count)) +
  geom_col() +
  coord_flip() +
  labs(x = "Candy", title = "Which Candy Brings People Joy?")