Pet Adoption
# Packages used
library(tidytext)
library(DT)
library(tm)
## Loading required package: NLP
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(leaflet)
library(ggplot2)
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(forcats)
# Load the dog listings straight from GitHub.
# The `ï..id` column name in the recorded summary below is the signature of a
# UTF-8 byte-order mark being read as data. Declaring the encoding strips the
# mark so the first column is imported as plain `id`.
myfile <- read.csv(
  "https://raw.githubusercontent.com/mgino11/listings_R/main/dog_descriptions2.csv",
  fileEncoding = "UTF-8-BOM"
)
# Per-column overview of the imported data frame.
summary(myfile)
## ï..id org_id url species
## Min. :45901985 Length:221 Length:221 Length:221
## 1st Qu.:45945344 Class :character Class :character Class :character
## Median :45987766 Mode :character Mode :character Mode :character
## Mean :45984757
## 3rd Qu.:46029444
## Max. :46042150
## breed_primary breed_secondary breed_mixed breed_unknown
## Length:221 Length:221 Mode :logical Mode :logical
## Class :character Class :character FALSE:37 FALSE:221
## Mode :character Mode :character TRUE :184
##
##
##
## color_primary color_secondary color_tertiary age
## Length:221 Length:221 Length:221 Length:221
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## sex size coat fixed
## Length:221 Length:221 Length:221 Mode :logical
## Class :character Class :character Class :character FALSE:41
## Mode :character Mode :character Mode :character TRUE :180
##
##
##
## house_trained declawed special_needs shots_current env_children
## Mode :logical Mode:logical Mode :logical Mode :logical Mode :logical
## FALSE:171 NA's:221 FALSE:220 FALSE:99 FALSE:7
## TRUE :50 TRUE :1 TRUE :122 TRUE :39
## NA's :175
##
##
## env_dogs env_cats name tags
## Mode :logical Mode :logical Length:221 Length:221
## FALSE:3 FALSE:3 Class :character Class :character
## TRUE :106 TRUE :16 Mode :character Mode :character
## NA's :112 NA's :202
##
##
## photo status posted contact_city
## Mode:logical Length:221 Length:221 Length:221
## NA's:221 Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## contact_state contact_zip contact_country stateQ
## Length:221 Min. :86401 Length:221 Min. :89009
## Class :character 1st Qu.:89101 Class :character 1st Qu.:89009
## Mode :character Median :89101 Mode :character Median :89009
## Mean :88809 Mean :89009
## 3rd Qu.:89103 3rd Qu.:89009
## Max. :89147 Max. :89009
## accessed type description
## Length:221 Length:221 Length:221
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Number of listings per primary breed, most frequent breed first.
count(myfile, breed_primary, sort = TRUE)
## breed_primary n
## 1 Chihuahua 53
## 2 Pit Bull Terrier 42
## 3 German Shepherd Dog 12
## 4 Terrier 12
## 5 Dachshund 8
## 6 Cattle Dog 7
## 7 Shepherd 7
## 8 Labrador Retriever 6
## 9 Boxer 5
## 10 Maltese 5
## 11 Jack Russell Terrier 4
## 12 American Bulldog 3
## 13 Cocker Spaniel 3
## 14 Collie 3
## 15 Doberman Pinscher 3
## 16 Mastiff 3
## 17 Poodle 3
## 18 Schnauzer 3
## 19 Shar-Pei 3
## 20 Akita 2
## 21 Catahoula Leopard Dog 2
## 22 Corgi 2
## 23 Hound 2
## 24 Italian Greyhound 2
## 25 Miniature Pinscher 2
## 26 Redbone Coonhound 2
## 27 Siberian Husky 2
## 28 American Staffordshire Terrier 1
## 29 Australian Cattle Dog / Blue Heeler 1
## 30 Beagle 1
## 31 Belgian Shepherd / Malinois 1
## 32 Bichon Frise 1
## 33 Black Labrador Retriever 1
## 34 Border Collie 1
## 35 Cane Corso 1
## 36 Chow Chow 1
## 37 Dogo Argentino 1
## 38 Giant Schnauzer 1
## 39 Husky 1
## 40 Jindo 1
## 41 Pomeranian 1
## 42 Pug 1
## 43 Retriever 1
## 44 Saint Bernard 1
## 45 Staffordshire Bull Terrier 1
## 46 Wirehaired Terrier 1
## 47 Yorkshire Terrier 1
# Number of listings per sex, largest group first.
count(myfile, sex, sort = TRUE)
## sex n
## 1 Male 122
## 2 Female 99
# Number of listings per age group, largest group first.
count(myfile, age, sort = TRUE)
## age n
## 1 Adult 87
## 2 Young 71
## 3 Baby 43
## 4 Senior 20
# Number of listings per size class, largest group first.
count(myfile, size, sort = TRUE)
## size n
## 1 Small 94
## 2 Large 72
## 3 Medium 54
## 4 Extra Large 1
Subset1
We subset the data in order to understand it better. Subset_1 gives us a new data frame containing the variables age, sex, and size.
# Keep only the adjacent columns from `age` through `size`.
subset_1 <- dplyr::select(myfile, age:size)
# Preview the first rows of the new data frame.
subset_1 %>%
  head()
## age sex size
## 1 Senior Male Medium
## 2 Adult Male Large
## 3 Adult Male Large
## 4 Baby Female Large
## 5 Young Male Small
## 6 Baby Male Medium
Subset2
# Keep only the adjacent columns from `fixed` through `declawed`.
subset_2 <- dplyr::select(myfile, fixed:declawed)
# Preview the first rows of the new data frame.
subset_2 %>%
  head()
## fixed house_trained declawed
## 1 TRUE TRUE NA
## 2 TRUE TRUE NA
## 3 TRUE FALSE NA
## 4 FALSE FALSE NA
## 5 TRUE FALSE NA
## 6 TRUE FALSE NA
Sub1_senior
If we want to know the number of senior dogs available for adoption, we can filter the subset; it shows that a notable number of senior dogs (20 of the 221 listings) have been put up for adoption.
# Rows of subset_1 whose age group is exactly "Senior".
Sub1_senior <- dplyr::filter(subset_1, age == "Senior")
print(Sub1_senior)
## age sex size
## 1 Senior Male Medium
## 2 Senior Male Medium
## 3 Senior Female Small
## 4 Senior Female Small
## 5 Senior Male Medium
## 6 Senior Male Large
## 7 Senior Female Medium
## 8 Senior Male Small
## 9 Senior Male Small
## 10 Senior Female Small
## 11 Senior Female Small
## 12 Senior Female Large
## 13 Senior Male Medium
## 14 Senior Male Large
## 15 Senior Male Small
## 16 Senior Male Small
## 17 Senior Male Small
## 18 Senior Male Small
## 19 Senior Male Small
## 20 Senior Female Small
Subset_1
To work with categorical data we need factors and their levels. The package used for this task is forcats.
# Column-wise overview of the age / sex / size subset.
subset_1 %>%
  summary()
## age sex size
## Length:221 Length:221 Length:221
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Convert `age` to a factor while counting. `n = 50` is larger than the number
# of distinct age groups, so no level is lumped into "Other" here.
count(subset_1, age = fct_lump(age, n = 50))
## age n
## 1 Adult 87
## 2 Baby 43
## 3 Senior 20
## 4 Young 71
Subset_2
# Column-wise overview of the fixed / house_trained / declawed subset.
subset_2 %>%
  summary()
## fixed house_trained declawed
## Mode :logical Mode :logical Mode:logical
## FALSE:41 FALSE:171 NA's:221
## TRUE :180 TRUE :50
# Cross-tabulate the two flags: one row per (fixed, house_trained) combination.
count(subset_2, fixed, house_trained)
## fixed house_trained n
## 1 FALSE FALSE 39
## 2 FALSE TRUE 2
## 3 TRUE FALSE 132
## 4 TRUE TRUE 48
# Horizontal bar chart of the number of dogs in each age group.
subset_1 %>%
  ggplot(mapping = aes(x = age)) +
  geom_bar() +
  coord_flip()
# Bar chart of age groups with the bars ordered from most to least frequent.
ggplot(
  data = mutate(subset_1, age = fct_infreq(age)),
  mapping = aes(x = age)
) +
  geom_bar()
# Age group and sex are both categorical, so a kernel density estimate (which
# assumes a continuous x) is not meaningful here. Compare the sexes within
# each age group using side-by-side bars of counts instead.
ggplot(subset_1, aes(x = age, fill = sex)) +
  geom_bar(position = "dodge")
# Horizontal bar chart of how many dogs are / are not fixed.
subset_2 %>%
  ggplot(mapping = aes(x = fixed)) +
  geom_bar() +
  coord_flip()
# `house_trained` and `fixed` are TRUE/FALSE flags, so a density curve over
# them is not meaningful. Show the counts of house-trained vs. not
# house-trained dogs as side-by-side bars, split by whether the dog is fixed.
ggplot(subset_2, aes(x = house_trained, fill = fixed)) +
  geom_bar(position = "dodge")