library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
- Import data from GitHub
# Load the untidy dog data set straight from the project's GitHub repository.
# stringsAsFactors = FALSE keeps text columns as character vectors on every
# R version (the default was TRUE before R 4.0), so later value-by-value
# replacements in the Weight column do not hit factor-level restrictions.
mydata <- read.csv(
  "https://raw.githubusercontent.com/arinolan/Nolan_Project-2/main/Untidy_Dog_Data.csv",
  stringsAsFactors = FALSE
)
mydata
## Breed Weight Color
## 1 Lab 90lbs yellow
## 2 Yorki 10 pounds black and brown
## 3 Aussie 60lbs blue
## 4 Corgi 35 beige
## 5 Husky 40kgs white
- Clean data
# Clean the Weight column: turn free-text entries such as "90lbs",
# "10 pounds", "35" and "40kgs" into plain numbers expressed in pounds.
# The first number found in each entry is taken as the weight; entries that
# mention "kg" are converted with the same 2.2 lbs-per-kg factor used before,
# and entries without a unit are assumed to already be in pounds.
# Entries that contain no number at all become NA.
lbs_per_kg <- 2.2
kgs_calc <- 40 * lbs_per_kg  # kept so any later code that uses it still works
df_mydata <- mydata

# as.character() makes this work whether the column is text, factor or numeric.
weight_text <- trimws(as.character(df_mydata$Weight))
number_pos <- regexpr("[0-9]+(\\.[0-9]+)?", weight_text)
has_number <- !is.na(number_pos) & number_pos > 0

# regmatches() returns only the entries that matched, so fill by mask.
weight_lbs <- rep(NA_real_, length(weight_text))
weight_lbs[has_number] <- as.numeric(regmatches(weight_text, number_pos))

in_kgs <- has_number & grepl("kg", weight_text, ignore.case = TRUE)
weight_lbs[in_kgs] <- weight_lbs[in_kgs] * lbs_per_kg

# Store a real numeric column so sorting and comparisons use numeric order
# instead of text order.
df_mydata$Weight <- weight_lbs
df_mydata
## Breed Weight Color
## 1 Lab 90 yellow
## 2 Yorki 10 black and brown
## 3 Aussie 60 blue
## 4 Corgi 35 beige
## 5 Husky 88 white
- Analysis
- Which dog breeds weigh less than 50 lbs?
- Which dog breeds weigh more than 50 lbs?
# Show every dog from lightest to heaviest. Weight is run through
# as.character() and then as.numeric() so the rows are ordered by numeric
# value; the original row names are kept.
with(df_mydata, df_mydata[order(as.numeric(as.character(Weight))), ])
## Breed Weight Color
## 2 Yorki 10 black and brown
## 4 Corgi 35 beige
## 3 Aussie 60 blue
## 5 Husky 88 white
## 1 Lab 90 yellow
# Breeds that weigh less than 50 lbs.
# Weight may be held as text (e.g. "90") rather than as a number, and R
# compares text with a number as strings ("100" < "50" is TRUE), so convert
# to numeric before applying the threshold.
small <- df_mydata %>%
  filter(as.numeric(as.character(Weight)) < 50)
small$Breed
## [1] "Yorki" "Corgi"
# Breeds that weigh more than 50 lbs.
# Weight may be held as text (e.g. "90") rather than as a number, and R
# compares text with a number as strings ("9" > "50" is TRUE), so convert
# to numeric before applying the threshold.
large <- df_mydata %>%
  filter(as.numeric(as.character(Weight)) > 50)
large$Breed
## [1] "Lab" "Aussie" "Husky"