# Read the raw fruit price table directly from the course repository.
fruits <- read.csv(
  file = "https://raw.githubusercontent.com/Sangeetha-007/R-Practice/master/607/Projects/Project%202/Fruit%20Prices.csv"
)
# Show the raw data before any cleaning.
fruits
## item price calories
## 1 "banana" "$1" 105
## 2 "apple" "0.75" 95
## 3 "apple" "0.75" 95
## 4 "peach" "$3" 55
## 5 "peach" "$4" 55
## 6 "clementine" "2.5" 35
# Flag rows that exactly repeat an earlier row.
duplicated(fruits)
## [1] FALSE FALSE TRUE FALSE FALSE FALSE
# Keep only the first occurrence of each row, then show the result.
fruits <- fruits %>%
  distinct()
fruits
## item price calories
## 1 "banana" "$1" 105
## 2 "apple" "0.75" 95
## 3 "peach" "$3" 55
## 4 "peach" "$4" 55
## 5 "clementine" "2.5" 35
# Strip the literal double-quote characters embedded in the item names.
fruits[["item"]] <- gsub(
  pattern = '"',
  replacement = "",
  x = as.character(fruits[["item"]])
)
fruits
## item price calories
## 1 banana "$1" 105
## 2 apple "0.75" 95
## 3 peach "$3" 55
## 4 peach "$4" 55
## 5 clementine "2.5" 35
# Confirm the object is still a plain data frame.
class(fruits)
## [1] "data.frame"
# Strip the literal double-quote characters embedded in the price strings.
fruits[["price"]] <- gsub(
  pattern = '"',
  replacement = "",
  x = as.character(fruits[["price"]])
)
# The attempt below did not work: in a regular expression an unescaped `$`
# is the end-of-string anchor, so it never matches the dollar character.
# fruits$price <- gsub('$', "", as.character(fruits$price))
# fruits
# Escaping the dollar sign does match it. The result is only printed here,
# not assigned, so `fruits` itself is left unchanged by this step.
mutate(
  fruits,
  across(starts_with("price"), ~ as.numeric(gsub("\\$", "", .)))
)
## item price calories
## 1 banana 1.00 105
## 2 apple 0.75 95
## 3 peach 3.00 55
## 4 peach 4.00 55
## 5 clementine 2.50 35
# Check each column's class; price is still stored as text at this point.
sapply(X = fruits, FUN = class)
## item price calories
## "character" "character" "integer"
# Print the table: the dollar signs are still present in `price`.
fruits
## item price calories
## 1 banana $1 105
## 2 apple 0.75 95
## 3 peach $3 55
## 4 peach $4 55
## 5 clementine 2.5 35
# Convert price to numeric; parse_number() drops the non-numeric "$" prefix.
fruits <- mutate(fruits, price = parse_number(price))
# Verify that price is now numeric.
sapply(X = fruits, FUN = class)
## item price calories
## "character" "numeric" "integer"
fruits
## item price calories
## 1 banana 1.00 105
## 2 apple 0.75 95
## 3 peach 3.00 55
## 4 peach 4.00 55
## 5 clementine 2.50 35
# Per-column summary statistics for the cleaned data. summarise() called
# without any summary expressions returns a data frame with 0 columns and
# 1 row, so base summary() is used to get an actual overview.
summary(fruits)
Finally, I created a bar graph to show the calories of each item. The graph shows that bananas have the most calories (105) and clementines have the fewest (35).
# Bar chart of calories per fruit; bar height is the calories value itself.
ggplot(data = fruits, mapping = aes(x = item, y = calories)) +
  geom_col(position = "dodge")
Sources: https://www.datanovia.com/en/lessons/identify-and-remove-duplicate-data-in-r/, https://stackoverflow.com/questions/64741916/how-to-remove-the-dollar-sign-in-r