This is a summary of my data set, which contains data on the price of orange juice.
# Location of the FrozenJuice CSV file that this report analyses.
theUrl <- "https://raw.githubusercontent.com/Kingtilon1/Bridge_Workshop/main/FrozenJuice.csv"

# Parse the comma-separated file, taking the column names from its first row.
Juice <- read.table(theUrl, sep = ",", header = TRUE)

# Per-column summary statistics of the loaded data.
summary(Juice)
## X price ppi fdd
## Min. : 1.0 Min. : 26.30 Min. : 27.20 Min. : 0.0000
## 1st Qu.:153.8 1st Qu.: 41.30 1st Qu.: 33.50 1st Qu.: 0.0000
## Median :306.5 Median : 59.20 Median : 58.25 Median : 0.0000
## Mean :306.5 Mean : 72.95 Mean : 71.37 Mean : 0.6144
## 3rd Qu.:459.2 3rd Qu.:106.42 3rd Qu.:106.47 3rd Qu.: 0.0000
## Max. :612.0 Max. :162.80 Max. :140.10 Max. :37.0000
These are the mean and median of the “price” and “ppi” attributes.
# Mean and median of the `price` column, ignoring missing values.
price_mean <- mean(Juice[["price"]], na.rm = TRUE)
price_median <- median(Juice[["price"]], na.rm = TRUE)

# Two-row table: a label for each statistic next to its value.
price_sum <- data.frame(
  Price = c("Mean", "Median"),
  Value = c(price_mean, price_median)
)

print(price_sum)
## Price Value
## 1 Mean 72.94918
## 2 Median 59.20000
# Mean and median of the `ppi` column, ignoring missing values.
ppi_mean <- mean(Juice[["ppi"]], na.rm = TRUE)
ppi_median <- median(Juice[["ppi"]], na.rm = TRUE)

# Two-row table: a label for each statistic next to its value.
# NOTE(review): the label column is called `Price` even though these are ppi
# statistics; the name is kept because it appears in the rendered output.
ppi_sum <- data.frame(
  Price = c("Mean", "Median"),
  Value = c(ppi_mean, ppi_median)
)

print(ppi_sum)
## Price Value
## 1 Mean 71.37387
## 2 Median 58.24924
I created a subset of the rows so that only rows with a price over 35 dollars appear, renamed the columns to reflect the change, and kept only the last three columns, dropping the unnamed row-number column.
# Keep only the rows whose price exceeds 35, then keep the price, ppi and fdd
# columns under new "New ..." names (the X column is dropped).
sub_values <- Juice %>%
  filter(price > 35) %>%
  select(
    `New Price` = price,
    `New ppi` = ppi,
    `New fdd` = fdd
  )

# Per-column summary statistics of the subset.
summary(sub_values)
## New Price New ppi New fdd
## Min. : 35.10 Min. : 27.20 Min. : 0.0000
## 1st Qu.: 45.00 1st Qu.: 34.20 1st Qu.: 0.0000
## Median : 75.10 Median : 66.50 Median : 0.0000
## Mean : 77.29 Mean : 75.59 Mean : 0.6655
## 3rd Qu.:107.40 3rd Qu.:113.60 3rd Qu.: 0.0000
## Max. :162.80 Max. :140.10 Max. :37.0000
# Print the ppi mean and median of the full data set, then compute and print
# the same statistics for the `New ppi` column of `sub_values`.
# paste0() is used because every label already ends in a space; paste() would
# also insert its default " " separator and print a double space.
print(paste0("ppi mean: ", ppi_mean))
## [1] "ppi mean: 71.3738679514869"
print(paste0("ppi median: ", ppi_median))
## [1] "ppi median: 58.249235765"
# `[[` with the exact column name replaces the quoted `$'New ppi'` access;
# missing values are ignored.
sub_mean <- mean(sub_values[["New ppi"]], na.rm = TRUE)
sub_median <- median(sub_values[["New ppi"]], na.rm = TRUE)
print(paste0("new ppi mean: ", sub_mean))
## [1] "new ppi mean: 75.5911955701808"
print(paste0("new ppi median: ", sub_median))
## [1] "new ppi median: 66.5"
We can see that in this new data the ppi (Producer Price Index) mean increased by about 5.9% and the ppi median increased by approximately 14 percent.
# Print the price mean and median of the full data set, then compute and print
# the same statistics for the `New Price` column of `sub_values`.
# paste0() is used because every label already ends in a space; paste() would
# also insert its default " " separator and print a double space.
print(paste0("price mean: ", price_mean))
## [1] "price mean: 72.9491830065359"
print(paste0("price median: ", price_median))
## [1] "price median: 59.2"
# `[[` with the exact column name replaces the quoted `$'New Price'` access;
# missing values are ignored.
subs_mean <- mean(sub_values[["New Price"]], na.rm = TRUE)
subs_median <- median(sub_values[["New Price"]], na.rm = TRUE)
print(paste0("new price mean: ", subs_mean))
## [1] "new price mean: 77.2873417721519"
print(paste0("new price median: ", subs_median))
## [1] "new price median: 75.1"
We can see that in this new data the price mean increased by about 5.95% and the price median increased by approximately 26.86 percent.
# Relabel the `New fdd` values 0, 1 and 2 as text; gsub() converts the column
# to character, so every other value is kept as its digit string.
# The patterns are anchored with ^ and $ so that only a whole value is
# replaced. An unanchored "0", "1" or "2" would also rewrite digits inside
# larger numbers (10 would become "One!Zero!", 12 "One!TWOOOOO!").
new_sub <- sub_values %>%
  mutate(
    `New fdd` = gsub("^0$", "Zero!", `New fdd`),
    `New fdd` = gsub("^1$", "One!", `New fdd`),
    `New fdd` = gsub("^2$", "TWOOOOO!", `New fdd`)
  )

# Show the first 15 rows of the relabelled subset.
news_sub <- head(new_sub, 15)
news_sub