# Download the religion/income table as raw CSV text, parse that text into a
# data frame, and preview its first rows.
df <- getURL(
  url = "https://raw.githubusercontent.com/hrensimin05/Project2-/main/Dataset.csv"
)
salary <- read.csv(text = df)
head(salary)
## religion X.10k X10k.20k X20k.30k X30k.40k X40k.50k X50k.75k X75k.100k
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Refused 15 14 15 11 10 35 21
## X100k.150k X.150k refused
## 1 109 84 96
## 2 59 74 76
## 3 39 53 54
## 4 792 633 1489
## 5 17 18 116
# The income brackets are stored as column headers (wide format). Melt them
# into rows so that each record is one religion / bracket / count combination.
df <- melt(salary, id.vars = "religion")
head(df)
## religion variable value
## 1 Agnostic X.10k 27
## 2 Atheist X.10k 12
## 3 Buddhist X.10k 27
## 4 Catholic X.10k 418
## 5 Refused X.10k 15
## 6 Agnostic X10k.20k 34
# Replace melt's generic column names with descriptive ones.
colnames(df)[colnames(df) == "variable"] <- "income"
colnames(df)[colnames(df) == "value"] <- "frequency"
# Reorder the rows by religion so each religion's brackets sit together.
df <- df[order(df[["religion"]]), , drop = FALSE]
head(df)
## religion income frequency
## 1 Agnostic X.10k 27
## 6 Agnostic X10k.20k 34
## 11 Agnostic X20k.30k 60
## 16 Agnostic X30k.40k 81
## 21 Agnostic X40k.50k 76
## 26 Agnostic X50k.75k 137
# The data is now in long format: one row per religion and income bracket.
# Download the gender/height/weight table as raw CSV text, parse it into a
# data frame, and preview its first rows.
df2 <- getURL(
  url = "https://raw.githubusercontent.com/hrensimin05/Project2-/main/500_Person_Gender_Height_Weight_Index.csv"
)
health <- read.csv(text = df2)
head(health)
## X Gender Height Weight Index
## 1 1 Male 174 96 4
## 2 2 Male 189 87 2
## 3 3 Female 185 110 4
## 4 4 Female 195 104 3
## 5 5 Male 149 61 3
## 6 6 Male 189 104 3
# Reshape to long format: keep Gender as the identifier and stack the Height
# and Weight columns into variable/value pairs. Columns that are neither id
# nor measure variables are left out of the melted result.
df7 <- melt(
  health,
  id.vars = "Gender",
  measure.vars = c("Height", "Weight")
)
head(df7)
## Gender variable value
## 1 Male Height 174
## 2 Male Height 189
## 3 Female Height 185
## 4 Female Height 195
## 5 Male Height 149
## 6 Male Height 189
# Sort the long table by Gender, then by ascending value. `variable` is not a
# sort key, so Height and Weight rows are interleaved purely by their values.
df0 <- df7[order(df7$Gender, df7$value), ]
head(df0)
## Gender variable value
## 600 Female Weight 50
## 651 Female Weight 50
## 745 Female Weight 50
## 890 Female Weight 50
## 993 Female Weight 50
## 571 Female Weight 51
# Scatter of the melted measurements by gender. Height and Weight share the
# `value` column, so each measure gets its own panel with an independent
# y-axis instead of being pooled onto one scale where they cannot be told
# apart.
plot <- ggplot(df7, aes(x = Gender, y = value)) +
  geom_point(alpha = 0.3, color = "blue") +
  facet_wrap(~variable, scales = "free_y")
plot
### DATA SET 3
# Download the climate table as raw CSV text and parse it into a data frame.
data <- getURL("https://raw.githubusercontent.com/hrensimin05/Project2-/main/earth.csv")
earth <- read.csv(text = data)
# Preview only the first rows, as is done for the other datasets, rather than
# printing the entire data frame.
head(earth)
## month day ozone solar.r wind temp
## 1 5 1 41 190 7.4 67
## 2 5 2 36 118 8.0 72
## 3 5 3 12 149 12.6 74
## 4 5 4 18 313 11.5 62
## 5 5 5 NA NA 14.3 56
## 6 5 6 28 NA 14.9 66
# Reshape to long format: month and day identify each observation, and every
# remaining column is stacked into variable/value pairs.
earth_melt <- melt(
  earth,
  id.vars = c("month", "day")
)
head(earth_melt)
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 5 5 5 ozone NA
## 6 5 6 ozone 28
# Replace melt's generic column names with descriptive ones.
colnames(earth_melt)[colnames(earth_melt) == "variable"] <- "climate_variable"
colnames(earth_melt)[colnames(earth_melt) == "value"] <- "climate_value"
head(earth_melt)
## month day climate_variable climate_value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 5 5 5 ozone NA
## 6 5 6 ozone 28
# Scatter of every climate measurement against day. The melted table stacks
# several different variables in `climate_value`, so colour the points by
# `climate_variable`; otherwise the series are indistinguishable.
# Rows whose value is NA are dropped by geom_point() with a warning.
plot_air <- ggplot(
  earth_melt,
  aes(x = day, y = climate_value, color = climate_variable)
) +
  geom_point()
plot_air
## Warning: Removed 3 rows containing missing values (geom_point).