Project 2

DATA SET ONE

# Download the raw CSV text of the first data set from GitHub.
salary_url <- "https://raw.githubusercontent.com/hrensimin05/Project2-/main/Dataset.csv"
df <- getURL(salary_url)

# Parse the downloaded text into a data frame and preview its first rows.
salary <- read.csv(text = df)
head(salary)

##   religion X.10k X10k.20k X20k.30k X30k.40k X40k.50k X50k.75k X75k.100k
## 1 Agnostic    27       34       60       81       76      137       122
## 2  Atheist    12       27       37       52       35       70        73
## 3 Buddhist    27       21       30       34       33       58        62
## 4 Catholic   418      617      732      670      638     1116       949
## 5  Refused    15       14       15       11       10       35        21
##   X100k.150k X.150k refused
## 1        109     84      96
## 2         59     74      76
## 3         39     53      54
## 4        792    633    1489
## 5         17     18     116

# The column headers are values (income brackets) rather than variable names, so the columns need to be turned into rows.

# Reshape from wide to long: religion stays as the identifier and every other
# column is stacked into variable/value pairs.
df <- melt(salary, id.vars = "religion")

head(df)

##   religion variable value
## 1 Agnostic    X.10k    27
## 2  Atheist    X.10k    12
## 3 Buddhist    X.10k    27
## 4 Catholic    X.10k   418
## 5  Refused    X.10k    15
## 6 Agnostic X10k.20k    34

# Give the melted columns descriptive names. replace() only touches the
# entries that match, so a missing column name is left as a no-op.
names(df) <- replace(names(df), names(df) == "variable", "income")
names(df) <- replace(names(df), names(df) == "value", "frequency")

# Sort the rows by religion.
df <- df[order(df[["religion"]]), ]
head(df)

##    religion   income frequency
## 1  Agnostic    X.10k        27
## 6  Agnostic X10k.20k        34
## 11 Agnostic X20k.30k        60
## 16 Agnostic X30k.40k        81
## 21 Agnostic X40k.50k        76
## 26 Agnostic X50k.75k       137

# The dataset is now in long format, with one row per religion and income bracket, which makes it easier to read.

DATA SET TWO

# Download the raw CSV text of the second data set from GitHub.
health_url <- "https://raw.githubusercontent.com/hrensimin05/Project2-/main/500_Person_Gender_Height_Weight_Index.csv"
df2 <- getURL(health_url)

# Parse the downloaded text into a data frame and preview its first rows.
health <- read.csv(text = df2)
head(health)

##   X Gender Height Weight Index
## 1 1   Male    174     96     4
## 2 2   Male    189     87     2
## 3 3 Female    185    110     4
## 4 4 Female    195    104     3
## 5 5   Male    149     61     3
## 6 6   Male    189    104     3

# Stack Height and Weight into variable/value pairs with Gender as the
# identifier; only these three columns are passed to melt().
df7 <- melt(
  health,
  id.vars = "Gender",
  measure.vars = c("Height", "Weight")
)
head(df7)

##   Gender variable value
## 1   Male   Height   174
## 2   Male   Height   189
## 3 Female   Height   185
## 4 Female   Height   195
## 5   Male   Height   149
## 6   Male   Height   189

# Sort the melted rows by Gender, then by value within each gender.
df0 <- df7[order(df7$Gender, df7$value), ]
head(df0)

##     Gender variable value
## 600 Female   Weight    50
## 651 Female   Weight    50
## 745 Female   Weight    50
## 890 Female   Weight    50
## 993 Female   Weight    50
## 571 Female   Weight    51

# Plot the melted measurements by gender. The value column holds both Height
# and Weight, which are different quantities, so each one is drawn in its own
# panel with an independent y-axis instead of being overlaid on a shared axis.
# NOTE(review): the object name `plot` is the same as base R's plot(); it is
# kept so that any later reference to this object keeps working.
plot <- ggplot(df7, aes(x = Gender, y = value)) +
  geom_point(alpha = 0.3, color = "blue") +
  facet_wrap(~variable, scales = "free_y")

plot

### DATA SET THREE

# Download the raw CSV text of the third data set from GitHub.
earth_url <- "https://raw.githubusercontent.com/hrensimin05/Project2-/main/earth.csv"
data <- getURL(earth_url)

# Parse the downloaded text into a data frame and print it in full.
earth <- read.csv(text = data)
earth

##   month day ozone solar.r wind temp
## 1     5   1    41     190  7.4   67
## 2     5   2    36     118  8.0   72
## 3     5   3    12     149 12.6   74
## 4     5   4    18     313 11.5   62
## 5     5   5    NA      NA 14.3   56
## 6     5   6    28      NA 14.9   66

# Reshape to long format: month and day identify each observation, and the
# remaining measurement columns are stacked into variable/value pairs.
earth_melt <- melt(
  earth,
  id.vars = c("month", "day")
)
head(earth_melt)

##   month day variable value
## 1     5   1    ozone    41
## 2     5   2    ozone    36
## 3     5   3    ozone    12
## 4     5   4    ozone    18
## 5     5   5    ozone    NA
## 6     5   6    ozone    28

# Give the melted columns descriptive names. replace() only touches the
# entries that match, so a missing column name is left as a no-op.
names(earth_melt) <- replace(
  names(earth_melt),
  names(earth_melt) == "variable",
  "climate_variable"
)
names(earth_melt) <- replace(
  names(earth_melt),
  names(earth_melt) == "value",
  "climate_value"
)
head(earth_melt)

##   month day climate_variable climate_value
## 1     5   1            ozone            41
## 2     5   2            ozone            36
## 3     5   3            ozone            12
## 4     5   4            ozone            18
## 5     5   5            ozone            NA
## 6     5   6            ozone            28

# Plot every climate measurement against day. The climate_value column holds
# several different quantities (ozone, solar.r, wind, temp), so each one is
# drawn in its own panel with an independent y-axis instead of being overlaid
# on a single shared axis.
# Rows whose climate_value is NA are dropped by geom_point() with a warning.
plot_air <- ggplot(earth_melt, aes(x = day, y = climate_value)) +
  geom_point() +
  facet_wrap(~climate_variable, scales = "free_y")
plot_air

## Warning: Removed 3 rows containing missing values (geom_point).

Project 2

Dominika Markowska-Desvallons

10/3/2020

DATA SET ONE

DATA SET TWO