# To import a data set, see the 'Environment' pane and copy and paste the CSV link. Here I imported the autism data set (CSV).
# Address of the autism CSV file that is read below.
theURL <- "https://vincentarelbundock.github.io/Rdatasets/csv/HLMdiag/autism.csv"
# read.csv() already defaults to header = TRUE and sep = ",", so the file
# is parsed exactly as with those arguments spelled out.
autism <- read.csv(theURL)
# Preview the first six rows (the default for head()).
head(autism, n = 6L)
## X childid sicdegp age2 vsae gender race bestest2
## 1 1 1 high 0 6 male white pdd
## 2 2 1 high 1 7 male white pdd
## 3 3 1 high 3 18 male white pdd
## 4 4 1 high 7 25 male white pdd
## 5 5 1 high 11 27 male white pdd
## 6 6 10 low 0 9 male white autism
#1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# Per-column overview of the full data set: quartiles and mean for numeric
# columns, length/class/mode for character columns.
# (A stray Markdown code fence that preceded this call was removed; it is
# not valid R and stops the script from parsing.)
summary(autism)
## X childid sicdegp age2
## Min. : 1.0 Min. : 1 Length:604 Min. : 0.000
## 1st Qu.:153.8 1st Qu.: 48 Class :character 1st Qu.: 0.000
## Median :305.5 Median :107 Mode :character Median : 3.000
## Mean :306.0 Mean :105 Mean : 3.785
## 3rd Qu.:458.2 3rd Qu.:158 3rd Qu.: 7.000
## Max. :612.0 Max. :212 Max. :11.000
## vsae gender race bestest2
## Min. : 1.00 Length:604 Length:604 Length:604
## 1st Qu.: 10.00 Class :character Class :character Class :character
## Median : 14.00 Mode :character Mode :character Mode :character
## Mean : 26.28
## 3rd Qu.: 27.00
## Max. :198.00
# Mean and median of age2, computed on the full data set.
age2mean <- mean(autism[["age2"]])
print(age2mean)
## [1] 3.784768
age2median <- median(autism[["age2"]])
print(age2median)
## [1] 3
# The same two statistics for childid.
# NOTE(review): childid looks like an identifier rather than a measurement,
# so its mean/median may not be meaningful -- confirm the choice of column.
childidmean <- mean(autism[["childid"]])
print(childidmean)
## [1] 105.0215
childidmedian <- median(autism[["childid"]])
print(childidmedian)
## [1] 107
# The mean and median of childid are both larger than those of age2.
#2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Keep only the rows whose age2 is at least 3 and whose childid is at least
# 205; every column is retained. which() drops rows where the comparison is
# NA, matching what subset() does.
newd <- autism[
  which(autism$age2 >= 3 & autism$childid >= 205), ,
  drop = FALSE
]
#3. Create new column names for the new data frame.
# Rename the columns by position:
#   X -> score, childid -> id, sicdegp -> level, age2 -> yo,
#   vsae -> metric, gender -> construct, race -> race, bestest2 -> diagnosis.
names(newd) <- c(
  "score", "id", "level", "yo",
  "metric", "construct", "race", "diagnosis"
)
#4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Per-column overview of the filtered, renamed data frame.
summary(object = newd)
## score id level yo
## Min. :354.0 Min. :205.0 Length:10 Min. : 3.0
## 1st Qu.:358.2 1st Qu.:207.0 Class :character 1st Qu.: 7.0
## Median :364.5 Median :209.0 Mode :character Median : 7.0
## Mean :365.8 Mean :208.8 Mean : 8.2
## 3rd Qu.:375.0 3rd Qu.:211.5 3rd Qu.:11.0
## Max. :378.0 Max. :212.0 Max. :11.0
## metric construct race diagnosis
## Min. : 12.00 Length:10 Length:10 Length:10
## 1st Qu.: 23.75 Class :character Class :character Class :character
## Median : 40.00 Mode :character Mode :character Mode :character
## Mean : 57.60
## 3rd Qu.: 70.50
## Max. :147.00
# Mean and median of yo (the renamed age2 column) in the filtered data frame.
yomean <- mean(newd[["yo"]])
print(yomean)
## [1] 8.2
# Higher than the age2 mean of the full data set (3.784768).
yomedian <- median(newd[["yo"]])
print(yomedian)
## [1] 7
# Higher than the age2 median of the full data set (3).
# Mean and median of id (the renamed childid column).
idmean <- mean(newd[["id"]])
print(idmean)
## [1] 208.8
idmedian <- median(newd[["id"]])
print(idmedian)
## [1] 209
# Both are higher than the childid mean (105.0215) and median (107) of the full data set.
#5. Rename at least 3 values in a column, so that every value in that column is renamed.
# Convert yo to character so that its values can be replaced with text labels.
newd[["yo"]] <- as.character(newd[["yo"]])
# Relabel the value "11" as "no"; all other values are left unchanged.
# NOTE(review): step 5 asks for every value in the column to be renamed, but
# only "11" is relabelled here -- confirm whether the remaining values need
# labels as well.
newd[["yo"]][newd[["yo"]] %in% "11"] <- "no"
#6. Display enough rows to see examples of all of steps 1-5 above.
# Show the first six rows (the default for head()) of the modified data frame.
head(newd, n = 6L)
## score id level yo metric construct race diagnosis
## 349 354 205 med 7 18 female white pdd
## 350 355 205 med no 66 female white pdd
## 353 358 207 high 7 22 male white autism
## 354 359 207 high no 48 male white autism
## 359 364 209 med 7 12 male white autism
## 360 365 209 med no 32 male white autism
#7. Bonus attempt: place the original .csv in a GitHub file and have R read from the link. This will be a very useful skill as you progress in your data science education and career.
# Read the CSV directly from its web address instead of a local file.
theURL <- "https://vincentarelbundock.github.io/Rdatasets/csv/HLMdiag/autism.csv"
# header = TRUE and sep = "," are the read.csv() defaults, so they are
# omitted here. This assignment replaces any existing `autism` object.
autism <- read.csv(file = theURL)
# Print the first six rows to confirm the download worked.
head(autism, n = 6L)
## X childid sicdegp age2 vsae gender race bestest2
## 1 1 1 high 0 6 male white pdd
## 2 2 1 high 1 7 male white pdd
## 3 3 1 high 3 18 male white pdd
## 4 4 1 high 7 25 male white pdd
## 5 5 1 high 11 27 male white pdd
## 6 6 10 low 0 9 male white autism