library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# mpg exercise ----
# The question labels below are comments: as bare symbols (`Q1`) they would be
# evaluated by R and stop the script with "object 'Q1' not found".

# Q1: take a plain data.frame copy of ggplot2's mpg data set to work on.
mpg <- as.data.frame(ggplot2::mpg)
mpgnew <- mpg

# Q2: rename cty -> city and hwy -> highway in a single rename() call.
mpgnew <- rename(mpgnew, city = cty, highway = hwy)

# Q3: print the first rows to confirm the new column names.
head(mpgnew)
## manufacturer model displ year cyl trans drv city highway fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
# midwest exercise ----
# The question labels below are comments: as bare symbols (`Q1`) they would be
# evaluated by R and stop the script with "object 'Q1' not found".

# Q1: load ggplot2's midwest data as a plain data.frame and inspect it.
midwest <- as.data.frame(ggplot2::midwest)
head(midwest)
tail(midwest)
# View() opens a data viewer window, so only call it in an interactive session.
if (interactive()) {
  View(midwest)
}
dim(midwest)
str(midwest)
summary(midwest)

# Q2: rename poptotal -> total and popasian -> asian.
midwest <- rename(midwest, total = poptotal, asian = popasian)

# Q3: asian population as a percentage of the total, and its distribution.
midwest$percentage <- midwest$asian / midwest$total * 100
hist(midwest$percentage)

# Q4: label each row "large" when its percentage exceeds the overall mean,
# otherwise "small". The mean is computed rather than pasted from console
# output, so the cut-off is exact and follows the data if it changes.
asian_pct_mean <- mean(midwest$percentage)
asian_pct_mean
## [1] 0.4872462
midwest$abc <- ifelse(midwest$percentage > asian_pct_mean, "large", "small")

# Q5: frequency table and bar chart of the large/small labels.
table(midwest$abc)
##
## large small
## 119 318
# qplot() was deprecated in ggplot2 3.4.0; for a discrete x with no y it drew
# a bar chart, which geom_bar() reproduces.
ggplot(midwest, aes(x = abc)) +
  geom_bar()
library(readxl)
# mlu exercise ----
# The question labels below are comments: as bare symbols (`Q1`) they would be
# evaluated by R and stop the script with "object 'Q1' not found".

# Q1: read the second sheet of mlu.xlsx and keep a working copy.
# NOTE(review): setwd() to an absolute, user-specific path makes the script
# non-portable; it is kept so the working directory this script leaves behind
# is unchanged. Consider a project-relative path instead.
setwd("C:/Users/최창정/Documents")
mlu_data <- readxl::read_excel("mlu.xlsx", sheet = 2)
mlu_data2 <- mlu_data

# Q2: number of distinct values in the age column.
unique_age <- unique(mlu_data2$age)
length(unique_age)
## [1] 3

# Q3: rename utterances_mlu -> utterances and words_mlu -> words in one call.
mlu_data2 <- rename(
  mlu_data2,
  utterances = utterances_mlu,
  words = words_mlu
)
mlu_data2
## # A tibble: 35 × 8
## File age utterances words DurationTime DurationSec Types_freq Token_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 13_A0P… A0 566 1290 "00:17:35" 1055 580 1346
## 2 21_A0P… A0 565 1602 "00:20:44" 1244 737 1606
## 3 27_A0P… A0 470 813 "00:12:07" 727 378 832
## 4 28_A0P… A0 371 976 "00:11:53" 713 419 979
## 5 29_A0P… A0 802 2239 "00:24:45" 1485 814 2253
## 6 2_A0P0… A0 563 1243 "00:12:06\"" NA 425 1263
## 7 30_A0P… A0 574 1705 "00:21:56" 1316 828 1712
## 8 31_A0P… A0 539 1110 "00:10:54" 654 426 1124
## 9 35_A0P… A0 705 1847 "00:20:46" 1246 622 1860
## 10 36_A0P… A0 752 2120 "00:29:22" 1762 1014 2599
## # ℹ 25 more rows

# Q4: mlu = words per utterance.
mlu_data2$mlu <- mlu_data2$words / mlu_data2$utterances

# Q5: five-number summary (plus mean) of mlu.
summary(mlu_data2$mlu)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.730 2.447 2.745 2.696 2.916 3.476

# Q6: grade each row against the median, third quartile and maximum of mlu.
# The cut-offs are computed instead of being copied from the rounded summary()
# printout: a printed maximum of 3.476 may be rounded up from the true value,
# in which case `mlu >= 3.476` would never be TRUE and no row could get "A".
mlu_cuts <- quantile(
  mlu_data2$mlu,
  probs = c(0.5, 0.75, 1),
  na.rm = TRUE,
  names = FALSE
)
mlu_data2$grade <- ifelse(
  mlu_data2$mlu >= mlu_cuts[3], "A",
  ifelse(
    mlu_data2$mlu >= mlu_cuts[2], "B",
    ifelse(mlu_data2$mlu >= mlu_cuts[1], "C", "D")
  )
)
head(mlu_data2$grade)
## [1] "D" "C" "D" "D" "C" "D"

# Q8: scatter plot of mlu against age.
# qplot() was deprecated in ggplot2 3.4.0; with both x and y supplied it drew
# points, which geom_point() reproduces.
ggplot(mlu_data2, aes(x = age, y = mlu)) +
  geom_point()