library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
# 1. Rename variables
# Work on a plain data.frame copy of the mpg data set, then give the city and
# highway fuel-economy columns more descriptive names.
library(dplyr)
mpg <- mpg %>%
  as.data.frame()
head(mpg)
## manufacturer model displ year cyl trans drv cty hwy fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
# rename() takes new_name = old_name pairs: cty -> city, hwy -> highway.
mpg <- mpg %>%
  rename(city = cty, highway = hwy)
head(mpg)
## manufacturer model displ year cyl trans drv city highway fl class
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
## 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
아시아 인구 백분율 = (아시아 인구수 / 전체 인구수) * 100
# Derive the percentage of Asian residents per county in the midwest data set
# and label each county "large" (above the overall mean percentage) or
# "small" (at or below it).
library(dplyr)
library(ggplot2)
midwest <- as.data.frame(ggplot2::midwest)
midwest <- rename(midwest, total = poptotal, asian = popasian)
# Percentage = (Asian population / total population) * 100.
midwest$asian_population <- (midwest$asian / midwest$total) * 100
hist(midwest$asian_population)
# Keep the mean in a variable so the threshold used below always matches the
# data instead of a number copied from printed output.
mean_asian_pct <- mean(midwest$asian_population)
mean_asian_pct
## [1] 0.4872462
# NOTE: the column name `asain_mean` (sic) is kept unchanged so that any later
# code referring to it keeps working.
midwest$asain_mean <- ifelse(
  midwest$asian_population > mean_asian_pct,
  "large",
  "small"
)
table(midwest$asain_mean)
##
## large small
## 119 318
# qplot() is deprecated since ggplot2 3.4.0; draw the bar chart of the two
# groups with ggplot() + geom_bar() instead.
ggplot(midwest, aes(x = asain_mean)) +
  geom_bar()
발화(utterance)당 평균 단어 개수(MLU) = words / utterances
# Compute the mean length of utterance (MLU = words / utterances) for each
# recording in the workbook, grade every recording by the quartile its MLU
# falls in, and plot MLU against age group.
library(readxl)
# The workbook is resolved relative to the current working directory; no
# machine-specific setwd() call is made. The sheet is read once and reused.
mlu_data <- read_excel("mlu (1).xls", sheet = 2)
mlu_data
## # A tibble: 35 × 9
## File age utterances_mlu words_mlu DurationTime DurationSec Types_freq
## <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 13_A0P04M… A0 566 1290 "00:17:35" 1055 580
## 2 21_A0P05M… A0 565 1602 "00:20:44" 1244 737
## 3 27_A0P06M… A0 470 813 "00:12:07" 727 378
## 4 28_A0P07M… A0 371 976 "00:11:53" 713 419
## 5 29_A0P08M… A0 802 2239 "00:24:45" 1485 814
## 6 2_A0P01M.… A0 563 1243 "00:12:06\"" NA 425
## 7 30_A0P09F… A0 574 1705 "00:21:56" 1316 828
## 8 31_A0P10F… A0 539 1110 "00:10:54" 654 426
## 9 35_A0P11M… A0 705 1847 "00:20:46" 1246 622
## 10 36_A0P12M… A0 752 2120 "00:29:22" 1762 1014
## # ℹ 25 more rows
## # ℹ 2 more variables: Token_freq <dbl>, mlu <dbl>
unique(mlu_data$age)
## [1] "A0" "A1" "A2"
mlu_data <- rename(mlu_data, utterances = utterances_mlu, words = words_mlu)
# MLU is the average number of words per utterance, i.e. words / utterances.
# This overwrites the `mlu` column that was read from the sheet.
mlu_data$mlu <- mlu_data$words / mlu_data$utterances
# Printed output is not reproduced here; re-run to see the summary of the
# corrected ratio.
summary(mlu_data$mlu)
# Take the quartile cut-offs from the data itself rather than from numbers
# copied out of a printed summary.
mlu_quartiles <- quantile(
  mlu_data$mlu,
  probs = c(0.25, 0.5, 0.75),
  na.rm = TRUE
)
# A: top quartile, B: median up to Q3, C: Q1 up to median, D: below Q1.
mlu_data$grade <- ifelse(
  mlu_data$mlu >= mlu_quartiles[["75%"]], "A",
  ifelse(
    mlu_data$mlu >= mlu_quartiles[["50%"]], "B",
    ifelse(mlu_data$mlu >= mlu_quartiles[["25%"]], "C", "D")
  )
)
# qplot() is deprecated since ggplot2 3.4.0; build the scatter plot once with
# ggplot(), keep it in mlu_plot, and print it.
mlu_plot <- ggplot(mlu_data, aes(x = age, y = mlu)) +
  geom_point()
mlu_plot
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.