Untitled

1번 문제

Load packages

library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(readxl)

#1. 변수명바꾸기

library(dplyr)

# Turn the mpg dataset into a plain data frame and preview its first six rows.
mpg <- as.data.frame(x = mpg)
head(x = mpg, n = 6L)

##   manufacturer model displ year cyl      trans drv cty hwy fl   class
## 1         audi    a4   1.8 1999   4   auto(l5)   f  18  29  p compact
## 2         audi    a4   1.8 1999   4 manual(m5)   f  21  29  p compact
## 3         audi    a4   2.0 2008   4 manual(m6)   f  20  31  p compact
## 4         audi    a4   2.0 2008   4   auto(av)   f  21  30  p compact
## 5         audi    a4   2.8 1999   6   auto(l5)   f  16  26  p compact
## 6         audi    a4   2.8 1999   6 manual(m5)   f  18  26  p compact

# Give the two mileage columns descriptive names (cty -> city, hwy -> highway)
# and preview the result.
mpg <- dplyr::rename(mpg, city = cty, highway = hwy)
head(x = mpg, n = 6L)

##   manufacturer model displ year cyl      trans drv city highway fl   class
## 1         audi    a4   1.8 1999   4   auto(l5)   f   18      29  p compact
## 2         audi    a4   1.8 1999   4 manual(m5)   f   21      29  p compact
## 3         audi    a4   2.0 2008   4 manual(m6)   f   20      31  p compact
## 4         audi    a4   2.0 2008   4   auto(av)   f   21      30  p compact
## 5         audi    a4   2.8 1999   6   auto(l5)   f   16      26  p compact
## 6         audi    a4   2.8 1999   6 manual(m5)   f   18      26  p compact

2번 문제

백분율: (아시아 인구수 / 전체 인구수) * 100

library(dplyr)
library(ggplot2)

# Load the midwest dataset as a plain data frame and shorten the two
# population column names used below.
midwest <- as.data.frame(ggplot2::midwest)
midwest <- rename(midwest, "total" = "poptotal", "asian" = "popasian")

# Asian share of each county's population as a percentage:
# (asian / total) * 100. The factor was previously 2, which did not yield a
# percentage.
midwest$asian_population <- (midwest$asian / midwest$total) * 100
hist(midwest$asian_population)

# Keep the mean in a variable so the classification below uses the exact
# value instead of a hand-copied, rounded constant.
asian_population_mean <- mean(midwest$asian_population)
asian_population_mean

## [1] 0.4872462

# Label each county "large" when its Asian percentage exceeds the mean,
# otherwise "small".
midwest$asain_mean <- ifelse(
  midwest$asian_population > asian_population_mean,
  "large",
  "small"
)
table(midwest$asain_mean)

## 
## large small 
##   119   318

# Bar chart of how many counties fall into the "large" and "small" groups.
# Built with ggplot() + geom_bar() because qplot() is deprecated as of
# ggplot2 3.4.0.
ggplot(midwest, aes(x = asain_mean)) +
  geom_bar()

3번 문제

각 utterance의 평균 단어 개수: words/utterances

library(readxl)

# Show the working directory; the workbook below is read relative to it.
getwd()

## [1] "C:/Users/admin/Desktop/R"

# No setwd() call: forcing a hard-coded absolute user path makes the script
# fail on any other machine, and the session is already in that directory.
# Preview the second sheet of the workbook.
read_excel("mlu (1).xls", sheet = 2)

## # A tibble: 35 × 9
##    File       age   utterances_mlu words_mlu DurationTime DurationSec Types_freq
##    <chr>      <chr>          <dbl>     <dbl> <chr>              <dbl>      <dbl>
##  1 13_A0P04M… A0               566      1290 "00:17:35"          1055        580
##  2 21_A0P05M… A0               565      1602 "00:20:44"          1244        737
##  3 27_A0P06M… A0               470       813 "00:12:07"           727        378
##  4 28_A0P07M… A0               371       976 "00:11:53"           713        419
##  5 29_A0P08M… A0               802      2239 "00:24:45"          1485        814
##  6 2_A0P01M.… A0               563      1243 "00:12:06\""          NA        425
##  7 30_A0P09F… A0               574      1705 "00:21:56"          1316        828
##  8 31_A0P10F… A0               539      1110 "00:10:54"           654        426
##  9 35_A0P11M… A0               705      1847 "00:20:46"          1246        622
## 10 36_A0P12M… A0               752      2120 "00:29:22"          1762       1014
## # ℹ 25 more rows
## # ℹ 2 more variables: Token_freq <dbl>, mlu <dbl>

# Load the second worksheet of the workbook into mlu_data and list the
# distinct values of its age column.
mlu_data <- readxl::read_excel(path = "mlu (1).xls", sheet = 2)
unique(mlu_data[["age"]])

## [1] "A0" "A1" "A2"

# Shorten the count column names so the formula below reads naturally.
mlu_data <- rename(
  mlu_data,
  "utterances" = "utterances_mlu",
  "words" = "words_mlu"
)

# Average number of words per utterance: words / utterances.
# The ratio was previously inverted (utterances / words).
mlu_data$mlu <- mlu_data$words / mlu_data$utterances
summary(mlu_data$mlu)

## (Summary output not shown: the values previously listed here belonged to
## the inverted ratio utterances/words. Re-knit to regenerate.)

# Grade each file by the quartile its value falls in: A is at or above the
# 3rd quartile, B at or above the median, C at or above the 1st quartile,
# D below that. Cut points come from the data instead of rounded constants
# copied from the summary printout.
mlu_quartiles <- quantile(
  mlu_data$mlu,
  probs = c(0.25, 0.5, 0.75),
  na.rm = TRUE,
  names = FALSE
)
mlu_data$grade <- ifelse(
  mlu_data$mlu >= mlu_quartiles[3], "A",
  ifelse(
    mlu_data$mlu >= mlu_quartiles[2], "B",
    ifelse(mlu_data$mlu >= mlu_quartiles[1], "C", "D")
  )
)
# Scatter plot of the mlu column against the age column. Built once with
# ggplot() + geom_point() because qplot() is deprecated as of ggplot2 3.4.0,
# stored in mlu_plot, then printed.
mlu_plot <- ggplot(mlu_data, aes(x = age, y = mlu)) +
  geom_point()
mlu_plot

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Untitled

임소율

2023-10-09

1번 문제

Load packages

2번 문제

3번 문제