Untitled

# Print the current working directory of the R session.
getwd()

## [1] "C:/Users/samsung/Desktop"

# Switch the session's working directory to c:/data.
# NOTE(review): no later call in this script visibly reads or writes a file,
# and a hard-coded absolute path errors on machines without that folder —
# confirm whether this call is still needed.
setwd('c:/data')

library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## 필요한 패키지를 로딩중입니다: ggplot2

## 필요한 패키지를 로딩중입니다: lattice

# Load the mtcars data set and print a column-wise overview of it.
data("mtcars")
mtcars %>% glimpse()

## Rows: 32
## Columns: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…

library(gapminder)

## Warning: 패키지 'gapminder'는 R 버전 4.3.2에서 작성되었습니다

# Column-wise overview of the gapminder data set.
gapminder %>% glimpse()

## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …

# Number of rows for each value of am.
count(mtcars, am)

##   am  n
## 1  0 19
## 2  1 13

# Number of rows for each continent.
count(gapminder, continent)

## # A tibble: 5 × 2
##   continent     n
##   <fct>     <int>
## 1 Africa      624
## 2 Americas    300
## 3 Asia        396
## 4 Europe      360
## 5 Oceania      24

# Keep only the rows with am == 1, and only the am and mpg columns.
df <- mtcars %>%
  filter(am == 1) %>%
  select(am, mpg)

glimpse(df)

## Rows: 13
## Columns: 2
## $ am  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
## $ mpg <dbl> 21.0, 21.0, 22.8, 32.4, 30.4, 33.9, 27.3, 26.0, 30.4, 15.8, 19.7, …

# Show the am column of the subset (every value should be 1 after the filter).
df[["am"]]

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 1

# Shapiro-Wilk normality check on the mpg values of the am == 1 subset.
shapiro.test(df$mpg)

## 
##  Shapiro-Wilk normality test
## 
## data:  df$mpg
## W = 0.9458, p-value = 0.5363

# Draw the kernel density estimate of the same mpg values.
plot(density(df$mpg))

# Shapiro-Wilk normality check on lifeExp over all gapminder rows.
shapiro.test(gapminder$lifeExp)

## 
##  Shapiro-Wilk normality test
## 
## data:  gapminder$lifeExp
## W = 0.95248, p-value < 2.2e-16

# Draw the kernel density estimate of lifeExp.
plot(density(gapminder$lifeExp))

# One-sample, one-sided t-test: is the mean mpg of the subset greater than 20?
t.test(df$mpg, mu = 20, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  df$mpg
## t = 2.5682, df = 12, p-value = 0.01231
## alternative hypothesis: true mean is greater than 20
## 95 percent confidence interval:
##  21.3441     Inf
## sample estimates:
## mean of x 
##  24.39231

# One-sample, one-sided t-test: is the mean lifeExp greater than 58?
t.test(gapminder$lifeExp, mu = 58, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  gapminder$lifeExp
## t = 4.7119, df = 1703, p-value = 1.327e-06
## alternative hypothesis: true mean is greater than 58
## 95 percent confidence interval:
##  58.95946      Inf
## sample estimates:
## mean of x 
##  59.47444

# Same one-sided test against a hypothesised mean of 59.
t.test(gapminder$lifeExp, mu = 59, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  gapminder$lifeExp
## t = 1.5162, df = 1703, p-value = 0.06483
## alternative hypothesis: true mean is greater than 59
## 95 percent confidence interval:
##  58.95946      Inf
## sample estimates:
## mean of x 
##  59.47444

# Two score vectors of seven values each.
before <- c(59, 72, 85, 69, 78, 82, 55)
after <- c(54, 65, 84, 63, 72, 83, 51)

# Element-wise differences, followed by a normality check on them.
diff <- before - after
shapiro.test(diff)

## 
##  Shapiro-Wilk normality test
## 
## data:  diff
## W = 0.88458, p-value = 0.2476

# One-sided paired t-test: is the mean of (before - after) greater than 0?
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, which would silently change the test being run.
t.test(before, after, mu = 0, alternative = "greater", paired = TRUE)

## 
##  Paired t-test
## 
## data:  before and after
## t = 3.5949, df = 6, p-value = 0.005718
## alternative hypothesis: true mean difference is greater than 0
## 95 percent confidence interval:
##  1.837829      Inf
## sample estimates:
## mean difference 
##               4

# Scores for two groups of twenty observations each.
tre <- c(
  10, 16, 27, 15, 21, 14, 16, 21, 22, 23,
  25, 28, 27, 13, 15, 16, 21, 22, 25, 28
)
con <- c(
  23, 26, 27, 23, 16, 18, 31, 33, 28, 36,
  18, 21, 26, 28, 29, 33, 32, 16, 18, 23
)

# Long format: group 1 holds the tre scores, group 2 holds the con scores.
df <- data.frame(
  group = rep(c(1, 2), each = 20),
  score = c(tre, con)
)


# Column-wise overview of the combined group/score table.
df %>% glimpse()

## Rows: 40
## Columns: 2
## $ group <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2…
## $ score <dbl> 10, 16, 27, 15, 21, 14, 16, 21, 22, 23, 25, 28, 27, 13, 15, 16, …

# Shapiro-Wilk normality check on the tre scores.
# NOTE(review): con is not given the same check anywhere in view — confirm
# whether a matching test on con was intended before the two-sample t-test.
shapiro.test(tre)

## 
##  Shapiro-Wilk normality test
## 
## data:  tre
## W = 0.93475, p-value = 0.1905

# F test comparing the score variances of the two groups.
var.test(score ~ group, data = df)

## 
##  F test to compare two variances
## 
## data:  score by group
## F = 0.81402, num df = 19, denom df = 19, p-value = 0.6583
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3221986 2.0565791
## sample estimates:
## ratio of variances 
##           0.814019

# Pooled-variance two-sample t-test, one-sided: is the group 1 mean lower
# than the group 2 mean?
t.test(score ~ group, data = df, var.equal = TRUE, mu = 0,
       alternative = "less")

## 
##  Two Sample t-test
## 
## data:  score by group
## t = -2.7164, df = 38, p-value = 0.00494
## alternative hypothesis: true difference in means between group 1 and group 2 is less than 0
## 95 percent confidence interval:
##       -Inf -1.896672
## sample estimates:
## mean in group 1 mean in group 2 
##           20.25           25.25

# Load iris, list its column names, then print per-column summaries.
data("iris")
colnames(iris)

## [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"

summary(iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# One-way ANOVA of sepal length by species, followed by its ANOVA table.
fit <- aov(Sepal.Length ~ Species, data = iris)

summary(fit)

##              Df Sum Sq Mean Sq F value Pr(>F)    
## Species       2  63.21  31.606   119.3 <2e-16 ***
## Residuals   147  38.96   0.265                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Strongly prefer fixed over scientific notation when printing numbers,
# then reload iris and print its per-column summaries again.
options(scipen = 999)

data("iris")

summary(iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# Refit the same one-way ANOVA of sepal length by species.
fit<-aov(Sepal.Length ~ Species, data=iris)

# NOTE(review): fit is not inspected after this refit; summary(iris) prints
# the data-set summary again — confirm whether summary(fit) was intended.
summary(iris)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

# Set the same print option again (scipen is already 999 from the earlier call).
options(scipen = 999)

Untitled

Kim, Dong-Hyun

2023-11-11