Ex1Principles

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Q1

a

# setwd("C:/Users/avivg/OneDrive/Documents/R/Statistical Analysis Course/Ex1")

# df <- read.csv("q1.csv")

# head(df)

# Raw data: one age and one salary per manager, in matching order.
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)

SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)

# Pair the two vectors column-wise; the columns are named AGE and SAL.
df <- data.frame(AGE = AGE, SAL = SAL)
head(df)

##   AGE SAL
## 1  53 145
## 2  43 621
## 3  33 262
## 4  45 208
## 5  46 362
## 6  55 424

# Centre of the age distribution: mean first, then median.
age <- df[["AGE"]]
print(mean(age))

## [1] 51.54237

print(median(age))

## [1] 50

# Salaries: the first few values, followed by their mean and median.
salary <- df[["SAL"]]
print(head(salary))

## [1] 145 621 262 208 362 424

print(mean(salary))

## [1] 404.1695

print(median(salary))

## [1] 350

b

# 90th percentile of the salaries; this is the cut-off for the top 10 %.
quant9 <- quantile(salary, 0.9)
quant9

##   90% 
## 738.8

# Scatter plot of the data with the 90th-percentile salary marked in red.
plot(df)
abline(h = quant9, col = "red")

# Label each age with the matching salary so the printout shows both.
names(age) <- salary

# Select on the numeric salaries, not on names(age): names are character
# strings, so `names(age) > 712.9` compared text and "1103" sorted below
# "712.9", silently dropping the highest earner.
age_10 <- age[salary > quant9]
print("Ages of top 10 %:")

## [1] "Ages of top 10 %:"

print(age_10)

##  750 1103  862  800  808  802
##   69   57   47   74   40   56

c

# Sample standard deviation of the salaries.
sd(salary)

## [1] 220.5335

# Number of managers whose salary lies strictly within ONE standard
# deviation of the mean salary.
print(sum(abs(salary - mean(salary)) < sd(salary)))

## [1] 44

# Same count for a band of TWO standard deviations around the mean.
print(sum(abs(salary - mean(salary)) < 2 * sd(salary)))

## [1] 57

d

# Age distribution: counts first, then the same histogram on a density scale.
hist(age)

hist(age, freq = FALSE)

# Salary distribution with about ten bins: counts, then density scale.
hist(salary, breaks = 10)

hist(salary, breaks = 10, freq = FALSE)

e

# Scatter plot of the two columns of df; the title is added after drawing.
plot(df)
title(main = "Age VS Salary")

# Correlation between age and salary: positive but weak.
with(df, cor(AGE, SAL))

## [1] 0.1275554

2

a

# Load the data set from the working directory (this replaces the Q1 `df`)
# and preview its first rows.
df <- read.csv(file = "firstgoalclean.csv")
head(df)

##   Div     Date    HomeTeam   AwayTeam FTHG FTAG FTR HTHG HTAG HTR       Referee
## 1  E0 15/08/09 Aston Villa      Wigan    0    2   A    0    1   A M Clattenburg
## 2  E0 15/08/09   Blackburn   Man City    0    2   A    0    1   A        M Dean
## 3  E0 15/08/09      Bolton Sunderland    0    1   A    0    1   A    A Marriner
## 4  E0 15/08/09     Chelsea       Hull    2    1   H    1    1   D       A Wiley
## 5  E0 15/08/09     Everton    Arsenal    1    6   A    0    3   A      M Halsey
## 6  E0 15/08/09  Portsmouth     Fulham    0    1   A    0    1   A    M Atkinson
##   HS AS HST AST HF AF HC AC HY AY HR AR HTFGM ATFGM FGM
## 1 11 14   5   7 15 14  4  6  2  2  0  0     0    31  31
## 2 17  8   9   5 12  9  5  4  2  1  0  0     0     3   3
## 3 11 20   3  13 16 10  4  7  2  1  0  0     0     5   5
## 4 26  7  12   3 13 15 12  4  1  2  0  0    37    28  28
## 5  8 15   5   9 11 13  4  9  0  0  0  0    90    26  26
## 6 16  9   4   3 11 18  6  4  3  2  0  0     0    14  14

# Number of rows in the data set.
nrow(df)

## [1] 348

# Distribution of the FGM column, drawn with roughly 50 bins.
hist(df$FGM, breaks = 50)

b

# b
# Histogram of the HF column of df.
# NOTE(review): the author reads the shape as approximately normal; confirm
# against the rendered plot.
hist(df$HF) # author's note: looks like a normal distribution

c

# c
# Spread and centre of FGM.
sd(df$FGM)

## [1] 21.96523

mean(df$FGM)

## [1] 29.85345

# Centre and spread of HF. The original evaluated mean(df$FGM) a second time
# here, so the mean of HF was never reported.
mean(df$HF)

## [1] 11.67241

sd(df$HF)

## [1] 3.5547

d

# d: observations lying within half a standard deviation of the mean
# Logical mask for FGM, followed by the number of selected observations.
filterFGM <- abs(df$FGM - mean(df$FGM)) <= 0.5 * sd(df$FGM)
length(df$FGM[filterFGM])

## [1] 120

# Same selection for HF.
filterHF <- abs(df$HF - mean(df$HF)) <= 0.5 * sd(df$HF)
length(df$HF[filterHF])

## [1] 154

# Standard scores (z-scores) of the selected observations; head() shows
# the first six of them.
head(((df$FGM - mean(df$FGM)) / sd(df$FGM))[filterFGM])

## [1]  0.05219849 -0.08438102 -0.17543402 -0.49411953 -0.35754002  0.18877799

head(((df$HF - mean(df$HF)) / sd(df$HF))[filterHF])

## [1]  0.09215579  0.37347346 -0.18916188 -0.18916188  0.37347346 -0.18916188

e

# e: selected percentiles of FGM
# 40th percentile
quantile(df$FGM, probs = 0.4)

##  40% 
## 19.8

# 70th percentile
quantile(df$FGM, probs = 0.7)

## 70% 
##  38

Q 5

a

# Raw data repeated so that this section runs on its own.
AGE <- c(53,43,33,45,46,55,41,55,36,45,55,50,49,47,69,51,48,62,45,37,50,50,50,58,53,57,53,61,47,56,44,46,58,48,38,74,60,32,51,50,40,61,63,56,45,61,70,59,57,69,44,56,50,56,43,48,52,62,48)

SAL <- c(145,621,262,208,362,424,339,736,291,58,498,643,390,332,750,368,659,234,396,300,343,536,543,217,298,1103,406,254,862,204,206,250,21,298,350,800,726,370,536,291,808,543,149,350,242,198,213,296,317,482,155,802,200,282,573,388,250,396,572)

# Linear transformation of the salary: keep 80 % and subtract a flat 10.
# Each result is labelled with the salary it was computed from.
Sal_after_tax <- setNames(SAL * 0.8 - 10, SAL)

# Mean before and after the transformation.
mean(SAL)

## [1] 404.1695

mean(Sal_after_tax)

## [1] 313.3356

# Median before and after.
median(SAL)

## [1] 350

median(Sal_after_tax)

## [1] 270

# Standard deviation before and after.
sd(SAL)

## [1] 220.5335

sd(Sal_after_tax)

## [1] 176.4268

# Distribution of the transformed salaries, drawn with roughly 20 bins.
hist(Sal_after_tax, breaks = 20)

#plot the data frame (ages and salaries)

# cor(df$AGE, df$SAL) #/low positive correlation

Ex1Principles

Aviv Gelfand

2022-10-25

R Markdown

b

c

d

e

2

a

b

c

d

e

Q 5

a