# Kept so that any other code reading the global `result` still finds it;
# Exp() itself no longer depends on this value.
result <- 1

#' Truncated Taylor series of the exponential function
#'
#' Computes 1 + x + x^2/2! + ... + x^k/k!, prints the value and returns it
#' invisibly (the return value of print()).
#'
#' @param x Numeric value (or vector) at which to approximate exp(x).
#' @param k Non-negative number of terms to add beyond the constant 1.
#' @return The approximation, invisibly; it is also printed.
Exp <- function(x, k) {
  stopifnot(is.numeric(k), length(k) == 1, k >= 0)
  # Start from the zeroth-order term locally instead of reading a global,
  # so the answer cannot be corrupted by whatever `result` holds elsewhere.
  total <- 1
  # seq_len() is empty for k = 0, whereas 1:k would iterate over c(1, 0).
  for (i in seq_len(k)) {
    total <- total + x^i / factorial(i)
  }
  print(total)
}
# Second-order approximation of exp(3): 1 + 3 + 3^2/2! = 8.5
Exp(3, 2)
## [1] 8.5
#' Arithmetic mean computed from first principles
#'
#' @param x Numeric vector.
#' @return The sum of `x` divided by its length.
sample_mean <- function(x) {
  sum(x) / length(x)
}
#' Sample standard deviation computed from first principles
#'
#' Uses the n - 1 denominator.
#'
#' @param x Numeric vector.
#' @return Square root of the sum of squared deviations from the mean,
#'   divided by `length(x) - 1`.
sample_sd <- function(x) {
  n <- length(x)
  centre <- sum(x) / n
  squared_dev <- (x - centre)^2
  sqrt(sum(squared_dev) / (n - 1))
}
# Worked example for the hand-written mean and standard deviation helpers.
x <- c(3,5,7,7)
sample_mean(x)
## [1] 5.5
sample_sd(x)
## [1] 1.914854
#' Two-sided Student-t confidence interval for the mean of a sample
#'
#' The interval is mean +/- t * sd / sqrt(n), where t is the upper
#' (1 - conf) / 2 quantile of the t distribution with n - 1 degrees of
#' freedom. Relies on `sample_mean()` and `sample_sd()` defined in this file.
#'
#' The interval is returned visibly rather than printed, so a top-level call
#' still displays it while calls inside other computations (for example
#' within `summarise()`) no longer write to the console.
#'
#' @param x Numeric vector of observations.
#' @param conf Confidence level; defaults to 0.95.
#' @return Numeric vector of length two: c(lower bound, upper bound).
calculate_CI <- function(x, conf = 0.95) {
  n <- length(x)
  mean_value <- sample_mean(x)
  standard_error <- sample_sd(x) / sqrt(n)

  alpha <- 1 - conf
  degrees_of_freedom <- n - 1
  # Spell out FALSE: the shorthand `F` is an ordinary variable that can be
  # reassigned, which would silently flip the tail and swap the bounds.
  t_score <- qt(p = alpha / 2, df = degrees_of_freedom, lower.tail = FALSE)

  margin_error <- t_score * standard_error
  c(mean_value - margin_error, mean_value + margin_error)
}
# 95% confidence interval for the mean of a six-observation sample.
y <- c(9, 8, 8, 9, 1, 5)
calculate_CI(y, conf = 0.95)
## [1] 3.370258 9.963075
# Cross-check of the hand-computed interval: the confidence interval of the
# intercept in an intercept-only linear model is the t interval for the mean.
y <- c(9, 8, 8, 9, 1, 5)
dat <- data.frame(x = y)
fit <- lm(x ~ 1, data = dat)
confint(fit, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 3.370258 9.963075
# Download the TidyTuesday files for 2020-01-07 (needs the tidytuesdayR
# package and network access; see the download log below).
tuesdata <- tidytuesdayR::tt_load('2020-01-07')
## --- Compiling #TidyTuesday Information for 2020-01-07 ----
## --- There are 11 files available ---
## --- Starting Download ---
##
## Downloading file 1 of 11: `fire_nrt_M6_94771.csv`
## Downloading file 2 of 11: `IDCJAC0009_009151_1800_Data.csv`
## Downloading file 3 of 11: `IDCJAC0009_023011_1800_Data.csv`
## Downloading file 4 of 11: `IDCJAC0009_040383_1800_Data.csv`
## Downloading file 5 of 11: `IDCJAC0009_040913_1800_Data.csv`
## Downloading file 6 of 11: `IDCJAC0009_066062_1800_Data.csv`
## Downloading file 7 of 11: `IDCJAC0009_070351_1800_Data.csv`
## Downloading file 8 of 11: `IDCJAC0009_086232_1800_Data.csv`
## Downloading file 9 of 11: `MODIS_C6_Australia_and_New_Zealand_7d.csv`
## Downloading file 10 of 11: `rainfall.csv`
## Downloading file 11 of 11: `temperature.csv`
## --- Download complete ---
# Pull out the two tables analysed in the rest of the script.
rainfall <- tuesdata$rainfall
temperature <- tuesdata$temperature
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Clean the rainfall table: drop incomplete rows, build a Date column from the
# year/month/day parts, and upper-case the city names (the joined output shows
# upper-case names such as PERTH, presumably the casing used by `temperature`).
# `month` and `day` are removed by name rather than by column position so the
# step keeps working if the column order of the source file changes.
df <- rainfall %>%
  drop_na() %>%
  mutate(date = as.Date(paste0(year, "-", month, "-", day))) %>%
  select(-c(month, day)) %>%
  mutate(city_name = toupper(city_name))

# Attach temperature readings to each rainfall record by day and city. The
# result has one row per matching temperature record, so a rainfall value is
# repeated for every temp_type recorded on that day.
df1 <- inner_join(x = df, y = temperature, by = c("date", "city_name"))
df1
## # A tibble: 83,964 × 13
## station…¹ city_…² year rainf…³ period quality lat long stati…⁴ date
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <date>
## 1 009151 PERTH 1967 2.8 1 Y -32.0 116. Subiac… 1967-07-05
## 2 009151 PERTH 1967 2.8 1 Y -32.0 116. Subiac… 1967-07-05
## 3 009151 PERTH 1967 4.8 1 Y -32.0 116. Subiac… 1967-07-06
## 4 009151 PERTH 1967 4.8 1 Y -32.0 116. Subiac… 1967-07-06
## 5 009151 PERTH 1967 5.8 1 Y -32.0 116. Subiac… 1967-07-07
## 6 009151 PERTH 1967 5.8 1 Y -32.0 116. Subiac… 1967-07-07
## 7 009151 PERTH 1967 16 1 Y -32.0 116. Subiac… 1967-07-10
## 8 009151 PERTH 1967 16 1 Y -32.0 116. Subiac… 1967-07-10
## 9 009151 PERTH 1967 1 1 Y -32.0 116. Subiac… 1967-07-11
## 10 009151 PERTH 1967 1 1 Y -32.0 116. Subiac… 1967-07-11
## # … with 83,954 more rows, 3 more variables: temperature <dbl>,
## # temp_type <chr>, site_name <chr>, and abbreviated variable names
## # ¹station_code, ²city_name, ³rainfall, ⁴station_name
# One row per observation with the temperature types spread into separate
# columns (`max`, `min`), restricted to 2014 onwards.
df2 <- df1 %>%
  filter(year >= 2014) %>%
  pivot_wider(names_from = temp_type, values_from = temperature)

# Daily maximum and minimum temperature, one panel per city. The colour keys
# are the legend labels themselves and the palette is matched to them by
# name, instead of relying on placeholder strings being relabelled by
# position. The previous plot-level `fill` mapping was dropped because line
# geoms do not use it.
ggplot(df2, aes(x = date)) +
  geom_line(aes(y = max, color = "Max")) +
  geom_line(aes(y = min, color = "Min")) +
  facet_grid(. ~ city_name) +
  labs(
    title = "Temperatures over time",
    x = "Date",
    y = "Temperature",
    color = " ",
    subtitle = "In general, both the maximum and minimum temperatures have been increasing since 2014",
    caption = "Created by Ying Zhang"
  ) +
  scale_color_manual(values = c(Max = "coral3", Min = "aquamarine2"))
## Part 3B ### How I chose this and why? First, I wrote a function called
rainfall_dist, which takes two inputs: city for the name of the city and y for the year I want to plot. Within the function, I used if-else statements to check that the given city name and year exist in the data. If they do, the function goes on to plot the rainfall data using ggplot2.
#' Histogram of log rainfall for one city in one year
#'
#' Reads the global data frame `df1`. City matching is case-insensitive
#' because the input is upper-cased before it is compared with `city_name`.
#'
#' @param city City name.
#' @param y Year to plot.
#' @return A ggplot histogram of `log(rainfall)` for the selected rows.
#'   Stops with an error if the city or the year does not occur in `df1`.
rainfall_dist <- function(city, y) {
  city_upper <- toupper(city)

  # Guard clauses: the city is validated before the year.
  if (!(city_upper %in% df1$city_name)) {
    stop("City name doesn't exist!")
  }
  if (!(y %in% df1$year)) {
    stop("Year doesn't exist!")
  }

  selected <- filter(df1, city_name == city_upper, year == as.numeric(y))
  ggplot(selected, aes(log(rainfall))) +
    geom_histogram()
}
# Example: histogram of log rainfall for Sydney in 2015 (lower-case input is
# accepted because the function upper-cases the city name).
rainfall_dist("sydney", 2015)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 396 rows containing non-finite values (stat_bin).
# Per-year, per-city rainfall summary from 2014 onwards: mean, standard
# deviation and the 95% confidence interval of the mean.
# NOTE(review): the preview of df1 shows each rainfall record twice (it
# appears to be repeated once per temperature type after the join), which
# would inflate n and narrow the intervals - confirm whether the rows should
# be de-duplicated before summarising.
rain_df <- df1 %>%
  filter(year >= 2014) %>%
  select(year, city_name, rainfall) %>%
  group_by(year, city_name) %>%
  summarise(
    rainfall_mean = sample_mean(rainfall),
    # Previously computed with sample_mean(), which made this column a copy
    # of rainfall_mean.
    rainfall_sd = sample_sd(rainfall),
    lower_bound = calculate_CI(rainfall)[1],
    upper_bound = calculate_CI(rainfall)[2],
    # Return an ungrouped table so later steps do not inherit the grouping.
    .groups = "drop"
  )
## [1] 2.992674 4.694608
## [1] 1.217762 1.923674
## [1] 3.180624 4.486576
## [1] 5.900879 9.845463
## [1] 2.012387 2.958250
## [1] 4.533109 7.816081
## [1] 1.134468 1.888247
## [1] 3.306707 4.819419
## [1] 6.124078 9.473105
## [1] 2.833977 4.498626
## [1] 1.465417 2.719706
## [1] 1.698006 2.609098
## [1] 3.729219 4.919189
## [1] 6.34664 8.87211
## [1] 2.924211 4.647374
## [1] 1.971184 3.467019
## [1] 1.038765 1.638921
## [1] 4.231374 6.162122
## [1] 6.038234 9.768359
## [1] 2.295725 3.630446
## [1] 1.752538 3.185393
## [1] 0.926316 1.667091
## [1] 3.658153 5.494390
## [1] 6.045517 10.599645
## [1] 2.085155 3.410450
## [1] 1.376062 2.818605
## [1] 0.9609772 2.1701486
## [1] 2.100971 3.347918
## [1] 1.859987 4.266329
## [1] 1.632691 3.522276
## [1] 2.992674 4.694608
## [1] 1.217762 1.923674
## [1] 3.180624 4.486576
## [1] 5.900879 9.845463
## [1] 2.012387 2.958250
## [1] 4.533109 7.816081
## [1] 1.134468 1.888247
## [1] 3.306707 4.819419
## [1] 6.124078 9.473105
## [1] 2.833977 4.498626
## [1] 1.465417 2.719706
## [1] 1.698006 2.609098
## [1] 3.729219 4.919189
## [1] 6.34664 8.87211
## [1] 2.924211 4.647374
## [1] 1.971184 3.467019
## [1] 1.038765 1.638921
## [1] 4.231374 6.162122
## [1] 6.038234 9.768359
## [1] 2.295725 3.630446
## [1] 1.752538 3.185393
## [1] 0.926316 1.667091
## [1] 3.658153 5.494390
## [1] 6.045517 10.599645
## [1] 2.085155 3.410450
## [1] 1.376062 2.818605
## [1] 0.9609772 2.1701486
## [1] 2.100971 3.347918
## [1] 1.859987 4.266329
## [1] 1.632691 3.522276
## `summarise()` has grouped output by 'year'. You can override using the `.groups`
## argument.
rain_df
## # A tibble: 30 × 6
## # Groups: year [6]
## year city_name rainfall_mean rainfall_sd lower_bound upper_bound
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2014 BRISBANE 3.84 3.84 2.99 4.69
## 2 2014 CANBERRA 1.57 1.57 1.22 1.92
## 3 2014 MELBOURNE 3.83 3.83 3.18 4.49
## 4 2014 PERTH 7.87 7.87 5.90 9.85
## 5 2014 SYDNEY 2.49 2.49 2.01 2.96
## 6 2015 BRISBANE 6.17 6.17 4.53 7.82
## 7 2015 CANBERRA 1.51 1.51 1.13 1.89
## 8 2015 MELBOURNE 4.06 4.06 3.31 4.82
## 9 2015 PERTH 7.80 7.80 6.12 9.47
## 10 2015 SYDNEY 3.67 3.67 2.83 4.50
## # … with 20 more rows
# Yearly mean rainfall per city with error bars spanning lower_bound to
# upper_bound, i.e. the confidence interval of the mean computed in rain_df.
# The title now says so; it previously described the bars as a standard
# deviation. The colour key is the legend label itself ("Mean") and the
# palette is matched by name. The plot-level `fill` mapping was dropped
# because it does not affect how these geoms are drawn here.
ggplot(rain_df, aes(x = year)) +
  geom_point(aes(y = rainfall_mean, color = "Mean")) +
  geom_line(aes(y = rainfall_mean, color = "Mean")) +
  geom_errorbar(aes(ymin = lower_bound, ymax = upper_bound), width = 0.2) +
  facet_grid(city_name ~ .) +
  labs(
    title = "Rainfall mean and 95% confidence interval",
    x = "Year",
    y = "Rainfall",
    color = " ",
    subtitle = "Perth had the heaviest rainfall among the 5 cities. Rainfall decreased from 2018 in all cities except for Canberra.",
    caption = "Created by Ying Zhang"
  ) +
  scale_color_manual(values = c(Mean = "coral3"))
```