Research Methods & Data Analyses
AUTHOR
Jal Vashi (N0990629)
Week 2 - Tutorial
It was nice to understand and troubleshoot R and R Studio.
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the Palmer penguins data set and preview its first five columns.
library(palmerpenguins)
data("penguins")
select(penguins, 1:5)
# A tibble: 344 × 5
species island bill_length_mm bill_depth_mm flipper_length_mm
<fct> <fct> <dbl> <dbl> <int>
1 Adelie Torgersen 39.1 18.7 181
2 Adelie Torgersen 39.5 17.4 186
3 Adelie Torgersen 40.3 18 195
4 Adelie Torgersen NA NA NA
5 Adelie Torgersen 36.7 19.3 193
6 Adelie Torgersen 39.3 20.6 190
7 Adelie Torgersen 38.9 17.8 181
8 Adelie Torgersen 39.2 19.6 195
9 Adelie Torgersen 34.1 18.1 193
10 Adelie Torgersen 42 20.2 190
# ℹ 334 more rows
# Histogram of bill length, with outline and fill colour mapped to species.
# The species split comes from the aesthetics; ggplot() does not use dplyr
# grouping, so the data frame is passed directly instead of through group_by().
data("penguins")
ggplot(penguins, aes(x = bill_length_mm, color = species, fill = species)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).
library(tidyverse)
library(palmerpenguins)
data("penguins")

# Box plot of bill length for each species, semi-transparent so the outline
# colour stays visible, with enlarged axis text and titles.
# The per-species boxes come from the x aesthetic; ggplot() does not use dplyr
# grouping, so the data frame is passed directly instead of through group_by().
ggplot(
  penguins,
  aes(x = species, y = bill_length_mm, color = species, fill = species)
) +
  geom_boxplot(alpha = 0.5) +
  theme(
    axis.text = element_text(size = 16),
    axis.title = element_text(size = 16)
  )
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Loading required package: kableExtra
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(gt)

# Mean, standard deviation and sample size of bill length per species.
# Only rows with a missing bill length are dropped. na.omit() removed every row
# with an NA in any column, which discarded penguins that have a valid bill
# length: the table covered 333 birds although only 2 of the 344 rows lack a
# bill measurement.
penguins %>%
  drop_na(bill_length_mm) %>%
  group_by(species) %>%
  summarise(
    mean = mean(bill_length_mm),
    sd = sd(bill_length_mm),
    n = n()
  )
# A tibble: 3 × 4
species mean sd n
<fct> <dbl> <dbl> <int>
1 Adelie 38.8 2.66 151
2 Chinstrap 48.8 3.34 68
3 Gentoo 47.5 3.08 123
Week 3 - Diamonds Exercise 6.7
library(tidyverse)

# One row per state, summarising the poptotal column of the midwest data set.
midwest %>%
  group_by(state) %>%
  summarise(
    # Centre of the distribution
    poptotalmean = mean(poptotal),
    poptotalmed = median(poptotal),
    # Extremes
    popmax = max(poptotal),
    popmin = min(poptotal),
    # Number of different values, and the value in the state's first row
    popdistinct = n_distinct(poptotal),
    popfirst = first(poptotal),
    # Flags: is any value below 5,000 / above 2,000,000?
    popany = any(poptotal < 5000),
    popany2 = any(poptotal > 2000000)
  ) %>%
  ungroup()
# A tibble: 5 × 9
state poptotalmean poptotalmed popmax popmin popdistinct popfirst popany
<chr> <dbl> <dbl> <int> <int> <int> <int> <lgl>
1 IL 112065. 24486. 5105067 4373 101 66090 TRUE
2 IN 60263. 30362. 797159 5315 92 31095 FALSE
3 MI 111992. 37308 2111687 1701 83 10145 TRUE
4 OH 123263. 54930. 1412140 11098 88 25371 FALSE
5 WI 67941. 33528 959275 3890 72 15682 TRUE
# ℹ 1 more variable: popany2 <lgl>
Week 3 Diamonds Exercise 6.7 (1)
library(tidyverse)
# Inspect the full diamonds data set in the data viewer.
tibble::view(diamonds)
Week 3 Diamonds Exercise 6.7 (2)
library(tidyverse)
library(dplyr)

# Sort diamonds from most to least expensive, breaking price ties by cut.
# The pipeline starts from the data itself rather than from view(), whose job
# is to open a viewer, not to supply data. The earlier arrange() calls are
# dropped because each later arrange() re-sorted the rows, so only this final
# ordering was ever visible in the result.
diamonds %>%
  arrange(desc(price), cut)
# A tibble: 53,940 × 10
carat cut color clarity depth table price x y z
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 2.29 Premium I VS2 60.8 60 18823 8.5 8.47 5.16
2 2 Very Good G SI1 63.5 56 18818 7.9 7.97 5.04
3 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56
4 2.07 Ideal G SI2 62.5 55 18804 8.2 8.13 5.11
5 2 Very Good H SI1 62.8 57 18803 7.95 8 5.01
6 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24
7 2.04 Premium H SI1 58.1 60 18795 8.37 8.28 4.84
8 2 Premium I VS1 60.8 59 18795 8.13 8.02 4.91
9 1.71 Premium F VS2 62.3 59 18791 7.57 7.53 4.7
10 2.15 Ideal G SI2 62.6 54 18791 8.29 8.35 5.21
# ℹ 53,930 more rows
Week 3 Diamonds Exercise 6.7 (3)
# Sort by clarity first (I1 at the top), then from most to least expensive
# within each clarity grade. Both keys are given in one arrange() so the
# intended ordering is explicit instead of relying on two successive sorts,
# and the pipeline starts from the data rather than from view().
diamonds %>%
  arrange(clarity, desc(price))
# A tibble: 53,940 × 10
carat cut color clarity depth table price x y z
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 4.5 Fair J I1 65.8 58 18531 10.2 10.2 6.72
2 5.01 Fair J I1 65.5 59 18018 10.7 10.5 6.98
3 4.13 Fair H I1 64.8 61 17329 10 9.85 6.43
4 3.01 Ideal J I1 65.4 60 16538 8.99 8.93 5.86
5 3.67 Premium I I1 62.4 56 16193 9.86 9.81 6.13
6 4 Very Good I I1 63.3 58 15984 10.0 9.94 6.31
7 3.4 Fair D I1 66.8 52 15964 9.42 9.34 6.27
8 4.01 Premium I I1 61 61 15223 10.1 10.1 6.17
9 4.01 Premium J I1 62.5 62 15223 10.0 9.94 6.24
10 3 Premium G I1 59.7 60 13203 9.42 9.26 5.58
# ℹ 53,930 more rows
Week 3 Diamonds Exercise 6.7 (4)
library(tidyverse)
library(dplyr)

# Add three columns to diamonds: price reduced by 250, a constant text value
# and a constant logical flag.
mutate(
  diamonds,
  salePrice = price - 250,
  Values = "something",
  Simple = TRUE
)
# A tibble: 53,940 × 13
carat cut color clarity depth table price x y z salePrice
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 76
2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 76
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 77
4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 84
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 85
6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 86
7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 86
8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 87
9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 87
10 0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 88
# ℹ 53,930 more rows
# ℹ 2 more variables: Values <chr>, Simple <lgl>
Week 3 Diamonds Exercise 6.7 (5)
library(tidyverse)
library(dplyr)

# Keep every column of diamonds except x, y and z.
select(diamonds, -c(x, y, z))
# A tibble: 53,940 × 7
carat cut color clarity depth table price
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int>
1 0.23 Ideal E SI2 61.5 55 326
2 0.21 Premium E SI1 59.8 61 326
3 0.23 Good E VS1 56.9 65 327
4 0.29 Premium I VS2 62.4 58 334
5 0.31 Good J SI2 63.3 58 335
6 0.24 Very Good J VVS2 62.8 57 336
7 0.24 Very Good I VVS1 62.3 57 336
8 0.26 Very Good H SI1 61.9 55 337
9 0.22 Fair E VS2 65.1 61 337
10 0.23 Very Good H VS1 59.4 61 338
# ℹ 53,930 more rows
Week 3 Diamonds Exercise 6.7 (6)
library(tidyverse)
library(dplyr)

# Mean carat for each cut, returned as one row per cut in column m.
summarise(
  group_by(diamonds, cut),
  m = mean(carat)
)
# A tibble: 5 × 2
cut m
<ord> <dbl>
1 Fair 1.05
2 Good 0.849
3 Very Good 0.806
4 Premium 0.892
5 Ideal 0.703
Week 3 Diamonds Exercise 6.7 (7)
# Total number of diamonds. n() counts the rows actually being summarised,
# whereas nrow(diamonds) looked the data set up again by name and would report
# the wrong figure as soon as rows were filtered or grouped earlier in the pipe.
diamonds %>%
  summarise(totalNum = n())
# A tibble: 1 × 1
totalNum
<int>
1 53940
Week 3 Diamonds (Good Ques, Bad Ques)
Good Ques: How do cut and clarity influence the price of a diamond?
Bad Ques: Is there any difference between cuts of diamonds?
Week 4 Data visualisation
library (tidyverse)
library (dplyr)
# Base-graphics scatter plot with mtcars$wt on the x-axis and mtcars$mpg on
# the y-axis. The columns are passed as two vectors, so the default axis
# labels are the expressions themselves.
plot (mtcars$ wt, mtcars$ mpg)
library(readxl)
# Import the SPAD/PPFD workbook from the working directory into `data`.
data <- readxl::read_excel(path = "SPAD and PPFD.xlsx")
New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
# Packages for the cricket-chirp plots. Each statement needs its own line;
# several calls run together on one line are not valid R.
library(tidyverse)
library(modeldata)
library(dplyr)
library(ggplot2)
library(knitr)
library(rmarkdown)

# Help pages for the plotting function and the data set, then a look at the
# data in the viewer.
?ggplot
?crickets
View(crickets)
The basics
# Scatter plot of chirp rate against temperature with axis labels, a title
# and a source caption. String arguments use straight quotes; curly quotes
# are not valid R string delimiters.
ggplot(crickets, aes(x = temp, y = rate)) +
  geom_point() +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  )

# Same plot with points coloured by species, using the Dark2 Brewer palette.
ggplot(crickets, aes(x = temp, y = rate, color = species)) +
  geom_point() +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    color = "Species",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  ) +
  scale_color_brewer(palette = "Dark2")
Modifying basic properties of the plot
# Fixed (non-mapped) point properties are set inside geom_point(): red,
# size 2, mostly transparent squares. String arguments use straight quotes;
# curly quotes are not valid R string delimiters.
ggplot(crickets, aes(x = temp, y = rate)) +
  geom_point(
    color = "red",
    size = 2,
    alpha = .3,
    shape = "square"
  ) +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  )
Learn more about the options for the geom_abline()
Adding another layer
# Scatter plot with a straight-line fit (method = "lm") drawn on top and the
# confidence band switched off. String arguments use straight quotes; curly
# quotes are not valid R string delimiters.
ggplot(crickets, aes(x = temp, y = rate)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  )

# With colour mapped to species, a separate line is fitted for each species.
ggplot(crickets, aes(x = temp, y = rate, color = species)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Temperature",
    y = "Chirp rate",
    color = "Species",
    title = "Cricket chirps",
    caption = "Source: McDonald (2009)"
  ) +
  scale_color_brewer(palette = "Dark2")
Other plots
# Distribution of chirp rate as a histogram with 15 bins.
# String arguments below use straight quotes; curly quotes are not valid R
# string delimiters.
ggplot(crickets, aes(x = rate)) +
  geom_histogram(bins = 15) # one quantitative variable

# The same distribution drawn as a frequency polygon.
ggplot(crickets, aes(x = rate)) +
  geom_freqpoly(bins = 15)

# Number of observations per species, with a fixed outline and fill colour.
ggplot(crickets, aes(x = species)) +
  geom_bar(color = "black", fill = "lightblue")

# Bars filled by species; the legend would repeat the x-axis, so it is hidden.
ggplot(crickets, aes(x = species, fill = species)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_brewer(palette = "Dark2")

# Chirp rate by species as box plots on a minimal theme.
ggplot(crickets, aes(x = species, y = rate, color = species)) +
  geom_boxplot(show.legend = FALSE) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

?theme_minimal()
not great:
# Histogram of chirp rate with both species drawn in one panel.
# String arguments below use straight quotes; curly quotes are not valid R
# string delimiters.
ggplot(crickets, aes(x = rate, fill = species)) +
  geom_histogram(bins = 15) +
  scale_fill_brewer(palette = "Dark2")

# One panel per species; the panel titles identify the species, so the legend
# is hidden.
ggplot(crickets, aes(x = rate, fill = species)) +
  geom_histogram(bins = 15, show.legend = FALSE) +
  facet_wrap(~species) +
  scale_fill_brewer(palette = "Dark2")

?facet_wrap

# Panels stacked in a single column so the species share one x-axis.
ggplot(crickets, aes(x = rate, fill = species)) +
  geom_histogram(bins = 15, show.legend = FALSE) +
  facet_wrap(~species, ncol = 1) +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal()
Scientific methods.
The scientific method is an empirical method for gaining knowledge.
It involves careful observation and interpretation of what is observed.
It involves forming a hypothesis and then testing it through reasoning and experiment.
නිරීක්ෂණය / ගැටළුව 觀察 / 問問題 Observation / question මාතෘකාව ගවේෂණය 研究吓 目標課題 Research topic area කල්පිතය 提出假說 Hypothesis පරීක්ෂණය 用實驗嚟 驗證 Test with experiment දත්ත විශ්ලේෂණය 分析吓 啲 data Analyze data අවසාන නිගමනය විද්යාත්මක ක්රමය 做報告 總結 科學 方法 Report conclusions Scientific method
Before an experiment can be run, knowledge must be developed through characterizations, hypotheses, and predictions; experiments are then necessary to test them.
The scientific method can be thought of as the following steps:
- Define a question
- Gather information and resources
- Form a hypothesis
- Test the hypothesis by performing an experiment and collecting data in a reproducible manner (for future testing)
- Analyze the data
- Interpret the data and draw conclusions that serve as a starting point for a new hypothesis
- Publish results
- Retest (frequently done by other scientists to find knowledge gaps)
How to write a strong hypothesis.
A good hypothesis should meet the following criteria:
It should review and identify a knowledge gap in the field of study.
The study should be replicable and give the same results when it is repeated under the same conditions.
The statement should be brief and not too descriptive.
The statement should provide a clear notion regarding the knowledge gap.
It should have a null and an alternative hypothesis. The alternative hypothesis is the opposite of the null hypothesis.
Week 5 How to choose the correct analyses & Hypothetico-Deductive reasoning
library (tidyverse)
library (dplyr)
library (modeldata)
Attaching package: 'modeldata'
The following object is masked _by_ '.GlobalEnv':
penguins
The following object is masked from 'package:palmerpenguins':
penguins
# Label each flower "small" or "big" by comparing its sepal length with the
# median sepal length of the whole data set.
data("iris")
mutate(
  iris,
  size = ifelse(Sepal.Length >= median(Sepal.Length), "big", "small")
)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species size
1 5.1 3.5 1.4 0.2 setosa small
2 4.9 3.0 1.4 0.2 setosa small
3 4.7 3.2 1.3 0.2 setosa small
4 4.6 3.1 1.5 0.2 setosa small
5 5.0 3.6 1.4 0.2 setosa small
6 5.4 3.9 1.7 0.4 setosa small
7 4.6 3.4 1.4 0.3 setosa small
8 5.0 3.4 1.5 0.2 setosa small
9 4.4 2.9 1.4 0.2 setosa small
10 4.9 3.1 1.5 0.1 setosa small
11 5.4 3.7 1.5 0.2 setosa small
12 4.8 3.4 1.6 0.2 setosa small
13 4.8 3.0 1.4 0.1 setosa small
14 4.3 3.0 1.1 0.1 setosa small
15 5.8 4.0 1.2 0.2 setosa big
16 5.7 4.4 1.5 0.4 setosa small
17 5.4 3.9 1.3 0.4 setosa small
18 5.1 3.5 1.4 0.3 setosa small
19 5.7 3.8 1.7 0.3 setosa small
20 5.1 3.8 1.5 0.3 setosa small
21 5.4 3.4 1.7 0.2 setosa small
22 5.1 3.7 1.5 0.4 setosa small
23 4.6 3.6 1.0 0.2 setosa small
24 5.1 3.3 1.7 0.5 setosa small
25 4.8 3.4 1.9 0.2 setosa small
26 5.0 3.0 1.6 0.2 setosa small
27 5.0 3.4 1.6 0.4 setosa small
28 5.2 3.5 1.5 0.2 setosa small
29 5.2 3.4 1.4 0.2 setosa small
30 4.7 3.2 1.6 0.2 setosa small
31 4.8 3.1 1.6 0.2 setosa small
32 5.4 3.4 1.5 0.4 setosa small
33 5.2 4.1 1.5 0.1 setosa small
34 5.5 4.2 1.4 0.2 setosa small
35 4.9 3.1 1.5 0.2 setosa small
36 5.0 3.2 1.2 0.2 setosa small
37 5.5 3.5 1.3 0.2 setosa small
38 4.9 3.6 1.4 0.1 setosa small
39 4.4 3.0 1.3 0.2 setosa small
40 5.1 3.4 1.5 0.2 setosa small
41 5.0 3.5 1.3 0.3 setosa small
42 4.5 2.3 1.3 0.3 setosa small
43 4.4 3.2 1.3 0.2 setosa small
44 5.0 3.5 1.6 0.6 setosa small
45 5.1 3.8 1.9 0.4 setosa small
46 4.8 3.0 1.4 0.3 setosa small
47 5.1 3.8 1.6 0.2 setosa small
48 4.6 3.2 1.4 0.2 setosa small
49 5.3 3.7 1.5 0.2 setosa small
50 5.0 3.3 1.4 0.2 setosa small
51 7.0 3.2 4.7 1.4 versicolor big
52 6.4 3.2 4.5 1.5 versicolor big
53 6.9 3.1 4.9 1.5 versicolor big
54 5.5 2.3 4.0 1.3 versicolor small
55 6.5 2.8 4.6 1.5 versicolor big
56 5.7 2.8 4.5 1.3 versicolor small
57 6.3 3.3 4.7 1.6 versicolor big
58 4.9 2.4 3.3 1.0 versicolor small
59 6.6 2.9 4.6 1.3 versicolor big
60 5.2 2.7 3.9 1.4 versicolor small
61 5.0 2.0 3.5 1.0 versicolor small
62 5.9 3.0 4.2 1.5 versicolor big
63 6.0 2.2 4.0 1.0 versicolor big
64 6.1 2.9 4.7 1.4 versicolor big
65 5.6 2.9 3.6 1.3 versicolor small
66 6.7 3.1 4.4 1.4 versicolor big
67 5.6 3.0 4.5 1.5 versicolor small
68 5.8 2.7 4.1 1.0 versicolor big
69 6.2 2.2 4.5 1.5 versicolor big
70 5.6 2.5 3.9 1.1 versicolor small
71 5.9 3.2 4.8 1.8 versicolor big
72 6.1 2.8 4.0 1.3 versicolor big
73 6.3 2.5 4.9 1.5 versicolor big
74 6.1 2.8 4.7 1.2 versicolor big
75 6.4 2.9 4.3 1.3 versicolor big
76 6.6 3.0 4.4 1.4 versicolor big
77 6.8 2.8 4.8 1.4 versicolor big
78 6.7 3.0 5.0 1.7 versicolor big
79 6.0 2.9 4.5 1.5 versicolor big
80 5.7 2.6 3.5 1.0 versicolor small
81 5.5 2.4 3.8 1.1 versicolor small
82 5.5 2.4 3.7 1.0 versicolor small
83 5.8 2.7 3.9 1.2 versicolor big
84 6.0 2.7 5.1 1.6 versicolor big
85 5.4 3.0 4.5 1.5 versicolor small
86 6.0 3.4 4.5 1.6 versicolor big
87 6.7 3.1 4.7 1.5 versicolor big
88 6.3 2.3 4.4 1.3 versicolor big
89 5.6 3.0 4.1 1.3 versicolor small
90 5.5 2.5 4.0 1.3 versicolor small
91 5.5 2.6 4.4 1.2 versicolor small
92 6.1 3.0 4.6 1.4 versicolor big
93 5.8 2.6 4.0 1.2 versicolor big
94 5.0 2.3 3.3 1.0 versicolor small
95 5.6 2.7 4.2 1.3 versicolor small
96 5.7 3.0 4.2 1.2 versicolor small
97 5.7 2.9 4.2 1.3 versicolor small
98 6.2 2.9 4.3 1.3 versicolor big
99 5.1 2.5 3.0 1.1 versicolor small
100 5.7 2.8 4.1 1.3 versicolor small
101 6.3 3.3 6.0 2.5 virginica big
102 5.8 2.7 5.1 1.9 virginica big
103 7.1 3.0 5.9 2.1 virginica big
104 6.3 2.9 5.6 1.8 virginica big
105 6.5 3.0 5.8 2.2 virginica big
106 7.6 3.0 6.6 2.1 virginica big
107 4.9 2.5 4.5 1.7 virginica small
108 7.3 2.9 6.3 1.8 virginica big
109 6.7 2.5 5.8 1.8 virginica big
110 7.2 3.6 6.1 2.5 virginica big
111 6.5 3.2 5.1 2.0 virginica big
112 6.4 2.7 5.3 1.9 virginica big
113 6.8 3.0 5.5 2.1 virginica big
114 5.7 2.5 5.0 2.0 virginica small
115 5.8 2.8 5.1 2.4 virginica big
116 6.4 3.2 5.3 2.3 virginica big
117 6.5 3.0 5.5 1.8 virginica big
118 7.7 3.8 6.7 2.2 virginica big
119 7.7 2.6 6.9 2.3 virginica big
120 6.0 2.2 5.0 1.5 virginica big
121 6.9 3.2 5.7 2.3 virginica big
122 5.6 2.8 4.9 2.0 virginica small
123 7.7 2.8 6.7 2.0 virginica big
124 6.3 2.7 4.9 1.8 virginica big
125 6.7 3.3 5.7 2.1 virginica big
126 7.2 3.2 6.0 1.8 virginica big
127 6.2 2.8 4.8 1.8 virginica big
128 6.1 3.0 4.9 1.8 virginica big
129 6.4 2.8 5.6 2.1 virginica big
130 7.2 3.0 5.8 1.6 virginica big
131 7.4 2.8 6.1 1.9 virginica big
132 7.9 3.8 6.4 2.0 virginica big
133 6.4 2.8 5.6 2.2 virginica big
134 6.3 2.8 5.1 1.5 virginica big
135 6.1 2.6 5.6 1.4 virginica big
136 7.7 3.0 6.1 2.3 virginica big
137 6.3 3.4 5.6 2.4 virginica big
138 6.4 3.1 5.5 1.8 virginica big
139 6.0 3.0 4.8 1.8 virginica big
140 6.9 3.1 5.4 2.1 virginica big
141 6.7 3.1 5.6 2.4 virginica big
142 6.9 3.1 5.1 2.3 virginica big
143 5.8 2.7 5.1 1.9 virginica big
144 6.8 3.2 5.9 2.3 virginica big
145 6.7 3.3 5.7 2.5 virginica big
146 6.7 3.0 5.2 2.3 virginica big
147 6.3 2.5 5.0 1.9 virginica big
148 6.5 3.0 5.2 2.0 virginica big
149 6.2 3.4 5.4 2.3 virginica big
150 5.9 3.0 5.1 1.8 virginica big