reading <- read.csv("full_term_report.csv", stringsAsFactors = FALSE)
dim(reading)
## [1] 646 16
summary(reading)
## X Time Nickname Book_title
## Min. : 1.0 Length:646 Length:646 Length:646
## 1st Qu.:162.2 Class :character Class :character Class :character
## Median :323.5 Mode :character Mode :character Mode :character
## Mean :323.5
## 3rd Qu.:484.8
## Max. :646.0
## Author Publisher Genre
## Length:646 Length:646 Length:646
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Summary Opinion Stars Campus
## Length:646 Length:646 Min. :1.000 Length:646
## Class :character Class :character 1st Qu.:2.000 Class :character
## Mode :character Mode :character Median :3.000 Mode :character
## Mean :2.992
## 3rd Qu.:4.000
## Max. :5.000
## Gender Teacher_Assessment Plagiarism_Source
## Length:646 Min. :0.000 Length:646
## Class :character 1st Qu.:2.000 Class :character
## Mode :character Median :3.000 Mode :character
## Mean :2.745
## 3rd Qu.:3.000
## Max. :4.000
## Teacher_Assessment_2 Period
## Min. :0.000 Length:646
## 1st Qu.:2.000 Class :character
## Median :3.000 Mode :character
## Mean :2.837
## 3rd Qu.:3.000
## Max. :4.000
str(reading)
## 'data.frame': 646 obs. of 16 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Time : chr "08/05/2018 13:02:23" "15/05/2018 12:57:50" "08/05/2018 05:48:55" "23/04/2018 21:47:37" ...
## $ Nickname : chr "ryo" "ryo" "mako" "mako" ...
## $ Book_title : chr "Six Sketches" "Marcel and the Shakespeare Letters" "The Voyages of Sindbad Sailor" "A Death in Oxford" ...
## $ Author : chr "Leslie Dunkling" "Stephen Rabley" "auther unknown" "Richard MacAndrew" ...
## $ Publisher : chr "Penguin" "Pearson" "Pearson" "Cambridge" ...
## $ Genre : chr "children's literature" "children's literature" "action adventure" "mystery" ...
## $ Summary : chr "There are six funny stories written.\n1. I'm Right !\nNorma and Corin forgot the train ticket, also ride on wro"| __truncated__ "The Story of two mice. Shakespeare letter bring Professor Barton's flat." "The Sindbad stories come from the Arabian Nights.\nOne day Sindbad went on a journey aboard a ship sailing for "| __truncated__ "One day a female doctor named Janet was killed in the city of Oxford, England. Frank Williams of the criminal q"| __truncated__ ...
## $ Opinion : chr "I think Norma is stupid." "This two mice is so clever." "As Sindbad continued his voyage for 27 years, I also wanted to focus on the activities of the university department." "Since I read this book and it is a close human relationship, I thought that I should not forget the courtesy an"| __truncated__ ...
## $ Stars : int 1 1 2 1 3 2 3 2 2 2 ...
## $ Campus : chr "Tokyo" "Tokyo" "Tokyo" "Tokyo" ...
## $ Gender : chr "Male" "Male" "Female" "Female" ...
## $ Teacher_Assessment : int 2 1 2 3 3 4 3 3 3 3 ...
## $ Plagiarism_Source : chr "" "" "" "" ...
## $ Teacher_Assessment_2: int 2 1 3 4 3 4 3 3 3 3 ...
## $ Period : chr NA NA NA NA ...
reading <- reading[!duplicated(reading$Summary),]
dim(reading)
## [1] 639 16
vars <- c("Nickname","Book_title", "Author","Publisher",
"Genre", "Stars", "Campus","Gender", "Period")
reading[,vars] <- lapply(reading[,vars], factor)
reading$Time <- as.POSIXct(reading$Time, format= "%d/%m/%Y %H:%M:%S")
str(reading)
## 'data.frame': 639 obs. of 16 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Time : POSIXct, format: "2018-05-08 13:02:23" "2018-05-15 12:57:50" ...
## $ Nickname : Factor w/ 59 levels "alice","asahi",..: 39 39 22 22 22 22 22 22 22 22 ...
## $ Book_title : Factor w/ 338 levels "a chrismas carol",..: 203 130 307 3 331 5 136 80 155 259 ...
## $ Author : Factor w/ 242 levels "a stenen spielberg",..: 131 216 21 195 124 73 196 104 44 201 ...
## $ Publisher : Factor w/ 7 levels "Cambridge","Macmillan",..: 6 5 5 1 4 4 5 6 6 6 ...
## $ Genre : Factor w/ 13 levels "action adventure",..: 3 3 1 7 9 9 5 5 8 1 ...
## $ Summary : chr "There are six funny stories written.\n1. I'm Right !\nNorma and Corin forgot the train ticket, also ride on wro"| __truncated__ "The Story of two mice. Shakespeare letter bring Professor Barton's flat." "The Sindbad stories come from the Arabian Nights.\nOne day Sindbad went on a journey aboard a ship sailing for "| __truncated__ "One day a female doctor named Janet was killed in the city of Oxford, England. Frank Williams of the criminal q"| __truncated__ ...
## $ Opinion : chr "I think Norma is stupid." "This two mice is so clever." "As Sindbad continued his voyage for 27 years, I also wanted to focus on the activities of the university department." "Since I read this book and it is a close human relationship, I thought that I should not forget the courtesy an"| __truncated__ ...
## $ Stars : Factor w/ 5 levels "1","2","3","4",..: 1 1 2 1 3 2 3 2 2 2 ...
## $ Campus : Factor w/ 2 levels "Saitama","Tokyo": 2 2 2 2 2 2 2 2 2 2 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 1 1 1 1 1 1 1 1 ...
## $ Teacher_Assessment : int 2 1 2 3 3 4 3 3 3 3 ...
## $ Plagiarism_Source : chr "" "" "" "" ...
## $ Teacher_Assessment_2: int 2 1 3 4 3 4 3 3 3 3 ...
## $ Period : Factor w/ 3 levels "Ikebukuro period 2 (10:45am to 12:15pm)",..: NA NA NA NA NA NA NA NA NA NA ...
library(quanteda)
## Package version: 1.3.0
## Parallel computing: 2 of 4 threads used.
## See https://quanteda.io for tutorials and examples.
##
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
##
## View
sentences <- tokens(reading$Summary, what="sentence")
typeof(sentences)
## [1] "list"
sentencesDf <- as.data.frame(table(unlist(sentences)))
head(sentencesDf)
## Var1
## 1 1804,Britain was at war with France.
## 2 Gulliver 's travel note is a satire novel drawn by Irish satirist Jonathan Swift.There are chapters 1 to 4, but this book shows only chapter 1 that gulliver goes to Lilliout "".
## 3 People in crossways is not happy.
## 4 This is story about Brazil.
## 5 This story is princess Diana.
## 6 When the sky is grey and it is raining ,a girl whose name is Carla wants to buy a new umbrella.
## Freq
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
duplicates <- sentencesDf[sentencesDf$Freq>1,]
nrow(duplicates)*2/nrow(sentencesDf)
## [1] 0.02314571
duplicates$Var1
## [1] 1.
## [2] A letter is written what he is Washington now.
## [3] After all , Beth recovered , like that their father returned to their home.
## [4] After that her friend invited her to the party.
## [5] Alice hears two customs officers talking.
## [6] Amy is fourth daughter.
## [7] As they were suspected of the murdering, they went to the court.
## [8] At the party she met Tom and went to his house.
## [9] Basketball is Michael's life.
## [10] Beth is third daughter.She doesn't go to school.
## [11] But their father return to this country from the war.Because he were sick.
## [12] But he was often angry with her.
## [13] But she didn't believe it.
## [14] But they finally dyed their hands on foul play.
## [15] Cinque and his clue were catured in Connecticut.
## [16] Cinque began to broke the other slave`s chains.
## [17] Cinque was a man who lived in Sierra Leone and was kidnapped as a slave.
## [18] Cinque`s side, the young lowyer, Baldwin said trading native African is illegal, so we should free them.
## [19] Everyone's wish come true.
## [20] For example, They give their mother a christmas present and be provided the dinner for Lawrence of neighbor.
## [21] Four sisters lives strenuously ,but such time Beth becomes sick.
## [22] Four sisters tried to live strenuously.
## [23] Four students went to see football with the teacher one day.
## [24] George came to there everyday.
## [25] He arrived at the beach.
## [26] He gave her anything which she wanted.
## [27] He is a private detective of Los Angeles.
## [28] he thinks.
## [29] He was disappointed.
## [30] He was very sad.
## [31] His name is David.
## [32] His name is Mister Fruit.
## [33] His name is Toby.
## [34] However two of them told Cinque wrong way. it was a way to America.
## [35] However, there was no people.
## [36] In the 19th century, there was slave trading.
## [37] In the court, as Spansh Queen requested that back them to Spain as they are her slaves, the court was once stopped. but again the court was held and Cinque and American were debated.
## [38] In the shiip to Cuba, there was a big storm which broke Cinque`s chains.
## [39] It is Lisa's first visit to London.
## [40] It's Tom!
## [41] Jo is second daughter.She is fifteen years old.
## [42] Lisa and Alice are going to stay the Hotel Oracle.they are enjoy here.
## [43] Meg is eldest daughter of a March family.She is sixteen years old.She's working as a private teacher.
## [44] Michael Jordan is a basketball player.
## [45] Next year, Mister Fruit is Miss Fruit.
## [46] On the other hand, the American side which including president`s intending asked the proof of their born because president agreed with the slavery, he was afraid of losing his popular for the election. the debate was a long battle, but in the end the voice of Cinque through the transrater moved the juries, and they got freedom.
## [47] One day she met the men which is a asylum seeker in the centre which she worked in.
## [48] One day,they heared a rumor that Matt Lepadi and Claudia Carman who are Claudia's fiancee are staying on this island.When Lisa on the balcony,she saw two peaple on the balcony of the hotel Astra.they quarrel.Lisa are impatient, but She know this is practice their scens after fight.
## [49] People in every country know his name.
## [50] Rick Evelyn and Alex found the gold bracelet of Anubis in Egypt ruins.
## [51] She could meet him.
## [52] She decided to leave Tom.
## [53] She found her things in the house.
## [54] She is twenteen years old.She goes to school.
## [55] She knew that she love he and not Tom.
## [56] She lived with Tom of her husband.
## [57] She loved him too.
## [58] She started live alone.
## [59] She went to meet George.
## [60] She's working at aunt's house.
## [61] Soon after the chains were broken, the slaves started to kill the ship mans who was sailing them to their colony. they had bloody battle on the ship, finally the slaves got win and they made two spansh man were captured so that they can sail to their country.
## [62] That practice was hard and difficult, but everyone helped and got over it.
## [63] The hero of this story is Lenny Samuel.
## [64] The main character of this story is the four unwilling students of a public high school.
## [65] The point of the court focused on where they wewe born in.
## [66] The police thought George took things from her flat.
## [67] The public high school they attended was low-priced because they had little money and were not smart.
## [68] The teacher decides to compete with private high school by Tetraslon as a trigger.
## [69] Their father went to a war because there are no enough money.
## [70] Then she had to go Scotland because of her friend's weeding.
## [71] There is a new man at the market.
## [72] There was a good private high school which is rich near this high school, but it did not become a comparison.
## [73] They are happy.
## [74] They danced together.
## [75] They encounter a rare competition called Tetraslon.
## [76] They gave a letter from their father.
## [77] They had a lot of people cooperate, such as school and parents.
## [78] They loved each other.
## [79] They tried practicing for a year and came to the game.
## [80] They want to be David Beckham.
## [81] They went to the hotel.
## [82] They were fall in love.
## [83] They were looking for iron and other metals.
## [84] This is the story of one day at the office.
## [85] This story of main character is four people.
## [86] This time , I'll write about "Julius Caesar".
## [87] Though they were falling students, motivation and love gradually started growing.
## [88] When she came back to the home, some of her things weren't there.
## 7604 Levels: 1804,Britain was at war with France. ...
for(i in 1:nrow(duplicates)){
print(reading[grep(duplicates$Var1[i], reading$Summary),][3:4])
}
## Nickname Book_title
## 1 ryo six sketches
## 6 mako a little princess
## 9 mako nelson mandela
## 11 mako a christmas carol
## 14 mako romeo and juliet
## 17 yuto_y mother teresa
## 20 yuto_y rip van winkle and the legend of sleepy hollow
## 26 yuto_y the gift of the magi and other stories
## 28 yuto_y marcel goes to hollywood
## 30 yuto_y pele
## 35 kakuto ten long years
## 45 kakuto surfer!
## 46 kakuto who wants to be a star?
## 47 kakuto pele'
## 53 tomoya mother teresa
## 56 tomoya suffer!
## 61 tomoya leaving microsoft to change the world
## 62 tomoya a history of britain
## 63 tomoya les misérables
## 64 tomoya ski race
## 66 tomoya the amazon rain forest
## 74 hide the swiss family robinson
## 75 hide american life
## 79 hide robin hood
## 87 rio gandhi
## 89 rio audrey hepburn
## 91 rio agatha christie, woman of mystery
## 92 rio martin luther king
## 96 jun jennifer lopez
## 97 jun new york
## 98 jun michael jordan
## 99 jun gandhi
## 100 jun muhammad ali
## 102 jun pele
## 104 atsuhito david beckham
## 110 atsuhito audrey hepburn
## 117 moeka the cat
## 118 moeka mother teresa
## 129 moeka michael jordan
## 149 shimpei this is london
## 158 fumiya brazil american republics series no.3
## 159 fumiya gulliver ‘s travel in lilliput
## 163 fumiya michael jordan
## 166 fumiya the adventures of tom sawyer
## 170 taichi lucky number
## 173 taichi michael jordan
## 176 taichi daniel radcliffe
## 177 taichi the beatles
## 178 taichi muhammad ali
## 182 taichi mother teresa
## 183 taichi jennifer lopez
## 188 yuto_s twenty thousand leagues under the sea
## 189 yuto_s american life
## 198 yuto_s the lost ship
## 200 yuto_s the penang file
## 205 miku the girl at the window
## 214 shintaro a tale of two cities
## 217 shintaro the man in the iron mask
## 238 ayumi robinson crusoe
## 247 yuki barack obama
## 253 yuki this is london
## 254 yuki the death of karen silkwood
## 260 harumi eye of the storm
## 263 harumi taste and other tales
## 265 harumi tales of the supernatural
## 268 harumi the cellist of sarajevo
## 298 kouki lucky number
## 309 genki the mummy
## 312 genki the death of karen silkwood
## 317 nono the godfather
## 318 nono amistad
## 320 nono the wave
## 322 nono amisted
## 324 nono martin luther king
## 327 nono brazil
## 330 nono the jungle book
## 331 asahi the mysterious death of charles bravo
## 332 asahi martin luther king
## 342 asahi the children of the new forest
## 344 yumi superbird
## 345 yumi simply suspence
## 346 yumi bad company
## 347 yumi death in the freezer
## 348 yumi stories from the five towns
## 349 yumi love story
## 350 yumi tooth and claw
## 351 yumi king's ransom
## 352 yumi changing their skies
## 353 yumi matty doolin
## 359 natsu the black tulip
## 360 natsu k's first case
## 393 yuuka tom cruise
## 397 yuuka brazil
## 400 yuuka guliver's travels in lilliput
## 405 yuuka princess diana
## 406 sera mother teresa
## 410 sera pele
## 413 sera princess diana
## 414 sera the beatles
## 418 sera the piano
## 423 kazuma this is london
## 435 miki the mysterious island
## 444 mayu tales from the arabian nights
## 446 mayu the cellist of sarajevo
## 450 mayu the adventure of tom sawyer
## 454 manabu pele
## 462 manabu michel jordan
## 470 eishi american life
## 471 eishi the mummy returns
## 474 shine david beckham
## 487 yuji jumanji
## 489 yuna the adventures of huckleberry finn
## 490 yuna northanger abbey
## 491 yuna the last of the mohican
## 494 yuna good wives
## 496 yuna billy budd
## 501 yuna a tale of two cities
## 519 kensuke the last of the mohicans
## 520 kensuke the battle of newton road
## 522 minako wuthering heights
## 525 minako audrey hepburn
## 526 minako pele
## 529 minako the scarlet letter
## 542 sit the body
## 543 sit twenty thousand leagues under the sea
## 561 atsushi ten long years
## 564 minami the last of mohicans
## 565 minami jane eyre
## 567 minami the trumpet-major
## 569 minami northanger abbey
## 570 minami billy budd
## 571 minami around the world in eighty days
## 573 tmk washington square
## 581 tmk daniel radcliffe
## 591 rino the black tulip
## 593 rino the prisoner of zenda
## 596 rino gulliver's travels in lilliput
## 599 richard michael jordan
## 602 mri rich man, poor man
## 607 mri speed queens
## 617 shun the umbrella
## 625 mike michael jordan
## 635 kandai northanger abbey
## 645 yuki rich man, poor man
## 646 yumi new yorkers
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 531 saki in the frame
## 568 minami in the frame
## 611 mri in the frame
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 400 yuuka guliver's travels in lilliput
## 596 rino gulliver's travels in lilliput
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 507 kasumi l.a. raid
## 511 kasumi l.a. detective
## Nickname Book_title
## 95 jun the fireboy
## 483 yuji the troy stone
## 497 yuna five playsfor today
## 579 tmk ali and his camera
## Nickname Book_title
## 146 shimpei l.a.raid
## 512 kasumi the house on the hill
## Nickname Book_title
## 353 yumi matty doolin
## 446 mayu the cellist of sarajevo
## 523 minako the house on the hill
## 550 alice the house on the hill
## 570 minami billy budd
## Nickname Book_title
## 195 yuto_s six sketches
## Nickname Book_title
## 583 tmk sara says no!
## 605 mri sara says no!
## 631 mike sara says no!
## Nickname Book_title
## 220 shintaro newspaper boy
## 315 genki newspaper boy
## 518 kensuke newspaper boy
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 198 yuto_s the lost ship
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 604 mri shootingstars
## 608 mri lisa and alice are good friends
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 129 moeka michael jordan
## 599 richard michael jordan
## Nickname Book_title
## 583 tmk sara says no!
## 631 mike sara says no!
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 604 mri shootingstars
## 608 mri lisa and alice are good friends
## Nickname Book_title
## 67 tomoya david beckham
## 126 moeka david beckham
## Nickname Book_title
## 162 fumiya the mummy returns
## 186 yuto_s the mummy returns
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 110 atsuhito audrey hepburn
## 279 mirei tales from the arabian nights
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 507 kasumi l.a. raid
## 511 kasumi l.a. detective
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 318 nono amistad
## 322 nono amisted
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## Nickname Book_title
## 605 mri sara says no!
## 631 mike sara says no!
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 566 minami the magic barber
## 587 tmk the magic barber
## Nickname Book_title
## 29 yuto_y girl meets boy
## 569 minami northanger abbey
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## 361 natsu the importance of being earnest
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## Nickname Book_title
## 126 moeka david beckham
## 612 mri david beckham
## Nickname Book_title
## 144 shimpei anna and the fighter
## 595 rino anna and the fighter
## Nickname Book_title
## 269 mirei within high fences
## 273 mirei within high fences
## Nickname Book_title
## 218 shintaro dangerous journey
## 594 rino dangerous journey
## Nickname Book_title
## 122 moeka sadie's big day at the office
## 165 fumiya sadie’s big day at the office
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
## [1] Nickname Book_title
## <0 rows> (or 0-length row.names)
reading[grep("Rick Evelyn and Alex found the gold bracelet of Anubis in Egypt ruins.", reading$Summary),][3:4]
## Nickname Book_title
## 162 fumiya the mummy returns
## 186 yuto_s the mummy returns
reading[grep("When Lisa on the balcony,she saw two peaple on the balcony of the hotel Astra.they quarrel.", reading$Summary),][3:4]
## Nickname Book_title
## 604 mri shootingstars
## 608 mri lisa and alice are good friends
reading <- reading[!(reading$Nickname == 'mri' & reading$Teacher_Assessment==0),]
dim(reading)
## [1] 638 16
reading[grep("For example, They give their mother a christmas present and be provided the dinner for Lawrence of neighbor.", reading$Summary),][3:4]
## Nickname Book_title
## 585 tmk little women
## 588 tmk little woman
reading <- reading[!(reading$Nickname=="tmk" & reading$Book_title=="little woman"),]
dim(reading)
## [1] 637 16
reading[grep("The public high school they attended was low-priced because they had little money and were not smart.", reading$Summary),][3:4]
## Nickname Book_title
## 5 mako who sir? me,sir?
## 12 mako who,sir? me,sir?'
identical(reading$Summary[8], reading$Summary[9])
## [1] FALSE
reading <- reading[!(reading$Nickname=="mako" & reading$Book_title=="who,sir? me,sir?'"),]
dim(reading)
## [1] 636 16
reading[grep("His name was Dorian and he was young and very beautiful.", reading$Summary),]
## X Time Nickname Book_title Author
## 589 589 2018-05-12 00:08:56 rino the picture of dorian gray oscar wild
## 592 592 2018-06-08 01:44:21 rino the picture of dorian gray oscar wild
## Publisher Genre
## 589 Macmillan historical fiction
## 592 Macmillan mystery
## Summary
## 589 Henly and Basil were good friends who studied at the university.\nbaysil was the artist I coud paint the best picture ever.\nWhich is because of a boy.His name was Dorian and he was young and very beautiful.Also Baysil refused.Henly met Dolian and talk about his life in various ways.\nBaysil drew a picture of Dolian,but he was not pleased.\nBecause he thought that he was young and beautiful so that sorrounding people cared about himself,and he hopes that the painting should take his age instead of himself.\nthat wish will come true.\nThe picture was wrinkled after a day and it became ugly and did bad things and become cruei face.But he remainded young aand beautiful.\nhe kept hiding the picture,but he was afraid to find it by second and tried to break the picture.\nWhen he broke the picture he returned to the real form and died.\n
## 592 Henry and Basil was good friends who studeied at the university.\nBayzil was the artist.I could paint the best picture ever.\nWhich is because of a boy.\nHis name was Dorian and he was young and very beautiful. Althou Baysil refused.\nHenry met Dorian and talk about his life in various ways.\nBAysil was drew a picture of Dorian, but he was not pleased.\nBecause he thought that he was young and bwautiful so that sorrounding people cared about hinself, and he hopes that the painting should take his age instead of himself.\nTHat wish will come true.\nThe picture was wrinkled after a day and it become ugly and did bad things and became crusl face.\nBut he remainded young and beautiful.\nHe kept hiding the picture, but he was afraid to find it by second and tried and break the picture.\nWhen he broke the picture he turened to the real form and died.\n
## Opinion
## 589 this story was very sad.\nI didn't think there would be such a story.\nIt seems not to be very popular in Japan.\n
## 592 I think this story was sad story.\nIt is because he thinks that Basil wanted to purely painting.\nAnd I think Dorian was just proud of his beauty and youth.So, I think Henly is bad people.\nI don't like people like Henly.\nDorian reflected on bad things, but in the end his died.\nI though it was a mirror cares.\nAnd it was a piece to Dorian who had a free way of living.\nSO, I think that I wanted to be a human being who can distinguish between mistakes.\nFinaly, there are not so many bad-end works in Japan.\nSO,I think very interested.\n \n
## Stars Campus Gender Teacher_Assessment Plagiarism_Source
## 589 2 Saitama Female 2
## 592 4 Saitama Female 3
## Teacher_Assessment_2 Period
## 589 2 <NA>
## 592 3 Niiza period 4 (3:00pm to 4:30pm)
reading <- reading[!(reading$Nickname=="rino" & reading$Book_title == "the picture of dorian gray" & reading$Genre == "historical fiction"),]
dim(reading)
## [1] 635 16
reading[grep("She lived with Tom of her husband.", reading$Summary),][3:8]
## Nickname Book_title Author Publisher Genre
## 269 mirei within high fences penny hancock Cambridge romance
## 273 mirei within high fences penny hancock Cambridge romance
## Summary
## 269 She lived with Tom of her husband. He gave her anything which she wanted. But he was often angry with her.\nOne day she met the men which is a asylum seeker in the centre which she worked in. We talked a lot of times. She knew that she love he and not Tom.\nShe decided to leave Tom. At the time, Tom said me that George just wants her money and her passport. But she didn’t believe it. She started live alone. George came to there everyday. They loved each other.\nThen she had to go Scotland because of her friend’s weeding. When she came back to the home, some of her things weren’t there. The police thought George took things from her flat. But she didn’t believe it.\nAfter that her friend invited her to the party. At the party she met Tom and went to his house. She found her things in the house. It’s Tom!\nShe went to meet George. She could meet him. They were fall in love.
## 273 She lived with Tom of her husband. He gave her anything which she wanted. But he was often angry with her.\nOne day she met the men which is a asylum seeker in the centre which she worked in. They talked a lot of times. She knew that she love he and not Tom.\nShe decided to leave Tom. At the time, Tom said her that George just wants her money and her passport. But she didn’t believe it. She started live alone. George came to there everyday. They loved each other.\nThen she had to go Scotland because of her friend’s weeding. When she came back to the home, some of her things weren’t there. The police thought George took things from her flat. But she didn’t believe it.\nAfter that her friend invited her to the party. At the party she met Tom and went to his house. She found her things in the house. It’s Tom!\nShe went to meet George. She could meet him. They were fall in love.
reading[reading$Nickname == 'mirei' & reading$Book_title == 'within high fences',]
## X Time Nickname Book_title Author
## 269 269 2018-06-29 13:29:50 mirei within high fences penny hancock
## 273 273 2018-06-29 13:40:42 mirei within high fences penny hancock
## Publisher Genre
## 269 Cambridge romance
## 273 Cambridge romance
## Summary
## 269 She lived with Tom of her husband. He gave her anything which she wanted. But he was often angry with her.\nOne day she met the men which is a asylum seeker in the centre which she worked in. We talked a lot of times. She knew that she love he and not Tom.\nShe decided to leave Tom. At the time, Tom said me that George just wants her money and her passport. But she didn’t believe it. She started live alone. George came to there everyday. They loved each other.\nThen she had to go Scotland because of her friend’s weeding. When she came back to the home, some of her things weren’t there. The police thought George took things from her flat. But she didn’t believe it.\nAfter that her friend invited her to the party. At the party she met Tom and went to his house. She found her things in the house. It’s Tom!\nShe went to meet George. She could meet him. They were fall in love.
## 273 She lived with Tom of her husband. He gave her anything which she wanted. But he was often angry with her.\nOne day she met the men which is a asylum seeker in the centre which she worked in. They talked a lot of times. She knew that she love he and not Tom.\nShe decided to leave Tom. At the time, Tom said her that George just wants her money and her passport. But she didn’t believe it. She started live alone. George came to there everyday. They loved each other.\nThen she had to go Scotland because of her friend’s weeding. When she came back to the home, some of her things weren’t there. The police thought George took things from her flat. But she didn’t believe it.\nAfter that her friend invited her to the party. At the party she met Tom and went to his house. She found her things in the house. It’s Tom!\nShe went to meet George. She could meet him. They were fall in love.
## Opinion
## 269 If something was happend, the chief character believed George. I think they bacame happy because of it. I could see a big love in it.
## 273 If something was happend, the chief character believed George. I think they bacame happy because of it. I could see a big love in it.
## Stars Campus Gender Teacher_Assessment Plagiarism_Source
## 269 4 Saitama Female 2
## 273 4 Saitama Female 2
## Teacher_Assessment_2 Period
## 269 2 Niiza period 3 (1:15pm to 2:45pm)
## 273 2 Niiza period 3 (1:15pm to 2:45pm)
Cinque began to broke the other slave`s chains.
reading[grep("Cinque began to broke the other slave`s chains.", reading$Summary),][3:8]
## Nickname Book_title Author Publisher Genre
## 318 nono amistad a stenen spielberg Penguin historical fiction
## 322 nono amisted a stenen spielberg Penguin historical fiction
## Summary
## 318 In the 19th century, there was slave trading. Cinque was a man who lived in Sierra Leone and was kidnapped as a slave. In the shiip to Cuba, there was a big storm which broke Cinque`s chains. Cinque began to broke the other slave`s chains. Soon after the chains were broken, the slaves started to kill the ship mans who was sailing them to their colony. they had bloody battle on the ship, finally the slaves got win and they made two spansh man were captured so that they can sail to their country. However two of them told Cinque wrong way. it was a way to America. Cinque and his clue were catured in Connecticut. As they were suspected of the murdering, they went to the court. In the court, as Spansh Queen requested that back them to Spain as they are her slaves, the court was once stopped. but again the court was held and Cinque and American were debated. The point of the court focused on where they wewe born in. Cinque`s side, the young lowyer, Baldwin said trading native African is illegal, so we should free them. On the other hand, the American side which including president`s intending asked the proof of their born because president agreed with the slavery, he was afraid of losing his popular for the election. the debate was a long battle, but in the end the voice of Cinque through the transrater moved the juries, and they got freedom. As their race was different,Cinque and Baidwin realized the value of freedom as America got the freedom.
## 322 In the 19th century, there was slave trading. Cinque was a man who lived in Sierra Leone and was kidnapped as a slave. In the shiip to Cuba, there was a big storm which broke Cinque`s chains. Cinque began to broke the other slave`s chains. Soon after the chains were broken, the slaves started to kill the ship mans who was sailing them to their colony. they had bloody battle on the ship, finally the slaves got win and they made two spansh man were captured so that they can sail to their country. However two of them told Cinque wrong way. it was a way to America. Cinque and his clue were catured in Connecticut. As they were suspected of the murdering, they went to the court. In the court, as Spansh Queen requested that back them to Spain as they are her slaves, the court was once stopped. but again the court was held and Cinque and American were debated. The point of the court focused on where they wewe born in. Cinque`s side, the young lowyer, Baldwin said trading native African is illegal, so we should free them. On the other hand, the American side which including president`s intending asked the proof of their born because president agreed with the slavery, he was afraid of losing his popular for the election. the debate was a long battle, but in the end the voice of Cinque through the transrater moved the juries, and they got freedom. As their race was different,Cinque and Baidwin realized the value of freedom as America got the freedom before.
reading <- reading[!(reading$Nickname == 'nono' & reading$Book_title == 'amisted'),]
dim(reading)
## [1] 634 16
sentences <- tokens(reading$Summary, what="sentence")
sentencesDf <- as.data.frame(table(unlist(sentences)))
duplicates <- sentencesDf[sentencesDf$Freq>1,]
nrow(duplicates)*2/nrow(sentencesDf)
## [1] 0.01238145
duplicates$Var1
## [1] 1.
## [2] After that her friend invited her to the party.
## [3] Alice hears two customs officers talking.
## [4] At the party she met Tom and went to his house.
## [5] Basketball is Michael's life.
## [6] But he was often angry with her.
## [7] But she didn't believe it.
## [8] George came to there everyday.
## [9] He arrived at the beach.
## [10] He gave her anything which she wanted.
## [11] He is a private detective of Los Angeles.
## [12] he thinks.
## [13] He was disappointed.
## [14] He was very sad.
## [15] His name is David.
## [16] His name is Mister Fruit.
## [17] His name is Toby.
## [18] However, there was no people.
## [19] It is Lisa's first visit to London.
## [20] It's Tom!
## [21] Michael Jordan is a basketball player.
## [22] Next year, Mister Fruit is Miss Fruit.
## [23] One day she met the men which is a asylum seeker in the centre which she worked in.
## [24] People in every country know his name.
## [25] Rick Evelyn and Alex found the gold bracelet of Anubis in Egypt ruins.
## [26] She could meet him.
## [27] She decided to leave Tom.
## [28] She found her things in the house.
## [29] She knew that she love he and not Tom.
## [30] She lived with Tom of her husband.
## [31] She loved him too.
## [32] She started live alone.
## [33] She went to meet George.
## [34] The hero of this story is Lenny Samuel.
## [35] The police thought George took things from her flat.
## [36] Then she had to go Scotland because of her friend's weeding.
## [37] There is a new man at the market.
## [38] They are happy.
## [39] They danced together.
## [40] They loved each other.
## [41] They want to be David Beckham.
## [42] They went to the hotel.
## [43] They were fall in love.
## [44] They were looking for iron and other metals.
## [45] This is the story of one day at the office.
## [46] This time , I'll write about "Julius Caesar".
## [47] When she came back to the home, some of her things weren't there.
## 7592 Levels: 1804,Britain was at war with France. ...
reading$Month <- as.factor(months(reading$Time))
table(reading$Month)
##
## April July June May
## 76 202 168 187
reading$Week = cut(reading$Time, breaks="weeks")
levels(reading$Week) <- paste("W", 1:nlevels(reading$Week), sep="")
table(reading$Week)
##
## W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15
## 1 25 45 44 43 49 38 34 26 28 55 49 41 43 112
table(table(reading$Week, reading$Nickname))
##
## 0 1 2 3 4 5 6 7
## 438 348 53 20 16 6 3 1
reading$Day <- as.factor(weekdays(reading$Time))
sort(table(reading$Day))
##
## Saturday Sunday Wednesday Monday Tuesday Thursday Friday
## 50 51 51 82 87 134 178
Tokyo <- reading[reading$Campus=="Tokyo",]
Tokyo$Nickname <- factor(Tokyo$Nickname)
Saitama <- reading[reading$Campus=="Saitama",]
Saitama$Nickname <- factor(Saitama$Nickname)
table(Tokyo$Nickname)
##
## atsuhito fumiya hide jun kakuto koharu mai mako
## 14 14 11 10 18 12 3 11
## miku moeka rio ryo sena shimpei shintaro taichi
## 12 13 10 2 12 12 10 14
## tomo tomoya yuto_s yuto_y
## 1 19 17 16
SaitamaPeriod3 <- subset(reading, reading$Period=="Niiza period 3 (1:15pm to 2:45pm)")
SaitamaPeriod3$Nickname <- factor(SaitamaPeriod3$Nickname)
SaitamaPeriod4 <- subset(reading, reading$Period=="Niiza period 4 (3:00pm to 4:30pm)")
SaitamaPeriod4$Nickname <- factor(SaitamaPeriod4$Nickname)
table(SaitamaPeriod3$Nickname)
##
## asahi aya ayumi chacha genki harumi kazuma kouki manabu mayu
## 12 11 10 3 11 14 11 10 10 14
## miki mirei moena natsu nono sakura sera yuki yumi yuuka
## 9 13 12 12 13 12 14 9 10 14
table(SaitamaPeriod4$Nickname)
##
## alice atsushi eishi kandai kasumi kensuke mike minako minami
## 11 6 8 10 8 8 11 9 9
## mri richard rino saki shine shun sit tmk yuji
## 12 3 8 8 11 10 7 14 6
## yuna
## 16
table(reading$Nickname, reading$Week)
##
## W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15
## alice 0 1 0 2 1 0 0 1 0 0 2 0 0 0 4
## asahi 0 1 0 1 0 1 0 1 0 0 3 0 0 0 6
## atsuhito 0 0 1 1 1 1 0 1 2 0 0 0 1 1 5
## atsushi 0 0 0 0 0 0 1 0 0 0 1 0 1 2 1
## aya 0 1 1 1 1 1 1 1 1 1 0 0 0 0 2
## ayumi 0 0 1 1 1 0 1 1 0 1 0 1 0 0 3
## chacha 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0
## eishi 0 1 1 0 1 0 0 0 1 0 0 2 1 1 0
## fumiya 0 0 1 1 1 1 1 1 0 1 0 0 0 3 4
## genki 0 0 1 0 0 4 1 1 1 1 0 1 0 1 0
## harumi 1 0 1 1 1 1 1 3 0 1 4 0 0 0 0
## hide 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0
## jun 0 1 0 2 1 0 2 0 0 1 0 1 0 1 1
## kakuto 0 0 2 0 1 1 1 1 1 0 2 2 3 4 0
## kandai 0 0 1 1 1 1 0 0 0 1 0 0 1 0 4
## kasumi 0 0 2 1 0 0 0 0 0 1 0 0 0 0 4
## kazuma 0 0 0 0 4 1 1 0 1 1 0 1 1 1 0
## kensuke 0 1 0 1 1 0 1 0 1 0 0 0 1 0 2
## koharu 0 0 1 0 1 1 1 0 2 1 1 3 0 0 1
## kouki 0 1 1 1 1 1 1 0 1 1 0 1 1 0 0
## mai 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0
## mako 0 0 1 1 1 1 1 0 1 1 1 2 0 0 1
## manabu 0 0 0 1 0 1 1 1 0 1 0 0 1 1 3
## mayu 0 1 1 1 1 1 1 1 2 1 1 2 1 0 0
## mike 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1
## miki 0 1 1 1 1 1 1 1 0 0 0 0 1 1 0
## miku 0 0 1 1 0 1 1 2 1 0 5 0 0 0 0
## minako 0 0 1 0 1 1 1 0 1 0 1 1 0 0 2
## minami 0 1 1 1 0 1 0 1 0 0 0 0 2 0 2
## mirei 0 1 1 1 1 0 2 0 0 1 1 1 0 1 3
## moeka 0 0 1 1 2 0 1 1 0 0 7 0 0 0 0
## moena 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0
## mri 0 0 0 0 0 1 2 0 0 0 0 0 0 5 4
## natsu 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1
## nono 0 1 1 1 1 0 0 0 0 0 0 0 3 2 4
## richard 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3
## rino 0 0 0 0 1 0 0 0 1 0 2 0 1 0 3
## rio 0 0 1 1 0 1 0 0 0 0 2 2 0 1 2
## ryo 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
## saki 0 1 0 1 0 0 2 0 0 0 1 1 0 0 2
## sakura 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1
## sena 0 0 1 2 0 0 0 0 0 0 1 1 0 3 4
## sera 0 1 1 1 1 2 1 0 1 1 3 1 0 0 1
## shimpei 0 1 0 2 1 0 1 2 0 2 1 1 1 0 0
## shine 0 0 1 0 0 0 0 0 0 0 1 1 3 3 2
## shintaro 0 0 2 1 1 1 1 0 0 1 2 0 0 1 0
## shun 0 0 0 0 0 0 0 1 1 0 0 0 0 2 6
## sit 0 0 0 0 0 1 0 0 0 0 0 0 0 0 6
## taichi 0 0 1 1 1 1 0 1 0 0 1 2 3 0 3
## tmk 0 0 0 0 0 4 0 1 0 0 1 1 2 0 5
## tomo 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
## tomoya 0 0 0 0 1 2 0 1 0 0 2 3 3 4 3
## yuji 0 0 1 0 0 2 0 0 0 1 0 0 0 1 1
## yuki 0 1 1 1 1 1 1 0 0 1 0 0 2 0 1
## yumi 0 1 1 1 1 1 1 0 2 1 1 0 1 0 0
## yuna 0 1 1 1 1 1 1 1 1 1 1 1 0 0 5
## yuto_s 0 0 1 1 1 1 1 2 0 0 0 5 2 1 2
## yuto_y 0 0 1 1 1 1 1 2 0 0 2 4 2 1 0
## yuuka 0 1 1 1 1 1 0 0 0 0 0 4 0 1 4
library(ggplot2)
heatmap <- as.data.frame(table(reading$Week, reading$Nickname))
figure1 <- ggplot(aes(x=Var1, y=Var2, fill = Freq), data=heatmap) +
geom_tile() +
ggtitle("Homework Assignments Per Student Per Week") +
scale_fill_gradient(low="white", high="purple") +
ylab("Students") +
xlab("Week") +
theme(axis.text.y=element_blank(),
axis.ticks.y = element_blank())
figure1
round(tapply(reading$opinionTokens, reading$Week, mean))
## W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15
## 54 46 41 35 40 33 37 39 65 57 58 67 61 60 69
ggplot(aes(x = Week, y = opinionTokens), data = reading) +
geom_point() +
ggtitle("Opinion Word Counts per Student per Week") +
ylab("Opinion Word Counts")
mean(reading$summaryTokens)
## [1] 167.6556
mean(reading$opinionTokens)
## [1] 52.54818
library(ggplot2)
ggplot() +
geom_histogram(aes(x=opinionTokens), data = reading, binwidth=10, fill = "red") +
geom_histogram(aes(x=summaryTokens), data = reading, binwidth=10, fill = "green") +
scale_x_continuous(breaks = seq(0,1200,100)) +
xlab("Word Count Per Student") +
ylab("Number of Assignments") +
ggtitle("Opinion and Summary Word Counts")
t.test(reading$summaryTokens, reading$OpinionTokens)
##
## One Sample t-test
##
## data: reading$summaryTokens
## t = 47.688, df = 632, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 160.7518 174.5594
## sample estimates:
## mean of x
## 167.6556
library(effsize)
cohen.d(reading$opinionTokens, reading$summaryTokens, conf.level=0.95)
##
## Cohen's d
##
## d estimate: -1.701087 (large)
## 95 percent confidence interval:
## inf sup
## -1.829770 -1.572405
tapply(reading$totalTokens, reading$Campus, mean)
## Saitama Tokyo
## 242.0448 182.1948
tapply(reading$totalTokens, reading$Campus, sd)
## Saitama Tokyo
## 112.19067 70.33851
ggplot(aes(x=totalTokens), data=reading) +
geom_histogram(aes(fill=Campus), binwidth=10) +
scale_x_continuous(breaks = seq(0,1500,100)) +
xlab("Word Count Per Student") +
ylab("Number of Assignments") +
ggtitle("Homework Assignment Word Count per Campus")
tapply(reading$totalTokens, reading$Campus, median)
## Saitama Tokyo
## 223.5 173.0
t.test(Saitama$totalTokens, Tokyo$totalTokens)
##
## Welch Two Sample t-test
##
## data: Saitama$totalTokens and Tokyo$totalTokens
## t = 8.2422, df = 626.3, p-value = 9.927e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 45.59031 74.10963
## sample estimates:
## mean of x mean of y
## 242.0448 182.1948
cohen.d(Saitama$totalTokens, Tokyo$totalTokens, conf.level=0.95)
##
## Cohen's d
##
## d estimate: 0.604507 (medium)
## 95 percent confidence interval:
## inf sup
## 0.4389794 0.7700347
tapply(reading$totalTokens, reading$Gender, mean)
## Female Male
## 243.4357 192.9003
tapply(reading$totalTokens, reading$Gender, sd)
## Female Male
## 115.76250 77.44752
ggplot(aes(x=totalTokens), data=reading) +
geom_histogram(aes(fill=Gender), binwidth=10) +
scale_x_continuous(breaks = seq(0,1500,100)) +
xlab("Word Count Per Student") +
ylab("Number of Assignments") +
ggtitle("Homework Assignment Word Count per Gender")
tapply(reading$totalTokens, reading$Gender, median)
## Female Male
## 224.5 182.0
females <- subset(reading, Gender=="Female")
males <- reading[reading$Gender!="Female",]
t.test(females$totalTokens, males$totalTokens)
##
## Welch Two Sample t-test
##
## data: females$totalTokens and males$totalTokens
## t = 6.5352, df = 599.16, p-value = 1.36e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 35.34865 65.72201
## sample estimates:
## mean of x mean of y
## 243.4357 192.9003
cohen.d(females$totalTokens, males$totalTokens, conf.level=0.95)
##
## Cohen's d
##
## d estimate: 0.5053861 (medium)
## 95 percent confidence interval:
## inf sup
## 0.3463100 0.6644623
table(reading$Campus)
##
## Saitama Tokyo
## 402 231
reading %>%
group_by(Campus) %>%
summarize(count = n_distinct(Nickname))
## # A tibble: 2 x 2
## Campus count
## <fct> <int>
## 1 Saitama 39
## 2 Tokyo 20
observed_postings_campus <- c(160, 260)
expected_postings_campus <- c(20, 38)
expected_probs_campus <- prop.table(expected_postings_campus)
chisq.test(observed_postings_campus, p=expected_probs_campus)
##
## Chi-squared test for given probabilities
##
## data: observed_postings_campus
## X-squared = 2.4261, df = 1, p-value = 0.1193
table(reading$Gender)
##
## Female Male
## 342 291
reading %>%
group_by(Gender) %>%
summarize(count = n_distinct(Nickname))
## # A tibble: 2 x 2
## Gender count
## <fct> <int>
## 1 Female 32
## 2 Male 28
observed_postings_gender <- c(245, 175)
expected_postings_gender <- c(31, 27)
expected_probs_gender <- prop.table(expected_postings_gender)
chisq.test(observed_postings_gender, p=expected_probs_gender)
##
## Chi-squared test for given probabilities
##
## data: observed_postings_gender
## X-squared = 4.0283, df = 1, p-value = 0.04474
reading$Stars <- as.numeric(reading$Stars)
borrowed_often <- select(reading, Book_title, Stars, Nickname) %>%
group_by(Book_title) %>%
summarize(count = n(), average = mean(Stars)) %>%
arrange(desc(count), desc(average)) %>%
head(50)
borrowed_often
## # A tibble: 50 x 3
## Book_title count average
## <fct> <int> <dbl>
## 1 the house on the hill 10 3
## 2 the long tunnel 8 3.5
## 3 anna and the fighter 8 3.38
## 4 dangerous journey 7 3.14
## 5 michael jordan 7 3
## 6 newspaper boy 7 2.71
## 7 lisa in london 7 2.43
## 8 marco 7 2.29
## 9 the jungle book 6 3.5
## 10 the missing coins 6 2.33
## # ... with 40 more rows
popular_genres <- select(reading, Genre) %>%
group_by(Genre) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
head(10)
popular_genres
## # A tibble: 10 x 2
## Genre count
## <fct> <int>
## 1 other 107
## 2 fantasy 98
## 3 action adventure 78
## 4 mystery 70
## 5 romance 66
## 6 non-fiction 63
## 7 children's literature 39
## 8 biography 35
## 9 historical fiction 28
## 10 classical literature 19
ggplot(data=reading, aes(x=reorder(Genre,Genre,
function(x)+length(x)))) +
geom_bar() +
theme(axis.text.x = element_text(size = 12, angle = 90, hjust=1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()) +
ggtitle("Genre Selection") +
ylab("Books Borrowed")
chisqReadingGenre <- chisq.test(table(reading$Genre))
chisqReadingGenre
##
## Chi-squared test for given probabilities
##
## data: table(reading$Genre)
## X-squared = 282.11, df = 12, p-value < 2.2e-16
sort(chisqReadingGenre$stdres)
##
## young adult science fiction sport
## -6.069644 -5.622165 -5.622165
## classical literature historical fiction biography
## -4.428889 -3.086454 -2.042338
## children's literature non-fiction romance
## -1.445700 2.134128 2.581606
## mystery action adventure fantasy
## 3.178244 4.371520 7.354710
## other
## 8.697145
top_publishers <- select(reading, Publisher) %>%
group_by(Publisher) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
head(6)
top_publishers
## # A tibble: 6 x 2
## Publisher count
## <fct> <int>
## 1 Penguin 225
## 2 Macmillan 170
## 3 Pearson 96
## 4 Oxford 80
## 5 Cambridge 52
## 6 Other 9
table(reading$Stars)
##
## 1 2 3 4 5
## 114 118 141 167 93
mean(reading$Stars)
## [1] 3.011058
reading$Stars <- as.numeric(reading$Stars)
ggplot(aes(x=Genre, y=Stars), data=reading) +
geom_violin() +
theme(axis.text.x = element_text(angle = 90),
axis.ticks.x=element_blank()) +
ggtitle("Student Ratings")
reading$Stars <- as.numeric(reading$Stars)
aovStarRatings <- aov(Stars ~ Genre, data=reading)
summary(aovStarRatings)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genre 12 17.1 1.424 0.806 0.645
## Residuals 620 1095.8 1.768
top100 <- select(reading, Book_title, Stars, Nickname) %>%
arrange(desc(Stars)) %>%
head(100)
top100
## Book_title Stars Nickname
## 1 mother teresa 5 yuto_y
## 2 tales from hans andersen 5 yuto_y
## 3 flying home 5 yuto_y
## 4 love or monet 5 tomoya
## 5 american life 5 tomo
## 6 round the world in eighty days"" 5 tomoya
## 7 daniel radcliffe 5 tomoya
## 8 les misérables 5 tomoya
## 9 the amazon rain forest 5 tomoya
## 10 david beckham 5 tomoya
## 11 michael jordan 5 hide
## 12 barack obama 5 hide
## 13 american life 5 hide
## 14 hannah and the hurricane 5 hide
## 15 robin hood 5 hide
## 16 the mummy returns 5 yuto_s
## 17 the lost ship 5 yuto_s
## 18 book boy 5 miku
## 19 the phantom of the opera 5 miku
## 20 the ring 5 harumi
## 21 the fox volpone 5 mirei
## 22 the importance of being earnest 5 mirei
## 23 the scarlet letter 5 mirei
## 24 simply suspense 5 mirei
## 25 the piano man 5 moena
## 26 the black cat and other stories 5 moena
## 27 jaws 5 moena
## 28 fly away home 5 moena
## 29 anne frank 5 moena
## 30 marley & me 5 genki
## 31 the wave 5 nono
## 32 the adventure of huckleberry finn 5 nono
## 33 the jungle book 5 asahi
## 34 the canterville ghost 5 asahi
## 35 the piano 5 asahi
## 36 red dog 5 asahi
## 37 the children of the new forest 5 asahi
## 38 simply suspence 5 yumi
## 39 death in the freezer 5 yumi
## 40 the black tulip 5 natsu
## 41 k's first case 5 natsu
## 42 the merchant of venice 5 chacha
## 43 the return of sherlock holmes 5 sakura
## 44 the canterville ghost 5 sakura
## 45 tales from the arabian nights 5 aya
## 46 babe 5 aya
## 47 jim smiley and his jumping frog and other stories 5 aya
## 48 sherlock holmes and the mystery of boscombe pool 5 aya
## 49 different worlds 5 yuuka
## 50 mother teresa 5 sera
## 51 pirates of caribbean 5 sera
## 52 pele 5 sera
## 53 washington square 5 kazuma
## 54 stories from shakespere 5 miki
## 55 five famous faury tales 5 miki
## 56 anna and the fighter 5 miki
## 57 lost love and other stories 5 miki
## 58 pirates of the caribbean. 5 mayu
## 59 jumanji 5 mayu
## 60 alice in wonderland 5 mayu
## 61 billy elliot 5 mayu
## 62 pele 5 manabu
## 63 karen and the artist 5 manabu
## 64 the missing coins 5 manabu
## 65 michel jordan 5 manabu
## 66 walkabout 5 shine
## 67 simon and spy 5 shine
## 68 white fang 5 shine
## 69 dracula 5 kasumi
## 70 rich man,poor man 5 kensuke
## 71 marcel and the mona lisa 5 kensuke
## 72 pete and the pirates 5 kensuke
## 73 the last of the mohicans 5 kensuke
## 74 the battle of newton road 5 kensuke
## 75 the long tunnel 5 minako
## 76 wuthering heights 5 minako
## 77 pele 5 minako
## 78 superbird 5 minako
## 79 photo finish 5 sit
## 80 l.a detective 5 minami
## 81 the magic barber 5 minami
## 82 the trumpet-major 5 minami
## 83 billy budd 5 minami
## 84 around the world in eighty days 5 minami
## 85 the gift of the magi 5 rino
## 86 dangerous journey 5 rino
## 87 anna and the fighter 5 rino
## 88 michael jordan 5 richard
## 89 night at the museum 5 shun
## 90 michael jordan 5 mike
## 91 ali and his camera 5 mike
## 92 surfer! 5 mike
## 93 the wizard of oz 5 kandai
## 94 the long tunnel 4 yuto_y
## 95 johnny english 4 yuto_y
## 96 rip van winkle and the legend of sleepy hollow 4 yuto_y
## 97 the white oryx 4 yuto_y
## 98 the gift of the magi and other stories 4 yuto_y
## 99 marcel goes to hollywood 4 yuto_y
## 100 girl meets boy 4 yuto_y
table(reading$Genre, reading$Gender)
##
## Female Male
## action adventure 43 35
## biography 14 21
## children's literature 12 27
## classical literature 14 5
## fantasy 65 33
## historical fiction 18 10
## mystery 45 25
## non-fiction 34 29
## other 48 59
## romance 37 29
## science fiction 8 3
## sport 1 10
## young adult 3 5
ggplot(aes(x=Genre), data=reading) +
geom_bar(aes(fill=Genre)) +
facet_wrap(~Gender) +
ylab("Book Count") +
theme(axis.text.x = element_text(angle = 90),
axis.ticks.x=element_blank()) +
ggtitle("Genre Selection by Gender")
mean(table(reading$Genre, reading$Gender)>=5)
## [1] 0.8846154
chisq.test(reading$Gender, reading$Genre)
## Warning in chisq.test(reading$Gender, reading$Genre): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: reading$Gender and reading$Genre
## X-squared = 39.483, df = 12, p-value = 8.759e-05
table(reading$Genre, reading$Campus)
##
## Saitama Tokyo
## action adventure 51 27
## biography 11 24
## children's literature 27 12
## classical literature 14 5
## fantasy 67 31
## historical fiction 21 7
## mystery 42 28
## non-fiction 37 26
## other 71 36
## romance 44 22
## science fiction 8 3
## sport 4 7
## young adult 5 3
ggplot(aes(x=Genre), data=reading) +
geom_bar(aes(fill=Genre)) +
facet_wrap(~Campus) +
ylab("Book Count") +
theme(axis.text.x = element_text(angle = 90),
axis.ticks.x=element_blank()) +
ggtitle("Genre Selection by Campus")
mean(table(reading$Genre, reading$Campus)>=5)
## [1] 0.8846154
chisq.test(reading$Genre, reading$Campus)
## Warning in chisq.test(reading$Genre, reading$Campus): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: reading$Genre and reading$Campus
## X-squared = 25.209, df = 12, p-value = 0.01386
levels(reading$Nickname)
## [1] "alice" "asahi" "atsuhito" "atsushi" "aya" "ayumi"
## [7] "chacha" "eishi" "fumiya" "genki" "harumi" "hide"
## [13] "jun" "kakuto" "kandai" "kasumi" "kazuma" "kensuke"
## [19] "koharu" "kouki" "mai" "mako" "manabu" "mayu"
## [25] "mike" "miki" "miku" "minako" "minami" "mirei"
## [31] "moeka" "moena" "mri" "natsu" "nono" "richard"
## [37] "rino" "rio" "ryo" "saki" "sakura" "sena"
## [43] "sera" "shimpei" "shine" "shintaro" "shun" "sit"
## [49] "taichi" "tmk" "tomo" "tomoya" "yuji" "yuki"
## [55] "yumi" "yuna" "yuto_s" "yuto_y" "yuuka"
Tokyo %>%
group_by(Gender) %>%
summarize(counts = n_distinct(Nickname)/nlevels(Tokyo$Nickname))
## # A tibble: 2 x 2
## Gender counts
## <fct> <dbl>
## 1 Female 0.3
## 2 Male 0.7
SaitamaPeriod3 %>%
group_by(Gender) %>%
summarize(counts = n_distinct(Nickname)/nlevels(SaitamaPeriod3$Nickname))
## # A tibble: 2 x 2
## Gender counts
## <fct> <dbl>
## 1 Female 0.8
## 2 Male 0.2
SaitamaPeriod4 %>%
group_by(Gender) %>%
summarize(counts = n_distinct(Nickname)/nlevels(SaitamaPeriod4$Nickname))
## # A tibble: 2 x 2
## Gender counts
## <fct> <dbl>
## 1 Female 0.526
## 2 Male 0.526
TClusterDf <- cbind(Ttitle, Tauthor, Tpublisher, Tgenre, Tstars)
library(ggdendro)
figure3 <- ggdendrogram(hclust(dist(TClusterDf)))
figure3
filter(reading, Nickname == "fumiya" | Nickname == "kakuto") %>%
group_by(Nickname) %>%
select(Nickname, Book_title, Author, Publisher, Genre, Stars)
## # A tibble: 32 x 6
## # Groups: Nickname [2]
## Nickname Book_title Author Publisher Genre Stars
## <fct> <fct> <fct> <fct> <fct> <dbl>
## 1 kakuto let me out! antoinette … Cambridge fantasy 1
## 2 kakuto help! philip prow… Cambridge fantasy 1
## 3 kakuto big hair day margeret jo… Cambridge fantasy 1
## 4 kakuto a death in oxford richard mac… Cambridge mystery 1
## 5 kakuto ten long years alan batter… Cambridge mystery 1
## 6 kakuto book boy antoinette … Cambridge biography 1
## 7 kakuto next door to love margaret jo… Cambridge romance 2
## 8 kakuto karen and the artist elizabeth l… Pearson romance 1
## 9 kakuto the leopard and the … anne collins Pearson biography 1
## 10 kakuto the battle of newton… leslie dunk… Pearson historical… 1
## # ... with 22 more rows
SaitamaPeriod3title <- table(SaitamaPeriod3$Nickname, SaitamaPeriod3$Book_title)
SaitamaPeriod3author <- table(SaitamaPeriod3$Nickname, SaitamaPeriod3$Author)
SaitamaPeriod3publisher <- table(SaitamaPeriod3$Nickname, SaitamaPeriod3$Publisher)
SaitamaPeriod3genre <- table(SaitamaPeriod3$Nickname, SaitamaPeriod3$Genre)
SaitamaPeriod3stars <- table(SaitamaPeriod3$Nickname, SaitamaPeriod3$Stars)
SaitamaPeriod3ClusterDf <- cbind(SaitamaPeriod3title, SaitamaPeriod3author, SaitamaPeriod3publisher, SaitamaPeriod3genre, SaitamaPeriod3stars)
ggdendrogram(hclust(dist(SaitamaPeriod3ClusterDf)))
SaitamaPeriod4title <- table(SaitamaPeriod4$Nickname, SaitamaPeriod4$Author)
SaitamaPeriod4author <- table(SaitamaPeriod4$Nickname, SaitamaPeriod4$Author)
SaitamaPeriod4publisher <- table(SaitamaPeriod4$Nickname, SaitamaPeriod4$Publisher)
SaitamaPeriod4genre <- table(SaitamaPeriod4$Nickname, SaitamaPeriod4$Genre)
SaitamaPeriod4stars <- table(SaitamaPeriod4$Nickname, SaitamaPeriod4$Stars)
SaitamaPeriod4ClusterDf <- cbind(SaitamaPeriod4title, SaitamaPeriod4author, SaitamaPeriod4publisher, SaitamaPeriod4genre, SaitamaPeriod4stars)
ggdendrogram(hclust(dist(SaitamaPeriod4ClusterDf)))
summary(reading$Teacher_Assessment)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 3.000 2.754 3.000 4.000
scores <- select(reading, Nickname, Teacher_Assessment) %>%
group_by(Nickname) %>%
summarize(Total = sum(Teacher_Assessment))
scores <- as.data.frame(scores)
summary(scores$Total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 23.50 31.00 29.54 38.00 48.00
15/nlevels(reading$Week)*summary(scores$Total)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 23.50 31.00 29.54 38.00 48.00
averageNpostings <- nrow(reading)/nlevels(reading$Nickname)
averageNpostings
## [1] 10.72881
###### tapply(reading$Teacher_Assessment, reading$Nickname, sum)
tapply(reading$Teacher_Assessment, reading$Nickname, mean)
## alice asahi atsuhito atsushi aya ayumi chacha
## 2.0909091 3.0769231 2.4285714 3.3333333 3.4545455 3.4000000 0.6666667
## eishi fumiya genki harumi hide jun kakuto
## 3.0000000 2.1428571 2.9090909 3.4285714 3.3636364 3.0000000 2.3333333
## kandai kasumi kazuma kensuke koharu kouki mai
## 2.0000000 2.6250000 3.6363636 2.2500000 3.2500000 2.8000000 3.0000000
## mako manabu mayu mike miki miku minako
## 2.8181818 2.1000000 2.8571429 3.0909091 2.5555556 3.5833333 2.7777778
## minami mirei moeka moena mri natsu nono
## 2.6666667 2.3846154 2.6153846 3.5833333 2.1666667 3.4166667 2.9230769
## richard rino rio ryo saki sakura sena
## 2.0000000 2.3750000 3.2000000 1.5000000 3.1250000 3.3333333 2.0000000
## sera shimpei shine shintaro shun sit taichi
## 2.5714286 2.8333333 3.1818182 3.0000000 2.6000000 2.7142857 2.2142857
## tmk tomo tomoya yuji yuki yumi yuna
## 2.2142857 3.0000000 2.1052632 2.8333333 3.0000000 3.9090909 2.2500000
## yuto_s yuto_y yuuka
## 2.1764706 2.6250000 2.9285714
tapply(reading$Teacher_Assessment, reading$Nickname, sd)
## alice asahi atsuhito atsushi aya ayumi chacha
## 0.3015113 0.7595545 0.5135526 0.8164966 0.5222330 0.5163978 1.1547005
## eishi fumiya genki harumi hide jun kakuto
## 0.7559289 0.5345225 0.5393599 0.7559289 0.6741999 0.4714045 0.4850713
## kandai kasumi kazuma kensuke koharu kouki mai
## 0.0000000 0.7440238 0.5045250 0.4629100 0.6215816 0.4216370 0.0000000
## mako manabu mayu mike miki miku minako
## 0.6030227 0.3162278 0.5345225 0.7006490 0.7264832 0.5149287 0.6666667
## minami mirei moeka moena mri natsu nono
## 0.5000000 0.5063697 0.6504436 0.5149287 0.3892495 0.6685579 0.6405126
## richard rino rio ryo saki sakura sena
## 0.0000000 0.5175492 0.9189366 0.7071068 0.6408699 0.4923660 0.4264014
## sera shimpei shine shintaro shun sit taichi
## 0.5135526 0.7177406 0.8738629 0.4714045 0.6992059 0.7559289 0.5789342
## tmk tomo tomoya yuji yuki yumi yuna
## 0.4258153 NA 0.3153018 0.4082483 0.6666667 0.3015113 0.4472136
## yuto_s yuto_y yuuka
## 0.3929526 0.6191392 0.8287419
table(reading$Teacher_Assessment)
##
## 0 1 2 3 4
## 2 4 255 259 113
round(prop.table(table(reading$Teacher_Assessment))*100)
##
## 0 1 2 3 4
## 0 1 40 41 18
ggplot(aes(x=Teacher_Assessment), data = reading) +
geom_histogram(binwidth=1) +
ggtitle("Homework Assignment Scores") +
ylab("Count") +
xlab("score")
vars <- c("summaryTokens", "opinionTokens", "totalTokens",
"summarySentenceCount", "opinionSentenceCount",
"summaryTypes", "opinionTypes",
"totalSentenceCount", "totalTypes",
"summaryTTR", "opinionTTR")
cor(reading$Teacher_Assessment, reading[,vars])
## summaryTokens opinionTokens totalTokens summarySentenceCount
## [1,] 0.3097547 0.2954774 0.3706123 0.2282441
## opinionSentenceCount summaryTypes opinionTypes totalSentenceCount
## [1,] 0.1973958 0.3314173 0.3191072 0.2551883
## totalTypes summaryTTR opinionTTR
## [1,] 0.4059339 -0.3070815 -0.1738696
cor.test(reading$Teacher_Assessment, reading$totalTypes)
##
## Pearson's product-moment correlation
##
## data: reading$Teacher_Assessment and reading$totalTypes
## t = 11.158, df = 631, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3387204 0.4690253
## sample estimates:
## cor
## 0.4059339
cor.test(reading$Teacher_Assessment, reading$summarySentenceCount)
##
## Pearson's product-moment correlation
##
## data: reading$Teacher_Assessment and reading$summarySentenceCount
## t = 5.8889, df = 631, p-value = 6.323e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1530376 0.3008219
## sample estimates:
## cor
## 0.2282441
ggplot(aes(x=totalTypes, y=Teacher_Assessment), data = reading) +
geom_point() +
stat_smooth(method="lm") +
ggtitle("Correlation of Total Types and\nTeacher Assessment") +
ylab("Teacher Assessment") +
xlab("Total Types")
Saitama %>%
ggplot(aes(totalTypes, Teacher_Assessment, label = Nickname)) +
geom_label()
Tokyo %>%
ggplot(aes(totalTypes, Teacher_Assessment, label = Nickname)) +
geom_label()
linMod1 <- lm(Teacher_Assessment ~ summaryTokens + opinionTokens +
summarySentenceCount + opinionSentenceCount +
summaryTypes + opinionTypes +
summaryTTR + opinionTTR, data = reading)
summary(linMod1)
##
## Call:
## lm(formula = Teacher_Assessment ~ summaryTokens + opinionTokens +
## summarySentenceCount + opinionSentenceCount + summaryTypes +
## opinionTypes + summaryTTR + opinionTTR, data = reading)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.86642 -0.54658 0.03215 0.43953 1.56494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.1316822 0.4454085 7.031 5.40e-12 ***
## summaryTokens -0.0053186 0.0016522 -3.219 0.00135 **
## opinionTokens 0.0000531 0.0043618 0.012 0.99029
## summarySentenceCount -0.0088255 0.0076783 -1.149 0.25083
## opinionSentenceCount -0.0205608 0.0210449 -0.977 0.32895
## summaryTypes 0.0165202 0.0037764 4.375 1.43e-05 ***
## opinionTypes 0.0127359 0.0066289 1.921 0.05515 .
## summaryTTR -2.8206238 0.4895261 -5.762 1.31e-08 ***
## opinionTTR 0.3669152 0.3505325 1.047 0.29563
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6736 on 624 degrees of freedom
## Multiple R-squared: 0.2213, Adjusted R-squared: 0.2113
## F-statistic: 22.16 on 8 and 624 DF, p-value: < 2.2e-16
linMod2 <- step(linMod1)
## Start: AIC=-491.34
## Teacher_Assessment ~ summaryTokens + opinionTokens + summarySentenceCount +
## opinionSentenceCount + summaryTypes + opinionTypes + summaryTTR +
## opinionTTR
##
## Df Sum of Sq RSS AIC
## - opinionTokens 1 0.0001 283.11 -493.34
## - opinionSentenceCount 1 0.4331 283.54 -492.37
## - opinionTTR 1 0.4971 283.61 -492.23
## - summarySentenceCount 1 0.5994 283.71 -492.00
## <none> 283.11 -491.34
## - opinionTypes 1 1.6747 284.78 -489.60
## - summaryTokens 1 4.7013 287.81 -482.91
## - summaryTypes 1 8.6826 291.79 -474.22
## - summaryTTR 1 15.0629 298.17 -460.52
##
## Step: AIC=-493.34
## Teacher_Assessment ~ summaryTokens + summarySentenceCount + opinionSentenceCount +
## summaryTypes + opinionTypes + summaryTTR + opinionTTR
##
## Df Sum of Sq RSS AIC
## - opinionTTR 1 0.5384 283.65 -494.13
## - opinionSentenceCount 1 0.5582 283.67 -494.09
## - summarySentenceCount 1 0.6086 283.72 -493.98
## <none> 283.11 -493.34
## - summaryTokens 1 4.7287 287.84 -484.85
## - summaryTypes 1 8.7176 291.83 -476.14
## - opinionTypes 1 11.5357 294.64 -470.06
## - summaryTTR 1 15.0722 298.18 -462.50
##
## Step: AIC=-494.13
## Teacher_Assessment ~ summaryTokens + summarySentenceCount + opinionSentenceCount +
## summaryTypes + opinionTypes + summaryTTR
##
## Df Sum of Sq RSS AIC
## - summarySentenceCount 1 0.7064 284.35 -494.56
## <none> 283.65 -494.13
## - opinionSentenceCount 1 1.1171 284.76 -493.65
## - summaryTokens 1 4.9324 288.58 -485.22
## - summaryTypes 1 9.1721 292.82 -475.99
## - opinionTypes 1 11.1592 294.81 -471.71
## - summaryTTR 1 15.1483 298.80 -463.20
##
## Step: AIC=-494.56
## Teacher_Assessment ~ summaryTokens + opinionSentenceCount + summaryTypes +
## opinionTypes + summaryTTR
##
## Df Sum of Sq RSS AIC
## <none> 284.35 -494.56
## - opinionSentenceCount 1 2.3434 286.70 -491.36
## - summaryTokens 1 6.6715 291.02 -481.88
## - summaryTypes 1 9.8731 294.23 -474.95
## - opinionTypes 1 14.2478 298.60 -465.61
## - summaryTTR 1 15.2804 299.63 -463.43
summary(linMod2)
##
## Call:
## lm(formula = Teacher_Assessment ~ summaryTokens + opinionSentenceCount +
## summaryTypes + opinionTypes + summaryTTR, data = reading)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8868 -0.5534 0.0341 0.4499 1.5785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.258490 0.276254 11.795 < 2e-16 ***
## summaryTokens -0.006025 0.001571 -3.835 0.000138 ***
## opinionSentenceCount -0.036011 0.015842 -2.273 0.023354 *
## summaryTypes 0.017400 0.003729 4.666 3.76e-06 ***
## opinionTypes 0.012813 0.002286 5.605 3.12e-08 ***
## summaryTTR -2.565959 0.442057 -5.805 1.03e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6734 on 627 degrees of freedom
## Multiple R-squared: 0.2179, Adjusted R-squared: 0.2116
## F-statistic: 34.93 on 5 and 627 DF, p-value: < 2.2e-16
tapply(reading$Teacher_Assessment, reading$Gender, mean)
## Female Male
## 2.888889 2.594502
tapply(reading$Teacher_Assessment, reading$Gender, sd)
## Female Male
## 0.7768229 0.7050341
ggplot(reading, aes(x = Teacher_Assessment, y = ..density.., colour=Gender)) +
stat_density(geom="line") +
ggtitle("Homework Assignment Scores per Gender") +
ylab("Density") +
xlab("Score")
Females <- subset(reading, reading$Gender=="Female")
Males <- reading[!reading$Gender=="Female",]
t.test(Females$Teacher_Assessment, Males$Teacher_Assessment)
##
## Welch Two Sample t-test
##
## data: Females$Teacher_Assessment and Males$Teacher_Assessment
## t = 4.9956, df = 628.36, p-value = 7.611e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1786653 0.4101090
## sample estimates:
## mean of x mean of y
## 2.888889 2.594502
cohen.d(Females$Teacher_Assessment, Males$Teacher_Assessment, conf.level=0.95)
##
## Cohen's d
##
## d estimate: 0.3953153 (small)
## 95 percent confidence interval:
## inf sup
## 0.2371912 0.5534394
tapply(reading$Teacher_Assessment, reading$Campus, mean)
## Saitama Tokyo
## 2.830846 2.619048
tapply(reading$Teacher_Assessment, reading$Campus, sd)
## Saitama Tokyo
## 0.7712048 0.7177143
ggplot(reading, aes(x = Teacher_Assessment, y = ..density.., colour = Campus)) +
stat_density(geom="line") +
ggtitle("Homework Assignment Scores per Campus") +
ylab("Density") +
xlab("Score")
t.test(Tokyo$Teacher_Assessment, Saitama$Teacher_Assessment)
##
## Welch Two Sample t-test
##
## data: Tokyo$Teacher_Assessment and Saitama$Teacher_Assessment
## t = -3.4775, df = 508.14, p-value = 0.0005495
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.33145486 -0.09214145
## sample estimates:
## mean of x mean of y
## 2.619048 2.830846
cohen.d(Saitama$Teacher_Assessment, Tokyo$Teacher_Assessment, conf.level=0.95)
##
## Cohen's d
##
## d estimate: 0.281591 (small)
## 95 percent confidence interval:
## inf sup
## 0.1187173 0.4444646
ggplot(data=reading) +
aes(x=Gender, y=Teacher_Assessment, group=Campus, color=Campus) +
stat_summary(fun.y = mean, geom = "line") +
stat_summary(fun.y = mean, geom = "point") +
ggtitle("Effect of Gender and Campus \non Homework Assignment Scores") +
ylab("Score")
genderCampusAov <- aov(Teacher_Assessment~Gender*Campus,data= reading)
summary(genderCampusAov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 13.6 13.626 25.284 6.46e-07 ***
## Campus 1 1.2 1.250 2.319 0.128
## Gender:Campus 1 9.7 9.711 18.020 2.52e-05 ***
## Residuals 629 339.0 0.539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(genderCampusAov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Teacher_Assessment ~ Gender * Campus, data = reading)
##
## $Gender
## diff lwr upr p adj
## Male-Female -0.2943872 -0.4093563 -0.1794181 6e-07
##
## $Campus
## diff lwr upr p adj
## Tokyo-Saitama -0.08375866 -0.2027791 0.03526181 0.1674773
##
## $`Gender:Campus`
## diff lwr upr p adj
## Male:Saitama-Female:Saitama -0.05358666 -0.259200537 0.1520272 0.9079025
## Female:Tokyo-Female:Saitama 0.23499212 -0.032111769 0.5020960 0.1071212
## Male:Tokyo-Female:Saitama -0.39403391 -0.577770604 -0.2102972 0.0000003
## Female:Tokyo-Male:Saitama 0.28857878 -0.008357544 0.5855151 0.0603387
## Male:Tokyo-Male:Saitama -0.34044725 -0.565360270 -0.1155342 0.0006173
## Male:Tokyo-Female:Tokyo -0.62902604 -0.911255177 -0.3467969 0.0000001
tapply(reading$Teacher_Assessment, reading$Month, mean)
## April July June May
## 2.907895 2.475248 2.946429 2.818182
monthAov <- aov(Teacher_Assessment~Month, data=reading)
summary(monthAov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Month 3 24.5 8.162 15.14 1.59e-09 ***
## Residuals 629 339.1 0.539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
table(reading$Week)
##
## W1 W2 W3 W4 W5 W6 W7 W8 W9 W10 W11 W12 W13 W14 W15
## 1 25 45 44 43 49 38 34 26 28 55 49 41 43 112
tapply(reading$Teacher_Assessment, reading$Week, mean)
## W1 W2 W3 W4 W5 W6 W7 W8
## 4.000000 2.800000 3.000000 2.818182 3.093023 2.857143 2.552632 2.588235
## W9 W10 W11 W12 W13 W14 W15
## 3.192308 3.071429 2.963636 2.795918 2.560976 2.627907 2.366071
figure4 <- ggplot(data=reading) +
aes(x=Week, y=Teacher_Assessment, group=Campus, color=Campus) +
stat_summary(fun.y = mean, geom = "line") +
stat_summary(fun.y = mean, geom = "point") +
ylab("Average Homework Score") +
ggtitle("Weekly Homework Assignment Scores")
figure4
weekAov <- aov(Teacher_Assessment~Week,data= reading)
summary(weekAov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Week 14 41.8 2.9881 5.74 1.38e-10 ***
## Residuals 618 321.7 0.5206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(weekAov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Teacher_Assessment ~ Week, data = reading)
##
## $Week
## diff lwr upr p adj
## W2-W1 -1.200000000 -3.70553026 1.305530258 0.9514055
## W3-W1 -1.000000000 -3.48402316 1.484023163 0.9895019
## W4-W1 -1.181818182 -3.66645491 1.302818547 0.9540360
## W5-W1 -0.906976744 -3.39225542 1.578301927 0.9960302
## W6-W1 -1.142857143 -3.62467522 1.338960939 0.9648238
## W7-W1 -1.447368421 -3.93636034 1.041623503 0.8095233
## W8-W1 -1.411764706 -3.90450794 1.080978527 0.8379992
## W9-W1 -0.807692308 -3.31136868 1.695984062 0.9989309
## W10-W1 -0.928571429 -3.42893386 1.571791006 0.9952558
## W11-W1 -1.036363636 -3.51547280 1.442745529 0.9850377
## W12-W1 -1.204081633 -3.68589971 1.277736449 0.9461365
## W13-W1 -1.439024390 -3.92568033 1.047631548 0.8149854
## W14-W1 -1.372093023 -3.85737169 1.113185648 0.8624776
## W15-W1 -1.633928571 -4.10174694 0.833889798 0.6248473
## W3-W2 0.200000000 -0.41285219 0.812852188 0.9987868
## W4-W2 0.018181818 -0.59715257 0.633516204 1.0000000
## W5-W2 0.293023256 -0.32489810 0.910944614 0.9551556
## W6-W2 0.057142857 -0.54670954 0.660995259 1.0000000
## W7-W2 -0.247368421 -0.88005913 0.385322284 0.9920836
## W8-W2 -0.211764706 -0.85905564 0.435526229 0.9987534
## W9-W2 0.392307692 -0.29588712 1.080502500 0.8312707
## W10-W2 0.271428571 -0.40461066 0.947467799 0.9897666
## W11-W2 0.163636364 -0.42898408 0.756256804 0.9998184
## W12-W2 -0.004081633 -0.60793403 0.599770769 1.0000000
## W13-W2 -0.239024390 -0.86246203 0.384413247 0.9934709
## W14-W2 -0.172093023 -0.79001438 0.445828335 0.9997991
## W15-W2 -0.433928571 -0.97738440 0.109527256 0.2958136
## W4-W3 -0.181818182 -0.70270743 0.339071063 0.9975009
## W5-W3 0.093023256 -0.43091949 0.616966004 0.9999993
## W6-W3 -0.142857143 -0.65013115 0.364416867 0.9997712
## W7-W3 -0.447368421 -0.98865090 0.093914063 0.2418081
## W8-W3 -0.411764706 -0.97004309 0.146513678 0.4336210
## W9-W3 0.192307692 -0.41292061 0.797535990 0.9990959
## W10-W3 0.071428571 -0.51994123 0.662798368 1.0000000
## W11-W3 -0.036363636 -0.53021400 0.457486730 1.0000000
## W12-W3 -0.204081633 -0.71135564 0.303192378 0.9895670
## W13-W3 -0.439024390 -0.96946165 0.091412869 0.2397043
## W14-W3 -0.372093023 -0.89603577 0.151849725 0.5020713
## W15-W3 -0.633928571 -1.06755679 -0.200300349 0.0000839
## W5-W4 0.274841438 -0.25200257 0.801685443 0.9068393
## W6-W4 0.038961039 -0.47130901 0.549231089 1.0000000
## W7-W4 -0.265550239 -0.80964153 0.278541049 0.9435441
## W8-W4 -0.229946524 -0.79094862 0.331055576 0.9875490
## W9-W4 0.374125874 -0.23361576 0.981867509 0.7371777
## W10-W4 0.253246753 -0.34069503 0.847188534 0.9820655
## W11-W4 0.145454545 -0.35147280 0.642381891 0.9996421
## W12-W4 -0.022263451 -0.53253350 0.488006599 1.0000000
## W13-W4 -0.257206208 -0.79050939 0.276096978 0.9486180
## W14-W4 -0.190274841 -0.71711885 0.336569163 0.9964334
## W15-W4 -0.452110390 -0.88923970 -0.014981075 0.0344487
## W6-W5 -0.235880399 -0.74926712 0.277506319 0.9654824
## W7-W5 -0.540391677 -1.08740697 0.006623613 0.0567763
## W8-W5 -0.504787962 -1.06862637 0.059050449 0.1381981
## W9-W5 0.099284436 -0.51107635 0.709645226 0.9999998
## W10-W5 -0.021594684 -0.61821620 0.575026834 1.0000000
## W11-W5 -0.129386892 -0.62951406 0.370740276 0.9999163
## W12-W5 -0.297104888 -0.81049161 0.216281829 0.8149527
## W13-W5 -0.532047646 -1.06833366 0.004238366 0.0543394
## W14-W5 -0.465116279 -0.99497947 0.064746914 0.1606572
## W15-W5 -0.726951827 -1.16771530 -0.286188359 0.0000031
## W7-W6 -0.304511278 -0.83558250 0.226559946 0.8250790
## W8-W6 -0.268907563 -0.81729125 0.279476127 0.9414245
## W9-W6 0.335164835 -0.26094859 0.931278256 0.8452707
## W10-W6 0.214285714 -0.36775222 0.796323646 0.9956565
## W11-W6 0.106493506 -0.37614311 0.589130122 0.9999881
## W12-W6 -0.061224490 -0.55758811 0.435139127 1.0000000
## W13-W6 -0.296167247 -0.81618033 0.223845834 0.8322157
## W14-W6 -0.229235880 -0.74262260 0.284150837 0.9729721
## W15-W6 -0.491071429 -0.91188416 -0.070258699 0.0068858
## W8-W7 0.035603715 -0.54438290 0.615590334 1.0000000
## W9-W7 0.639676113 0.01436738 1.264984850 0.0390297
## W10-W7 0.518796992 -0.09310820 1.130702181 0.2052529
## W11-W7 0.411004785 -0.10725954 0.929269108 0.3070089
## W12-W7 0.243286788 -0.28778444 0.774358013 0.9663318
## W13-W7 0.008344031 -0.54489499 0.561583048 1.0000000
## W14-W7 0.075275398 -0.47173989 0.622290688 1.0000000
## W15-W7 -0.186560150 -0.64780104 0.274680740 0.9890197
## W9-W8 0.604072398 -0.03600497 1.244149771 0.0883850
## W10-W8 0.483193277 -0.14379635 1.110182900 0.3556331
## W11-W8 0.375401070 -0.16058954 0.911391681 0.5270417
## W12-W8 0.207683073 -0.34070062 0.756066764 0.9942221
## W13-W8 -0.027259684 -0.59713812 0.542618756 1.0000000
## W14-W8 0.039671683 -0.52416673 0.603510094 1.0000000
## W15-W8 -0.222163866 -0.70323682 0.258909087 0.9639699
## W10-W9 -0.120879121 -0.79001478 0.548256535 0.9999991
## W11-W9 -0.228671329 -0.81340415 0.356061494 0.9920655
## W12-W9 -0.396389325 -0.99250275 0.199724096 0.6176120
## W13-W9 -0.631332083 -1.24727687 -0.015387299 0.0381798
## W14-W9 -0.564400716 -1.17476150 0.045960074 0.1058833
## W15-W9 -0.826236264 -1.36107991 -0.291392619 0.0000203
## W11-W10 -0.107792208 -0.67816877 0.462584350 0.9999984
## W12-W10 -0.275510204 -0.85754814 0.306527728 0.9558088
## W13-W10 -0.510452962 -1.11278586 0.091879936 0.2058671
## W14-W10 -0.443521595 -1.04014311 0.153099924 0.4194441
## W15-W10 -0.705357143 -1.22446664 -0.186247647 0.0004413
## W12-W11 -0.167717996 -0.65035461 0.314918620 0.9976155
## W13-W11 -0.402660754 -0.90958764 0.104266132 0.3043517
## W14-W11 -0.335729387 -0.83585656 0.164397782 0.6015304
## W15-W11 -0.597564935 -1.00209505 -0.193034817 0.0000650
## W13-W12 -0.234942758 -0.75495584 0.285070323 0.9700814
## W14-W12 -0.168011391 -0.68139811 0.345375327 0.9987490
## W15-W12 -0.429846939 -0.85065967 -0.009034209 0.0396797
## W14-W13 0.066931367 -0.46935464 0.603217379 1.0000000
## W15-W13 -0.194904181 -0.64336836 0.253559996 0.9787002
## W15-W14 -0.261835548 -0.70259902 0.178927920 0.7841622
sort(table(reading$Day))
##
## Saturday Sunday Wednesday Monday Tuesday Thursday Friday
## 50 51 51 82 87 134 178
sort(tapply(reading$Teacher_Assessment, reading$Day, mean))
## Tuesday Friday Monday Wednesday Sunday Thursday Saturday
## 2.471264 2.640449 2.768293 2.843137 2.862745 2.925373 2.960000
dayAov <- aov(Teacher_Assessment~Day,data= reading)
summary(weekAov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Week 14 41.8 2.9881 5.74 1.38e-10 ***
## Residuals 618 321.7 0.5206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(dayAov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Teacher_Assessment ~ Day, data = reading)
##
## $Day
## diff lwr upr p adj
## Monday-Friday 0.12784324 -0.16617777 0.421864261 0.8582764
## Saturday-Friday 0.31955056 -0.03304842 0.672149539 0.1048529
## Sunday-Friday 0.22229566 -0.12759414 0.572185458 0.4949040
## Thursday-Friday 0.28492370 0.03296838 0.536879012 0.0151831
## Tuesday-Friday -0.16918507 -0.45736381 0.118993669 0.5916195
## Wednesday-Friday 0.20268782 -0.14720198 0.552577615 0.6072108
## Saturday-Monday 0.19170732 -0.20357161 0.586986247 0.7828745
## Sunday-Monday 0.09445242 -0.29841177 0.487316596 0.9919278
## Thursday-Monday 0.15708045 -0.15178987 0.465950772 0.7422844
## Tuesday-Monday -0.29702832 -0.63609524 0.042038610 0.1303602
## Wednesday-Monday 0.07484457 -0.31801961 0.467708753 0.9977499
## Sunday-Saturday -0.09725490 -0.53568385 0.341174050 0.9947791
## Thursday-Saturday -0.03462687 -0.39970022 0.330446491 0.9999601
## Tuesday-Saturday -0.48873563 -0.87968839 -0.097782872 0.0044126
## Wednesday-Saturday -0.11686275 -0.55529170 0.321566207 0.9860541
## Thursday-Sunday 0.06262804 -0.29982939 0.425085465 0.9987019
## Tuesday-Sunday -0.39148073 -0.77999185 -0.002969607 0.0468690
## Wednesday-Sunday -0.01960784 -0.45586096 0.416645269 0.9999995
## Tuesday-Thursday -0.45410877 -0.75742296 -0.150794569 0.0002255
## Wednesday-Thursday -0.08223588 -0.44469331 0.280221549 0.9940970
## Wednesday-Tuesday 0.37187289 -0.01663824 0.760384010 0.0709905
scores <- select(reading, Nickname, Campus, Teacher_Assessment,
summaryTokens, opinionTokens) %>%
group_by(Nickname) %>%
summarize(books = length(Teacher_Assessment),
score = sum(Teacher_Assessment),
average = mean(Teacher_Assessment),
percent=average*25,
summary_Words = round(mean(summaryTokens)),
opinion_Words = round(mean(opinionTokens)))
print(tbl_df(scores), n=nlevels(reading$Nickname))
## # A tibble: 59 x 7
## Nickname books score average percent summary_Words opinion_Words
## <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 alice 11 23 2.09 52.3 161 56
## 2 asahi 13 40 3.08 76.9 163 69
## 3 atsuhito 14 34 2.43 60.7 129 21
## 4 atsushi 6 20 3.33 83.3 168 52
## 5 aya 11 38 3.45 86.4 203 64
## 6 ayumi 10 34 3.4 85 282 57
## 7 chacha 3 2 0.667 16.7 143 24
## 8 eishi 8 24 3 75 139 73
## 9 fumiya 14 30 2.14 53.6 79 61
## 10 genki 11 32 2.91 72.7 182 36
## 11 harumi 14 48 3.43 85.7 350 58
## 12 hide 11 37 3.36 84.1 190 59
## 13 jun 10 30 3 75 177 40
## 14 kakuto 18 42 2.33 58.3 169 44
## 15 kandai 10 20 2 50 84 48
## 16 kasumi 8 21 2.62 65.6 191 72
## 17 kazuma 11 40 3.64 90.9 220 71
## 18 kensuke 8 18 2.25 56.2 109 33
## 19 koharu 12 39 3.25 81.2 156 43
## 20 kouki 10 28 2.8 70 179 20
## 21 mai 3 9 3 75 135 15
## 22 mako 11 31 2.82 70.5 168 37
## 23 manabu 10 21 2.1 52.5 87 57
## 24 mayu 14 40 2.86 71.4 132 40
## 25 mike 11 34 3.09 77.3 154 52
## 26 miki 9 23 2.56 63.9 170 46
## 27 miku 12 43 3.58 89.6 203 57
## 28 minako 9 25 2.78 69.4 182 45
## 29 minami 9 24 2.67 66.7 201 49
## 30 mirei 13 31 2.38 59.6 153 29
## 31 moeka 13 34 2.62 65.4 188 31
## 32 moena 12 43 3.58 89.6 191 82
## 33 mri 12 26 2.17 54.2 139 31
## 34 natsu 12 41 3.42 85.4 229 147
## 35 nono 13 38 2.92 73.1 286 44
## 36 richard 3 6 2 50 212 109
## 37 rino 8 19 2.38 59.4 181 97
## 38 rio 10 32 3.2 80 127 34
## 39 ryo 2 3 1.5 37.5 54 6
## 40 saki 8 25 3.12 78.1 159 69
## 41 sakura 12 40 3.33 83.3 208 36
## 42 sena 12 24 2 50 106 11
## 43 sera 14 36 2.57 64.3 182 56
## 44 shimpei 12 34 2.83 70.8 144 13
## 45 shine 11 35 3.18 79.5 130 86
## 46 shintaro 10 30 3 75 165 30
## 47 shun 10 26 2.6 65 194 91
## 48 sit 7 19 2.71 67.9 282 83
## 49 taichi 14 31 2.21 55.4 100 73
## 50 tmk 14 31 2.21 55.4 154 62
## 51 tomo 1 3 3 75 48 72
## 52 tomoya 19 40 2.11 52.6 82 37
## 53 yuji 6 17 2.83 70.8 198 36
## 54 yuki 10 30 3 75 171 42
## 55 yumi 11 43 3.91 97.7 175 105
## 56 yuna 16 36 2.25 56.2 128 28
## 57 yuto_s 17 37 2.18 54.4 173 33
## 58 yuto_y 16 42 2.62 65.6 137 47
## 59 yuuka 14 41 2.93 73.2 192 106
mean(table(reading$Nickname))
## [1] 10.72881
mean(reading$Teacher_Assessment)
## [1] 2.753555
survey <- read.csv("survey.csv")
ggplot(aes(x = Did.you.enjoy.reading.graded.readers.), data=survey) +
geom_bar() +
ggtitle("Did you enjoy reading graded readers?") +
xlab("")
ggplot(aes(x = Will.you.continue.reading.graded.readers.after.the.course.has.finished.), data=survey) +
geom_bar() +
theme(axis.text.x = element_text(size = 8, angle = 75, hjust=1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()) +
ggtitle("Will you continue reading graded readers\nafter the course has finished?") +
xlab("")
ggplot(aes(x = How.was.the.reading.graded.readers.),
data=survey) +
geom_bar() +
theme(axis.text.x = element_text(size = 10, angle = 75, hjust=1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()) +
ggtitle("How was reading graded readers?") +
xlab("")
ggplot(aes(x = How.was.the.writing.about.graded.readers.),
data=survey) +
geom_bar() +
theme(axis.text.x = element_text(size = 10, angle = 75, hjust=1),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()) +
ggtitle("How was writing about graded readers?") +
xlab("")
survey$Do.you.have.any.comments. <- as.character(survey$Do.you.have.any.comments.)
student_comments <- dfm(survey$Do.you.have.any.comments.)
textplot_wordcloud(student_comments, max_words = 200, max_size = 16, min_count = 2)
ggplot(data=reading) +
aes(x=Week, y=Teacher_Assessment_2, group=Campus, color=Campus) +
stat_summary(fun.y = mean, geom = "line") +
stat_summary(fun.y = mean, geom = "point") +
ylab("Average Homework Score") +
ggtitle("Weekly Homework Assignment Scores")
table(reading$Teacher_Assessment, reading$Teacher_Assessment_2)
##
## 0 1 2 3 4
## 0 2 0 0 0 0
## 1 0 3 1 0 0
## 2 0 0 205 49 1
## 3 0 0 7 231 21
## 4 0 0 0 7 106
final_scores <- select(reading, Nickname, Teacher_Assessment,
Teacher_Assessment_2) %>%
group_by(Nickname) %>%
summarize(
books = length(Teacher_Assessment),
score_1 = sum(Teacher_Assessment),
score_2 = sum(Teacher_Assessment_2),
average = (score_1 + score_2)/2,
percent=average*2.5)
print(tbl_df(final_scores), n=nlevels(reading$Nickname))
## # A tibble: 59 x 6
## Nickname books score_1 score_2 average percent
## <fct> <int> <int> <int> <dbl> <dbl>
## 1 alice 11 23 24 23.5 58.8
## 2 asahi 13 40 46 43 108.
## 3 atsuhito 14 34 35 34.5 86.2
## 4 atsushi 6 20 20 20 50
## 5 aya 11 38 39 38.5 96.2
## 6 ayumi 10 34 36 35 87.5
## 7 chacha 3 2 2 2 5
## 8 eishi 8 24 23 23.5 58.8
## 9 fumiya 14 30 31 30.5 76.2
## 10 genki 11 32 33 32.5 81.2
## 11 harumi 14 48 48 48 120
## 12 hide 11 37 37 37 92.5
## 13 jun 10 30 33 31.5 78.8
## 14 kakuto 18 42 43 42.5 106.
## 15 kandai 10 20 20 20 50
## 16 kasumi 8 21 24 22.5 56.2
## 17 kazuma 11 40 42 41 102.
## 18 kensuke 8 18 20 19 47.5
## 19 koharu 12 39 39 39 97.5
## 20 kouki 10 28 27 27.5 68.8
## 21 mai 3 9 10 9.5 23.8
## 22 mako 11 31 33 32 80
## 23 manabu 10 21 21 21 52.5
## 24 mayu 14 40 39 39.5 98.8
## 25 mike 11 34 35 34.5 86.2
## 26 miki 9 23 23 23 57.5
## 27 miku 12 43 45 44 110
## 28 minako 9 25 25 25 62.5
## 29 minami 9 24 24 24 60
## 30 mirei 13 31 33 32 80
## 31 moeka 13 34 34 34 85
## 32 moena 12 43 42 42.5 106.
## 33 mri 12 26 28 27 67.5
## 34 natsu 12 41 43 42 105
## 35 nono 13 38 40 39 97.5
## 36 richard 3 6 8 7 17.5
## 37 rino 8 19 20 19.5 48.8
## 38 rio 10 32 33 32.5 81.2
## 39 ryo 2 3 3 3 7.5
## 40 saki 8 25 25 25 62.5
## 41 sakura 12 40 40 40 100
## 42 sena 12 24 24 24 60
## 43 sera 14 36 37 36.5 91.2
## 44 shimpei 12 34 36 35 87.5
## 45 shine 11 35 36 35.5 88.8
## 46 shintaro 10 30 30 30 75
## 47 shun 10 26 30 28 70
## 48 sit 7 19 21 20 50
## 49 taichi 14 31 33 32 80
## 50 tmk 14 31 33 32 80
## 51 tomo 1 3 3 3 7.5
## 52 tomoya 19 40 42 41 102.
## 53 yuji 6 17 17 17 42.5
## 54 yuki 10 30 30 30 75
## 55 yumi 11 43 43 43 108.
## 56 yuna 16 36 36 36 90
## 57 yuto_s 17 37 37 37 92.5
## 58 yuto_y 16 42 44 43 108.
## 59 yuuka 14 41 44 42.5 106.
COMMENT About 1 per cent of the sentences were duplicates.