Classwork 9

Author

Diya Bijoy

Published

October 21, 2025

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(mosaic)
Registered S3 method overwritten by 'mosaic':
  method                           from   
  fortify.SpatialPolygonsDataFrame ggplot2

The 'mosaic' package masks several functions from core packages in order to add 
additional features.  The original behavior of these functions should not be affected by this.

Attaching package: 'mosaic'

The following object is masked from 'package:Matrix':

    mean

The following objects are masked from 'package:dplyr':

    count, do, tally

The following object is masked from 'package:purrr':

    cross

The following object is masked from 'package:ggplot2':

    stat

The following objects are masked from 'package:stats':

    binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
    quantile, sd, t.test, var

The following objects are masked from 'package:base':

    max, mean, min, prod, range, sample, sum
library(ggformula)
library(infer)

Attaching package: 'infer'

The following objects are masked from 'package:mosaic':

    prop_test, t_test
library(broom) # Clean test results in tibble form
library(resampledata) # Datasets from Chihara and Hesterberg's book

Attaching package: 'resampledata'

The following object is masked from 'package:datasets':

    Titanic
library(openintro) # More datasets
Loading required package: airports
Loading required package: cherryblossom
Loading required package: usdata

Attaching package: 'openintro'

The following object is masked from 'package:mosaic':

    dotPlot

The following objects are masked from 'package:lattice':

    ethanol, lsegments
library(visStatistics) # One package to rule them all
library(ggstatsplot)
You can cite this package as:
     Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
     Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
# Load the exam_grades dataset shipped with the openintro package into the
# workspace, then print it to inspect its columns.
data("exam_grades", package = "openintro")
exam_grades
# A tibble: 233 × 6
   semester sex   exam1 exam2 exam3 course_grade
   <chr>    <chr> <dbl> <dbl> <dbl>        <dbl>
 1 2000-1   Man    84.5  69.5  86.5         76.3
 2 2000-1   Man    80    74    67           75.4
 3 2000-1   Man    56    70    71.5         67.1
 4 2000-1   Man    64    61    67.5         63.5
 5 2000-1   Man    90.5  72.5  75           72.4
 6 2000-1   Man    74    78.5  84.5         71.4
 7 2000-1   Man    60.5  44    58           56.1
 8 2000-1   Man    89    82    88           78.0
 9 2000-1   Woman  87.5  86.5  95           82.9
10 2000-1   Man    91    98    88           89.1
# ℹ 223 more rows
library(nortest)
# Anderson-Darling normality test on the course grades, returned as a
# one-row tibble. Especially handy when we have >= 5000 observations.
broom::tidy(
  nortest::ad.test(x = exam_grades$course_grade)
)
# A tibble: 1 × 3
  statistic p.value method                         
      <dbl>   <dbl> <chr>                          
1     0.331   0.512 Anderson-Darling normality test
# One-sample t-test of the mean course grade against a believed value of 80.
# mosaic::t_test is namespaced explicitly because infer masks t_test.
t4 <- broom::tidy(
  mosaic::t_test(
    exam_grades$course_grade,  # variable under test
    mu = 80,                   # believed mean
    alternative = "two.sided"  # check both sides
  )
)
t4
# A tibble: 1 × 8
  estimate statistic  p.value parameter conf.low conf.high method    alternative
     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>     <dbl> <chr>     <chr>      
1     72.2     -12.1 2.19e-26       232     71.0      73.5 One Samp… two.sided  

ANOVA

# Read the frog data from frogs.csv (path is relative to the working
# directory); column types are guessed by readr.
frogs_orig <- read_csv("frogs.csv")
Rows: 60 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (4): Frogspawn sample id, Temperature13, Temperature18, Temperature25

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
frogs_orig
# A tibble: 60 × 4
   `Frogspawn sample id` Temperature13 Temperature18 Temperature25
                   <dbl>         <dbl>         <dbl>         <dbl>
 1                     1            24            NA            NA
 2                     2            NA            21            NA
 3                     3            NA            NA            18
 4                     4            26            NA            NA
 5                     5            NA            22            NA
 6                     6            NA            NA            14
 7                     7            27            NA            NA
 8                     8            NA            22            NA
 9                     9            NA            NA            15
10                    10            27            NA            NA
# ℹ 50 more rows

CONVERTING FROM WIDE FORM TO LONG FORM

# Reshape the wide frog data (one column per temperature) into long form:
# one row per frogspawn sample, with its temperature level (TempFac) and
# the recorded Time value.
frogs_long <- frogs_orig %>%
  pivot_longer(
    cols = starts_with("Temperature"),
    cols_vary = "fastest",
    names_to = "Temp",
    values_to = "Time"
  ) %>%
  # The wide layout pads the other temperature columns with NA; after
  # pivoting, those become rows with a missing Time, so drop them.
  drop_na() %>%
  separate_wider_regex(
    cols = Temp,
    # Knock off the literal "Temperature" prefix and keep only the digits
    patterns = c("Temperature", TempFac = "\\d+"),
    cols_remove = TRUE
  ) %>%
  # TempFac is character after the split; convert it to a 3-level factor
  # with the levels in increasing temperature order.
  mutate(TempFac = factor(TempFac, levels = c("13", "18", "25"))) %>%
  rename(Id = `Frogspawn sample id`)
frogs_long
# A tibble: 60 × 3
      Id TempFac  Time
   <dbl> <fct>   <dbl>
 1     1 13         24
 2     2 18         21
 3     3 25         18
 4     4 13         26
 5     5 18         22
 6     6 25         14
 7     7 13         27
 8     8 18         22
 9     9 25         15
10    10 13         27
# ℹ 50 more rows
# Number of observations at each temperature level. dplyr::count is
# namespaced explicitly because mosaic masks count.
frogs_long %>% dplyr::count(TempFac)
# A tibble: 3 × 2
  TempFac     n
  <fct>   <int>
1 13         20
2 18         20
3 25         20