---
title: "SQA AH Statistics Exam Papers, using R"
output: html_document
date: "2023-07-19"
---
## Instructions

1. Expand (or collapse) the code chunks by clicking the grey triangle to the right of the line number, or use the menu item Edit > Folding > Expand All (or Collapse All).
2. Locate the code chunk you want by its year and question number.
3. Run a code chunk by clicking the green triangle ('play' button) at the top right of that chunk.

```{r 2017 Question 2}
# Discrete uniform distribution: five equally likely values 20, 30, ..., 60
x_values <- seq(from = 20, to = 60, by = 10)
x_probabilities <- rep(1 / 5, times = length(x_values))
# Mean: E(X) = sum of x * P(X = x)
expected_x <- sum(x_values * x_probabilities)
expected_x
# Variance: Var(X) = E(X^2) - [E(X)]^2
expected_x_squared <- sum(x_values^2 * x_probabilities)
expected_x_squared - expected_x^2
```
```{r 2017 Question 3(b)}
# Summary statistics for the two independent groups
pianists_n <- 16
pianists_mean <- 77
pianists_st_dev <- 10
violinists_n <- 14
violinists_mean <- 82
violinists_st_dev <- 8
# t.test() needs raw data, so build stand-in samples: scale() standardises
# 1..n to mean 0 and sd 1, and rescaling then reproduces exactly the sample
# mean and standard deviation listed above
simulated_pianist_marks <-
  pianists_mean + pianists_st_dev * scale(seq_len(pianists_n))
simulated_violinist_marks <-
  violinists_mean + violinists_st_dev * scale(seq_len(violinists_n))
# Two-sample t-test, H1: pianists' mean is less than violinists' mean
t.test(
  x = simulated_pianist_marks,
  y = simulated_violinist_marks,
  alternative = "less",
  paired = FALSE,
  var.equal = TRUE # pools the two sample variances
)
```
```{r 2017 Question 7(a)}
# Summary statistics for the sample
trees_n <- 18
trees_mean <- 7.46
trees_st_dev <- 1.46
# t.test() needs raw data, so build a stand-in sample: scale() standardises
# 1..n to mean 0 and sd 1, and rescaling then reproduces exactly the sample
# mean and standard deviation listed above
simulated_trees_per_hectare <-
  trees_mean + trees_st_dev * scale(seq_len(trees_n))
# One call reports both the two-sided test of H0: mu = 5.87 and the
# 95% confidence interval for the mean
t.test(
  x = simulated_trees_per_hectare,
  alternative = "two.sided",
  mu = 5.87,
  conf.level = 0.95
)
```
```{r 2017 Question 8}
# (a) W ~ Po(4): P(2 <= W <= 6) = P(W <= 6) - P(W <= 1)
w_rate <- 4
ppois(q = 6, lambda = w_rate) - ppois(q = 1, lambda = w_rate)
# the same probability in one call: diff() subtracts consecutive CDF values
diff(ppois(q = c(1, 6), lambda = w_rate))
# (b) X ~ N(4, 4): variance 4, so sd = 2; P(2 < X < 6)
x_mean <- 4
x_sd <- 2
pnorm(q = 6, mean = x_mean, sd = x_sd) - pnorm(q = 2, mean = x_mean, sd = x_sd)
# the same probability using diff()
diff(pnorm(q = c(2, 6), mean = x_mean, sd = x_sd))
# (c) Y ~ U(6, 10): P(8 - sqrt(4/3) < Y < 8 + sqrt(4/3))
y_min <- 6
y_max <- 10
y_offset <- sqrt(4 / 3)
punif(q = 8 + y_offset, min = y_min, max = y_max) -
  punif(q = 8 - y_offset, min = y_min, max = y_max)
# the same probability using diff()
diff(punif(q = 8 + c(-1, 1) * y_offset, min = y_min, max = y_max))
```
```{r 2017 Question 9(c)}
# z.test() comes from the 'BSDA' package. Check for it with requireNamespace(),
# which tests availability without attaching anything or emitting a warning,
# install it only if it is missing, then attach it with library() so that a
# failed install stops with a clear error.
if (!requireNamespace("BSDA", quietly = TRUE)) {
  install.packages("BSDA")
}
library(BSDA)
# Summary statistics for the sample
lead_n <- 25
lead_mean <- 174.5
lead_st_dev <- 23.1
# z.test() needs raw data, so build a stand-in sample: scale() standardises
# 1..n to mean 0 and sd 1, and rescaling then reproduces exactly the sample
# mean and standard deviation listed above
simulated_lead_concentration <- scale(seq_len(lead_n)) * lead_st_dev + lead_mean
# One-sided z-test of H0: mu = 165.6 against H1: mu > 165.6; the quoted
# standard deviation is passed as the population sigma
z.test(
  x = simulated_lead_concentration,
  sigma.x = lead_st_dev,
  mu = 165.6,
  alternative = "greater"
)
```
```{r 2017 Question 10(c)}
# 2 x 2 contingency table: one row per outcome, one column per group
table <- rbind(
  recaptured = c(58, 51),
  not_recaptured = c(255, 182)
)
# Chi-squared test on the table; correct = FALSE switches off
# Yates' continuity correction
output <- chisq.test(x = table, correct = FALSE)
# Test statistic, degrees of freedom and p-value
output
print("Expected frequencies:")
# Expected frequencies under the null hypothesis
output$expected
```
```{r 2017 Question 11(a)}
in_favour <- 61
sample_size <- 100
# Confidence interval for a single proportion, obtained through the
# two-sample form of prop.test(): the second "sample" is given zero
# successes, so it adds nothing to the estimate or the standard error and
# the interval agrees with the hand-calculated answer.
prop.test(
  x = c(in_favour, 0),
  n = c(sample_size, sample_size),
  conf.level = 0.99,
  alternative = "two.sided",
  correct = FALSE # switches off Yates' continuity correction
)
```