---
title: "SQA AH Statistics Exam Papers, using R"
output: html_document
date: "2023-07-19"
---
INSTRUCTIONS

1. Expand (or collapse) the code chunks by clicking on the grey triangle to the right of the line number, or use the menu item Edit > Folding > Expand All (or Collapse All).
2. Locate the code chunk you want using its year and question number.
3. Run any code chunk by clicking on the green triangle ('play' button) in the top right of that chunk.

```{r 2023 Paper 2 Question 2}
# Wilcoxon signed-rank test on the step counts, measured relative to 300
steps = c(320, 310, 321, 304, 298, 328, 296, 307, 314, 295)
differences = steps - 300
# discard any zero differences before carrying out the Wilcoxon test
updated_differences = differences[differences != 0]
# upper-tailed test; the hypothesised median is supplied through the 'mu' argument
wilcox.test(x = updated_differences, alternative = "greater", mu = 0)
# note that the value of V shown in the printed output is *not* the minimum rank sum
```
```{r 2023 Paper 2 Question 4}
# Chi-squared goodness-of-fit test of the observed counts against the ratio 1:1:2
observed_offspring = c(78, 90, 152)
# express the ratio 1:1:2 as probabilities that sum to 1
probabilities = c(1, 1, 2) / 4
# run the test and keep the result so that its components can be inspected
output = chisq.test(x = observed_offspring, p = probabilities)
# show the expected frequencies first, then the full test summary
# (test statistic, degrees of freedom, p-value)
print("Expected frequencies:")
output[["expected"]]
output
```
```{r 2023 Paper 2 Question 6}
# z.test() comes from the 'BSDA' package; install it first if it is not available
if (!require(BSDA)) {
  install.packages("BSDA")
  library(BSDA)
}
# summary statistics built from the totals 3840 and 198240 for a sample of 75
length_n = 75
length_mean = 3840 / length_n
length_st_dev = sqrt((198240 - 3840^2 / length_n) / (length_n - 1))
# build an artificial sample with exactly this mean and standard deviation:
# scale() standardises 1, 2, ..., n, which is then stretched and shifted
simulated_lengths = length_mean + length_st_dev * scale(seq_len(length_n))
# upper-tailed z-test of the mean against 50, supplying the standard deviation
z.test(x = simulated_lengths,
       mu = 50,
       sigma.x = length_st_dev,
       alternative = "greater")
# for interest, the corresponding single sample t-test
t.test(x = simulated_lengths, mu = 50, alternative = "greater")
```
```{r 2023 Paper 2 Question 8}
# Test whether a sample correlation coefficient differs from zero
n = 25     # sample size
r = 0.652  # sample correlation coefficient
# test statistic, referred to a t distribution with n - 2 degrees of freedom
t = r * sqrt(n - 2) / sqrt(1 - r^2)
t
# two-tailed p-value: double the tail area beyond |t|.
# abs() keeps this valid when r (and hence t) is negative; doubling the upper
# tail of a negative t would give a "p-value" greater than 1.
p_value = 2 * pt(q = abs(t),
                 df = n - 2,
                 lower.tail = FALSE)
p_value
```
```{r 2023 Paper 2 Question 9(a)}
# Matched measurements with and without the tracker, compared element by element
with_tracker = c(5.1, 10, 10.8, 7.5, 6.2, 10.2, 5.4, 4.2, 8.1, 11.1, 10.2, 5.3)
without_tracker = c(4, 9.5, 12, 5.5, 5.9, 11, 4.8, 3.5, 6.5, 11.5, 9.4, 5.1)
# a positive difference means the 'with tracker' value is the larger one
differences = with_tracker - without_tracker
# upper-tailed one-sample t-test of the mean difference against zero
t.test(x = differences, alternative = "greater", mu = 0)
```
```{r 2023 Paper 2 Question 10}
# One-sample test of the proportion homeless / sample_size against 0.624
homeless = 23312
sample_size = 37878
# only the p-value in the printed output is needed; everything else can be ignored
prop.test(x = homeless, n = sample_size, p = 0.624,
          correct = FALSE, # switches off Yates' continuity correction
          alternative = "two.sided")
```
```{r 2023 Paper 2 Question 11}
# Two simultaneous equations in matrix form Ax = B, with x = (mu, sigma):
#   mu + qnorm(0.1)  * sigma = 17
#   mu + qnorm(0.95) * sigma = 24
# the coefficients are entered one equation per row
A = matrix(data = c(1, qnorm(0.1),
                    1, qnorm(0.95)),
           nrow = 2,
           byrow = TRUE)
B = c(17, 24)
# solving the system gives mu first and sigma second
solve(A, B)
```
```{r 2023 Paper 2 Question 12(a)}
# 99% confidence interval for the proportion in favour
in_favour = 55
sample_size = 100
# prop.test() is called in its 2-sample form, with a second sample that has zero
# 'successes'. This ensures that the confidence interval it reports agrees with
# hand-calculated answers.
prop.test(x = c(in_favour, 0),
          n = c(sample_size, sample_size),
          conf.level = 0.99,
          correct = FALSE, # switches off Yates' continuity correction
          alternative = "two.sided")
```