Scenario: A comparison on the extent of student engagement is to be conducted to check whether interventions would be needed to entice students to participate in school activities. The team randomly sampled students (proportional allocation) from the four Academic Programs offered by the school and met the desired sample size computed via Modified Cochran.

Open the data set

library(xlsx)
# Read the student-engagement worksheet into a data frame. The worksheet is
# selected by its name, so it is passed as `sheetName`; `sheetIndex` is the
# argument for a numeric sheet position.
data <- read.xlsx("StudentEngagement.xlsx", sheetName = "Sheet1")

Check the first and last six rows, check the data structure and generate a summary

# Preview the first six rows of the imported data.
head(data)
##   NA. Engagement    Acad
## 1   1       3.53 Nursing
## 2   2       4.53 Nursing
## 3   3       3.85 Nursing
## 4   4       3.91 Nursing
## 5   5       4.35 Nursing
## 6   6       3.46 Nursing
# Preview the last six rows.
tail(data)
##    NA. Engagement              Acad
## 73  73       3.77 Civil Engineering
## 74  74       3.36 Civil Engineering
## 75  75       2.87 Civil Engineering
## 76  76       3.81 Civil Engineering
## 77  77       3.56 Civil Engineering
## 78  78       3.38 Civil Engineering
# Show the type of each column; the first column (printed as `NA.`) was read
# as character, Engagement as numeric and Acad as character.
str(data)
## 'data.frame':    78 obs. of  3 variables:
##  $ NA.       : chr  "1" "2" "3" "4" ...
##  $ Engagement: num  3.53 4.53 3.85 3.91 4.35 3.46 4.3 5.05 4.6 3.51 ...
##  $ Acad      : chr  "Nursing" "Nursing" "Nursing" "Nursing" ...
# Summarise every column; only Engagement is numeric, so it is the only one
# that gets quartiles and a mean.
summary(data)
##      NA.              Engagement        Acad          
##  Length:78          Min.   :2.160   Length:78         
##  Class :character   1st Qu.:3.270   Class :character  
##  Mode  :character   Median :3.635   Mode  :character  
##                     Mean   :3.647                     
##                     3rd Qu.:4.018                     
##                     Max.   :5.210

Clean data by removing the irrelevant feature

# Drop the first column (the row counter imported as `NA.`). List-style
# indexing is used so the result is always a data frame, even if only one
# column were to remain.
data <- data[-1]

Subset data by Academic Program for Normality Testing

# List the distinct academic programs present in the data.
unique(data$Acad)
## [1] "Nursing"                 "Business Administration"
## [3] "Computer Science"        "Civil Engineering"
# Extract each program's Engagement scores as a numeric vector by indexing
# the Engagement column directly with the program filter.
Nsg <- data$Engagement[data$Acad == "Nursing"]
BA <- data$Engagement[data$Acad == "Business Administration"]
CS <- data$Engagement[data$Acad == "Computer Science"]
CE <- data$Engagement[data$Acad == "Civil Engineering"]

Conduct normality testing for each academic program

library(nortest)
# Run the Anderson-Darling normality test on each program's Engagement scores.
ad.test(Nsg)
## 
##  Anderson-Darling normality test
## 
## data:  Nsg
## A = 0.29386, p-value = 0.5579
ad.test(BA)
## 
##  Anderson-Darling normality test
## 
## data:  BA
## A = 0.33658, p-value = 0.4768
ad.test(CS)
## 
##  Anderson-Darling normality test
## 
## data:  CS
## A = 0.15365, p-value = 0.9488
ad.test(CE)
## 
##  Anderson-Darling normality test
## 
## data:  CE
## A = 0.47463, p-value = 0.2096
# Every p-value is above 0.05, so the test does not reject normality for any
# of the four programs.

Generate Probability Density Function Plots

# Plot a kernel density estimate of Engagement for each program in a 2 x 2
# grid. Each panel is titled with its program name (the default title only
# echoes the density() call), and the previous graphics layout is restored
# afterwards so later plots are not forced into the grid.
old_par <- par(mfrow = c(2, 2))
plot(density(Nsg), col = "darkorchid", main = "Nursing")
plot(density(BA), col = "darkorchid", main = "Business Administration")
plot(density(CE), col = "darkorchid", main = "Civil Engineering")
plot(density(CS), col = "darkorchid", main = "Computer Science")
par(old_par)

Generate BoxPlots

# Draw one boxplot of Engagement per program in a 2 x 2 grid. Each panel gets
# a title and a y-axis label; without them the four panels cannot be told
# apart. The previous graphics layout is restored at the end.
old_par <- par(mfrow = c(2, 2))
boxplot(Nsg, col = "darkorchid", main = "Nursing", ylab = "Engagement")
boxplot(
  BA,
  col = "darkorchid", main = "Business Administration", ylab = "Engagement"
)
boxplot(
  CE,
  col = "darkorchid", main = "Civil Engineering", ylab = "Engagement"
)
boxplot(
  CS,
  col = "darkorchid", main = "Computer Science", ylab = "Engagement"
)
par(old_par)

Generate QQ-plots

# Draw a normal QQ-plot with its reference line for each program in a 2 x 2
# grid. The scores are extracted once per program and each panel is titled
# with the program name, since the default title is the same for every panel.
# The previous graphics layout is restored at the end.
old_par <- par(mfrow = c(2, 2))
programs <- c(
  "Business Administration", "Civil Engineering",
  "Computer Science", "Nursing"
)
for (program in programs) {
  scores <- data$Engagement[data$Acad == program]
  qqnorm(scores, main = program)
  qqline(scores)
}
par(old_par)

# Although the Anderson-Darling tests indicate that the distribution for each program is approximately normal, the boxplot and QQ-plot visualizations suggest otherwise.

Generate Descriptive Statistics

library(psych)
# Describe Engagement per academic program, then assemble one table holding
# n, median, MAD, min and max for every program.
programs <- c(
  "Business Administration", "Civil Engineering",
  "Computer Science", "Nursing"
)
Group <- data.frame(Group = programs)
by_program <- describeBy(data$Engagement, data$Acad)
# Stack the per-program rows in the same order as the labels above.
stacked <- do.call(
  rbind,
  lapply(programs, function(program) by_program[[program]])
)
# Keep only the reported statistics, selected by column name.
kept <- c("n", "median", "mad", "min", "max")
Desc <- cbind(Group, round(stacked[, kept], 2))
Desc
##                       Group  n median  mad  min  max
## X1  Business Administration 24   3.34 0.52 2.56 4.54
## X11       Civil Engineering 17   3.56 0.40 2.32 4.56
## X12        Computer Science 20   3.58 0.58 2.16 4.97
## X13                 Nursing 17   4.07 0.64 3.46 5.21
# The median (with MAD) is reported as the measure of central tendency because
# the boxplots and QQ-plots suggested departures from normality, even though
# the Anderson-Darling tests did not reject it.

Perform Comparison for more than two groups

library(broom)
# Compare Engagement across the academic programs with a Kruskal-Wallis rank
# sum test, tidy the result into a one-row table and put the columns in
# reporting order.
kw <- kruskal.test(Engagement ~ Acad, data = data)
res <- tidy(kw)[, c("method", "parameter", "statistic", "p.value")]
res
## # A tibble: 1 x 4
##   method                       parameter statistic p.value
##   <chr>                            <int>     <dbl>   <dbl>
## 1 Kruskal-Wallis rank sum test         3      15.0 0.00179
# The p-value (0.00179) is below 0.05, so engagement differs for at least one
# academic program.

Perform a Post Hoc Analysis

library(FSA)
# Post hoc pairwise comparisons using Dunn's test with Bonferroni-adjusted
# p-values. Acad is stored as character, which makes dunnTest() coerce it and
# emit a warning; converting it to a factor on a copy beforehand avoids the
# warning without altering `data`.
dunn_input <- data
dunn_input$Acad <- factor(dunn_input$Acad)
ph <- dunnTest(Engagement ~ Acad, data = dunn_input, method = "bonferroni")
ph
##                                    Comparison          Z     P.unadj      P.adj
## 1 Business Administration - Civil Engineering -0.2439727 0.807251947 1.00000000
## 2  Business Administration - Computer Science -0.8174954 0.413645357 1.00000000
## 3        Civil Engineering - Computer Science -0.5158455 0.605962352 1.00000000
## 4           Business Administration - Nursing -3.6015833 0.000316285 0.00189771
## 5                 Civil Engineering - Nursing -3.1031419 0.001914778 0.01148867
## 6                  Computer Science - Nursing -2.7106479 0.006715190 0.04029114
# Only the three comparisons involving Nursing have adjusted p-values below
# 0.05.

In this inferential analysis, we compared the extent of student engagement across four academic programs: Nursing, Business Administration, Civil Engineering, and Computer Science. Results show that there is a significant difference in the extent of student engagement when students are grouped by their respective programs (p-value < 0.05). Moreover, the Post Hoc Analysis shows that, at an alpha of 0.05, the pairs Bus. Administration - Nursing, Civil Engr. - Nursing, and Computer Sci. - Nursing differ significantly from each other. Given the result, interventions may be needed to further convince students from the Bus. Administration, Civil Engineering, and Computer Science Programs to engage in school activities.