Scenario: The research group is conducting an experimental study to check whether there is a significant difference between the control and treatment groups in the post-test measures of weight (lbs), leaf length (mm), and stalk length (ft). The sample size for each group was determined via sample size estimation based on power and mean, and randomization was used in assigning subjects to the groups.
library(xlsx)
# Load the experiment data from the worksheet named "Sheet1".
# A sheet *name* belongs in `sheetName`; `sheetIndex` is documented as the
# numeric position of the sheet and only accepted a name by accident.
data <- read.xlsx("Experiment3.xlsx", sheetName = "Sheet1")
# Inspect the column names and types of the imported data frame.
str(data)
## 'data.frame': 16 obs. of 5 variables:
## $ NA. : chr "1" "2" "3" "4" ...
## $ Group : chr "Treatment" "Treatment" "Treatment" "Treatment" ...
## $ Weight: num 69.2 93.2 91.6 50.5 74.5 ...
## $ Length: num 308 306 342 391 352 ...
## $ Stalk : num 5.02 5.65 9.53 3.93 6.33 6.83 6.73 5.61 9.76 6.53 ...
# Per-column overview: quartiles and mean for the numeric measures,
# length/class/mode for the character columns.
summary(data)
## NA. Group Weight Length
## Length:16 Length:16 Min. : 21.34 Min. :305.6
## Class :character Class :character 1st Qu.: 29.89 1st Qu.:340.3
## Mode :character Mode :character Median : 46.21 Median :364.8
## Mean : 59.15 Mean :376.7
## 3rd Qu.: 89.19 3rd Qu.:391.0
## Max. :137.50 Max. :509.5
## Stalk
## Min. : 3.930
## 1st Qu.: 5.640
## Median : 6.770
## Mean : 7.100
## 3rd Qu.: 9.035
## Max. :10.650
# Drop the first column (the row-index column that was imported as `NA.`).
data <- data[, -1]

# Extract each outcome as a plain numeric vector, one per group.
# Treatment group
TxWt <- data$Weight[data$Group == "Treatment"]
TxLen <- data$Length[data$Group == "Treatment"]
TxStk <- data$Stalk[data$Group == "Treatment"]
# Control group
CnWt <- data$Weight[data$Group == "Control"]
CnLen <- data$Length[data$Group == "Control"]
CnStk <- data$Stalk[data$Group == "Control"]
library(nortest)
# Collect the six group-by-measure vectors as columns of one matrix.
# This relies on both groups having the same number of observations;
# cbind() would otherwise recycle the shorter vectors.
nordata <- cbind(
  TxWt, TxLen, TxStk,
  CnWt, CnLen, CnStk
)
# Anderson-Darling normality test on every column. All p-values are above
# 0.05, so normality is not rejected for any measure; proceed with plotting.
apply(nordata, MARGIN = 2, FUN = function(x) ad.test(x))
## $TxWt
##
## Anderson-Darling normality test
##
## data: x
## A = 0.40226, p-value = 0.2698
##
##
## $TxLen
##
## Anderson-Darling normality test
##
## data: x
## A = 0.37029, p-value = 0.3288
##
##
## $TxStk
##
## Anderson-Darling normality test
##
## data: x
## A = 0.34566, p-value = 0.3821
##
##
## $CnWt
##
## Anderson-Darling normality test
##
## data: x
## A = 0.24172, p-value = 0.6681
##
##
## $CnLen
##
## Anderson-Darling normality test
##
## data: x
## A = 0.5412, p-value = 0.1122
##
##
## $CnStk
##
## Anderson-Darling normality test
##
## data: x
## A = 0.23597, p-value = 0.6899
# Density plot for every group-by-measure column of `nordata`.
# Six panels fit a 2 x 3 grid (treatment on the top row, control below);
# the previous graphics settings are saved so they can be restored afterwards.
old_par <- par(mfrow = c(2, 3))
for (measure in colnames(nordata)) {
  # Title each panel with its column name; with the default title every panel
  # reads the same and the six curves cannot be told apart.
  plot(density(nordata[, measure]), col = "deeppink", main = measure)
}
# Put the plotting layout back so later plots are not forced into the grid.
par(old_par)
library(psych)
# Descriptive statistics of weight per group. Normality was not rejected,
# so the mean is used as the measure of central tendency.
describeBy(data$Weight, group = data$Group)
##
## Descriptive statistics by group
## group: Control
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 30.86 7.47 29.62 30.86 8.07 21.34 41.89 20.55 0.24 -1.68 2.64
## ------------------------------------------------------------
## group: Treatment
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 87.45 25.28 90 87.45 14.99 50.53 137.5 86.97 0.52 -0.47 8.94
# Descriptive statistics of leaf length per group.
describeBy(data$Length, group = data$Group)
##
## Descriptive statistics by group
## group: Control
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 8 398.36 68.67 366.1 398.36 51.36 327.51 509.5 181.99 0.44 -1.7
## se
## X1 24.28
## ------------------------------------------------------------
## group: Treatment
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 8 355.06 34.71 360.65 355.06 37.9 305.55 391.71 86.16 -0.34 -1.7
## se
## X1 12.27
# Descriptive statistics of stalk length per group.
describeBy(data$Stalk, group = data$Group)
##
## Descriptive statistics by group
## group: Control
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 8 2.09 8.49 8 2.19 4.18 10.65 6.47 -0.47 -1.15 0.74
## ------------------------------------------------------------
## group: Treatment
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 6.2 1.65 5.99 6.2 1.17 3.93 9.53 5.6 0.66 -0.42 0.58
library(lawstat)
# Levene's test (deviations from the group mean) for equal weight variances.
# p > 0.05: equal variances are not rejected, so Student's t-test is used.
levene.test(data$Weight, group = data$Group, location = "mean")
##
## Classical Levene's test based on the absolute deviations from the mean
## ( none not applied because the location is not set to median )
##
## data: data$Weight
## Test Statistic = 3.0004, p-value = 0.1052
# Student's two-sample t-test on weight, Control vs Treatment.
# The group means differ significantly (p < 0.05). The 95% confidence interval
# for Control - Treatment lies entirely below zero, i.e. the treatment is
# associated with a weight increase of about 36.60 to 76.58 lbs.
t.test(x = CnWt, y = TxWt, paired = FALSE, var.equal = TRUE)
##
## Two Sample t-test
##
## data: CnWt and TxWt
## t = -6.0721, df = 14, p-value = 2.876e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -76.57856 -36.60144
## sample estimates:
## mean of x mean of y
## 30.855 87.445
# Levene's test (deviations from the group mean) for equal leaf-length variances.
# p < 0.05: variances are not equal, so the Welch two-sample t-test is used.
levene.test(data$Length, group = data$Group, location = "mean")
##
## Classical Levene's test based on the absolute deviations from the mean
## ( none not applied because the location is not set to median )
##
## data: data$Length
## Test Statistic = 7.8602, p-value = 0.01408
# Welch two-sample t-test on leaf length, Control vs Treatment.
# With p > 0.05 and a 95% confidence interval that contains zero, there is no
# significant difference in leaf length (mm) between the two groups.
t.test(x = CnLen, y = TxLen, paired = FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: CnLen and TxLen
## t = 1.5918, df = 10.357, p-value = 0.1415
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -17.02655 103.62655
## sample estimates:
## mean of x mean of y
## 398.3562 355.0562
# Levene's test (deviations from the group mean) for equal stalk-length variances.
# p > 0.05: equal variances are not rejected, so Student's t-test applies.
levene.test(data$Stalk, group = data$Group, location = "mean")
##
## Classical Levene's test based on the absolute deviations from the mean
## ( none not applied because the location is not set to median )
##
## data: data$Stalk
## Test Statistic = 0.71492, p-value = 0.412
# Student's two-sample t-test on stalk length, Control vs Treatment.
# Levene's test did not reject equal variances for Stalk (p = 0.412), so the
# pooled-variance test is requested with var.equal = TRUE, matching the rule
# applied to Weight; the call previously ran the Welch test instead.
# With p > 0.05 and a 95% confidence interval that contains zero, there is no
# significant difference in stalk length (ft) between the two groups; the
# critical value at df = 14 is still above the observed t statistic, so this
# conclusion is unchanged by the switch.
# NOTE: the echoed output below was produced by the earlier Welch call;
# re-run the script to refresh it (it will report "Two Sample t-test", df = 14).
t.test(CnStk, TxStk, paired = FALSE, var.equal = TRUE)
##
## Welch Two Sample t-test
##
## data: CnStk and TxStk
## t = 1.9078, df = 13.284, p-value = 0.07826
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2328596 3.8178596
## sample estimates:
## mean of x mean of y
## 7.99625 6.20375