Scenario: The research group is conducting an experimental study to check whether there is a significant difference between the control and treatment groups in the post-test measures of weight (lbs), leaf length (mm), and stalk length (ft). The sample size for each group was determined via sample size estimation using power and mean, and randomization was applied in the assignment of groups.

## Load the Dataset

library(xlsx)
# Read the worksheet by name. read.xlsx() takes a numeric position through
# `sheetIndex` and a sheet name through `sheetName`, so the name is passed
# via `sheetName` rather than relying on name-based list indexing.
data <- read.xlsx("Experiment3.xlsx", sheetName = "Sheet1")

## Check out the Dataset

# Structure of the raw import: 16 rows, a row-label column (NA.), the group
# label, and the three numeric post-test measures.
str(data)
## 'data.frame':    16 obs. of  5 variables:
##  $ NA.   : chr  "1" "2" "3" "4" ...
##  $ Group : chr  "Treatment" "Treatment" "Treatment" "Treatment" ...
##  $ Weight: num  69.2 93.2 91.6 50.5 74.5 ...
##  $ Length: num  308 306 342 391 352 ...
##  $ Stalk : num  5.02 5.65 9.53 3.93 6.33 6.83 6.73 5.61 9.76 6.53 ...

summary(data)
##      NA.               Group               Weight           Length
##  Length:16          Length:16          Min.   : 21.34   Min.   :305.6
##  Class :character   Class :character   1st Qu.: 29.89   1st Qu.:340.3
##  Mode  :character   Mode  :character   Median : 46.21   Median :364.8
##                                        Mean   : 59.15   Mean   :376.7
##                                        3rd Qu.: 89.19   3rd Qu.:391.0
##                                        Max.   :137.50   Max.   :509.5
##      Stalk
##  Min.   : 3.930
##  1st Qu.: 5.640
##  Median : 6.770
##  Mean   : 7.100
##  3rd Qu.: 9.035
##  Max.   :10.650

## Remove irrelevant features

# Drop the first column (the NA. row-label column shown by str() above).
data <- data[, -1]

# Create Subsets
# One numeric vector per group/measure combination; these names are used by
# the normality tests and the t-tests further down.
is_treatment <- data$Group == "Treatment"
is_control <- data$Group == "Control"

TxWt <- data$Weight[is_treatment]
TxLen <- data$Length[is_treatment]
TxStk <- data$Stalk[is_treatment]
CnWt <- data$Weight[is_control]
CnLen <- data$Length[is_control]
CnStk <- data$Stalk[is_control]

## Conduct Normality Testing

library(nortest)
# Bind the six subsets column-wise so each column can be tested in one pass.
nordata <- cbind(TxWt, TxLen, TxStk, CnWt, CnLen, CnStk)
# Anderson-Darling test per column. The anonymous wrapper is kept so the
# printed data name stays "x", matching the recorded output.
# All six p-values exceed 0.05, so normality is not rejected for any feature;
# proceed with plotting.
apply(nordata, 2, function(x) ad.test(x))
## $TxWt
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.40226, p-value = 0.2698
##
##
## $TxLen
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.37029, p-value = 0.3288
##
##
## $TxStk
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.34566, p-value = 0.3821
##
##
## $CnWt
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.24172, p-value = 0.6681
##
##
## $CnLen
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.5412, p-value = 0.1122
##
##
## $CnStk
##
##  Anderson-Darling normality test
##
## data:  x
## A = 0.23597, p-value = 0.6899

## Generate Probability Density Plots

# Lay the plots out on a 3 x 3 grid and draw one kernel density curve per
# column of nordata (six curves in total).
par(mfrow = c(3, 3))
apply(nordata, 2, function(x) plot(density(x), col = "deeppink"))
## NULL

## Generate Descriptive Statistics

library(psych)
# Mean is the measure for central tendency given the result of the normality test
describeBy(data$Weight, data$Group)
##
##  Descriptive statistics by group
## group: Control
##    vars n  mean   sd median trimmed  mad   min   max range skew kurtosis   se
## X1    1 8 30.86 7.47  29.62   30.86 8.07 21.34 41.89 20.55 0.24    -1.68 2.64
## ------------------------------------------------------------
## group: Treatment
##    vars n  mean    sd median trimmed   mad   min   max range skew kurtosis   se
## X1    1 8 87.45 25.28     90   87.45 14.99 50.53 137.5 86.97 0.52    -0.47 8.94
describeBy(data$Length, data$Group)
##
##  Descriptive statistics by group
## group: Control
##    vars n   mean    sd median trimmed   mad    min   max  range skew kurtosis
## X1    1 8 398.36 68.67  366.1  398.36 51.36 327.51 509.5 181.99 0.44     -1.7
##       se
## X1 24.28
## ------------------------------------------------------------
## group: Treatment
##    vars n   mean    sd median trimmed  mad    min    max range  skew kurtosis
## X1    1 8 355.06 34.71 360.65  355.06 37.9 305.55 391.71 86.16 -0.34     -1.7
##       se
## X1 12.27
describeBy(data$Stalk, data$Group)
##
##  Descriptive statistics by group
## group: Control
##    vars n mean   sd median trimmed  mad  min   max range  skew kurtosis   se
## X1    1 8    8 2.09   8.49       8 2.19 4.18 10.65  6.47 -0.47    -1.15 0.74
## ------------------------------------------------------------
## group: Treatment
##    vars n mean   sd median trimmed  mad  min  max range skew kurtosis   se
## X1    1 8  6.2 1.65   5.99     6.2 1.17 3.93 9.53   5.6 0.66    -0.42 0.58

## Test for Homogeneity and Conduct of Inferential Analysis on Weight

library(lawstat)
# Levene's test does not reject equal variances (p = 0.1052); use Student's t-test
levene.test(data$Weight, data$Group, location = "mean")
##
##  Classical Levene's test based on the absolute deviations from the mean
##  ( none not applied because the location is not set to median )
##
## data:  data$Weight
## Test Statistic = 3.0004, p-value = 0.1052
# Independent two-sample Student's t-test on weight (pooled variance).
# The treatment appears to raise weight by about 36.60 to 76.58 (95% confidence
# interval); the Control and Treatment means differ significantly (p < 0.05).
t.test(x = CnWt, y = TxWt, var.equal = TRUE, paired = FALSE)
##
##  Two Sample t-test
##
## data:  CnWt and TxWt
## t = -6.0721, df = 14, p-value = 2.876e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -76.57856 -36.60144
## sample estimates:
## mean of x mean of y
##    30.855    87.445

## Test for Homogeneity and Conduct Inferential Analysis on Leaf Length (mm)

# Levene's test rejects equal variances (p = 0.01408); use the Welch Two Sample t-test
levene.test(data$Length, data$Group, location = "mean")
##
##  Classical Levene's test based on the absolute deviations from the mean
##  ( none not applied because the location is not set to median )
##
## data:  data$Length
## Test Statistic = 7.8602, p-value = 0.01408

# Given the probability value of > 0.05 and a confidence interval that has zero
# between its limits, there is no significant difference in leaf length
# (millimeter) between the control and treatment groups.
t.test(CnLen, TxLen, paired = FALSE, var.equal = FALSE)
##
##  Welch Two Sample t-test
##
## data:  CnLen and TxLen
## t = 1.5918, df = 10.357, p-value = 0.1415
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -17.02655 103.62655
## sample estimates:
## mean of x mean of y
##  398.3562  355.0562

## Test for Homogeneity and Conduct Inferential Analysis on Stalk Length (ft)

# Levene's test does not reject equal variances (p = 0.412); use Student's t-test
levene.test(data$Stalk, data$Group, location = "mean")
##
##  Classical Levene's test based on the absolute deviations from the mean
##  ( none not applied because the location is not set to median )
##
## data:  data$Stalk
## Test Statistic = 0.71492, p-value = 0.412
# Levene's test on stalk length did not reject equal variances (p = 0.412), so
# the pooled-variance Student's t-test is the matching choice (var.equal = TRUE),
# the same rule applied to weight.
# Given the probability value of > 0.05 and a confidence interval that has zero
# between its limits, there is no significant difference in stalk length (feet)
# between the control and treatment groups.
t.test(CnStk, TxStk, paired = FALSE, var.equal = TRUE)
## NOTE: the transcript below was recorded with var.equal = FALSE (Welch);
## re-run to refresh it. Both groups have n = 8, so the t statistic is the
## same under the pooled test, while the degrees of freedom become 14.
##
##  Welch Two Sample t-test
##
## data:  CnStk and TxStk
## t = 1.9078, df = 13.284, p-value = 0.07826
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2328596  3.8178596
## sample estimates:
## mean of x mean of y
##   7.99625   6.20375