Comparing Paired Samples

Code for "Comparing Paired Samples"
by Atsushi Mizumoto

server.R
ui.R

library(shiny)
library(psych)
library(car)
library(compute.es)
library(pwr)
library(lattice)
library(latticeExtra)
library(beeswarm)

shinyServer(function(input, output) {
    options(warn = -1)

    # データの取得と前処理
    getData <- reactive({
        if (input$colname) {
            # ヘッダーありの場合
            df <- read.csv(text = input$text, sep = "\t", na.strings = c("", "NA", ".", "#N/A", "N/A"))
            # 完全なケースのみを抽出（リストワイズ削除）
            df_complete <- df[complete.cases(df[, 1:2]), ]
            x <- df_complete[, 1] # 1列目のデータ
            y <- df_complete[, 2] # 2列目のデータ
            var_names <- colnames(df)[1:2] # 変数名を取得
        } else {
            # ヘッダーなしの場合
            df <- read.csv(text = input$text, header = FALSE, sep = "\t", na.strings = c("", "NA", ".", "#N/A", "N/A"))
            # 完全なケースのみを抽出（リストワイズ削除）
            df_complete <- df[complete.cases(df[, 1:2]), ]
            x <- df_complete[, 1] # 1列目のデータ
            y <- df_complete[, 2] # 2列目のデータ
            var_names <- c("Data 1", "Data 2") # デフォルト変数名
        }

        # 欠損値の情報を追加
        total_rows <- nrow(df)
        complete_rows <- nrow(df_complete)
        missing_rows <- total_rows - complete_rows

        return(list(
            x = x,
            y = y,
            df = df_complete,
            var_names = var_names,
            total_rows = total_rows,
            complete_rows = complete_rows,
            missing_rows = missing_rows
        ))
    })

    bs <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        # 欠損値の情報
        total_rows <- data$total_rows
        complete_rows <- data$complete_rows
        missing_rows <- data$missing_rows

        result1 <- describe(x)[2:13]
        result2 <- describe(y)[2:13]
        row.names(result1) <- ""
        row.names(result2) <- ""

        correl <- cor(x, y)

        result <- list()
        result[[var_names[1]]] <- result1
        result[[var_names[2]]] <- result2
        result[["Correlation"]] <- correl

        # 欠損値の情報を追加
        if (missing_rows > 0) {
            result[["Missing Values"]] <- c(
                paste("Total rows:", total_rows),
                paste("Complete cases:", complete_rows),
                paste("Missing cases (list-wise deleted):", missing_rows)
            )
        }

        return(result)
    })


    makedistPlot <- function() {
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        simple.bincount <- function(x, breaks) {
            nx <- length(x)
            nbreaks <- length(breaks)
            counts <- integer(nbreaks - 1)
            for (i in 1:nx) {
                lo <- 1
                hi <- nbreaks
                if (breaks[lo] <= x[i] && x[i] <= breaks[hi]) {
                    while (hi - lo >= 2) {
                        new <- (hi + lo) %/% 2
                        if (x[i] > breaks[new]) {
                            lo <- new
                        } else {
                            hi <- new
                        }
                    }
                    counts[lo] <- counts[lo] + 1
                }
            }
            return(counts)
        }

        nclass.x <- nclass.FD(x)
        breaks.x <- pretty(x, nclass.x)
        counts.x <- simple.bincount(x, breaks.x)
        counts.max.x <- max(counts.x)

        nclass.y <- nclass.FD(y)
        breaks.y <- pretty(y, nclass.y)
        counts.y <- simple.bincount(y, breaks.y)
        counts.max.y <- max(counts.y)

        counts.max <- max(c(counts.max.x, counts.max.y))


        xy.min <- min(c(x, y))
        xy.min <- xy.min - xy.min * 0.1
        xy.max <- max(c(x, y))
        xy.max <- xy.max + xy.max * 0.1

        p1 <- hist(x, xlim = c(xy.min, xy.max), ylim = c(0, counts.max * 1.3))
        p2 <- hist(y, xlim = c(xy.min, xy.max), ylim = c(0, counts.max * 1.3))

        plot(p1,
            las = 1, xlab = paste(var_names[1], "is expressed in blue;", var_names[2], "in red. Vertical lines show the mean."),
            main = "", col = rgb(0, 0, 1, 1 / 4), xlim = c(xy.min, xy.max), ylim = c(0, counts.max * 1.3)
        )
        plot(p2, las = 1, xlab = "", main = "", col = rgb(1, 0, 0, 1 / 4), xlim = c(xy.min, xy.max), ylim = c(0, counts.max * 1.3), add = TRUE)

        abline(v = mean(x), col = "blue", lwd = 2)
        abline(v = mean(y), col = "red", lwd = 2)
    }

    output$distPlot <- renderPlot({
        print(makedistPlot())
    })



    makeboxPlot <- function() {
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        score <- c(x, y)
        group <- factor(c(rep(var_names[1], length(x)), rep(var_names[2], length(y))))

        boxplot(score ~ group, las = 1, xlab = "Means and +/-1 SDs are displayed in red.")

        beeswarm(score ~ group, col = 4, pch = 16, add = TRUE)

        points(1.2, mean(x), pch = 18, col = "red", cex = 2)
        arrows(1.2, mean(x), 1.2, mean(x) + sd(x), length = 0.1, angle = 45, col = "red")
        arrows(1.2, mean(x), 1.2, mean(x) - sd(x), length = 0.1, angle = 45, col = "red")

        points(2.2, mean(y), pch = 18, col = "red", cex = 2)
        arrows(2.2, mean(y), 2.2, mean(y) + sd(y), length = 0.1, angle = 45, col = "red")
        arrows(2.2, mean(y), 2.2, mean(y) - sd(y), length = 0.1, angle = 45, col = "red")
    }

    output$boxPlot <- renderPlot({
        print(makeboxPlot())
    })



    makeindvPlot <- function() {
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        xy <- c(x, y)
        data.point <- factor(c(rep(var_names[1], length(x)), rep(var_names[2], length(y))))
        dat <- data.frame(xy, data.point)
        dat$indiv <- factor(c(rep(1:length(x)), rep(1:length(y))))
        each <- xyplot(xy ~ data.point,
            group = indiv,
            type = c("l"), data = dat, xlab = "", ylab = ""
        )
        a <- mean(x)
        b <- mean(y)
        value <- c(a, b)
        data.point2 <- factor(c(rep(var_names[1], 1), rep(var_names[2], 1)))
        dat2 <- data.frame(value, data.point2)
        all <- xyplot(value ~ data.point2,
            col = "black",
            lwd = 5, type = c("l"), data = dat, xlab = "", ylab = ""
        )
        indv <- each + as.layer(all, axes = NULL)
        print(indv)
    }

    output$indvPlot <- renderPlot({
        print(makeindvPlot())
    })



    makecorrelPlot <- function() {
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        xy.min <- min(c(x, y))
        xy.min <- xy.min - xy.min * 0.1
        xy.max <- max(c(x, y))
        xy.max <- xy.max + xy.max * 0.1

        plot(x, y,
            las = 1, pch = 16, xlab = var_names[1], ylab = var_names[2], main = "",
            xlim = c(xy.min, xy.max), ylim = c(xy.min, xy.max)
        )
        lines(par()$usr[1:2], par()$usr[3:4], lty = 3)
    }

    output$correlPlot <- renderPlot({
        print(makecorrelPlot())
    })



    testnorm <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y
        var_names <- data$var_names

        data.1ks <- ks.test(scale(x), "pnorm")
        data.1sh <- shapiro.test(x)

        data.2ks <- ks.test(scale(y), "pnorm")
        data.2sh <- shapiro.test(y)

        result <- list()
        result[[var_names[1]]] <- data.1ks
        result[[paste0(var_names[1], " (n < 50 w/ Outliers)")]] <- data.1sh
        result[[var_names[2]]] <- data.2ks
        result[[paste0(var_names[2], " (n < 50 w/ Outliers)")]] <- data.2sh

        return(result)
    })



    makedistdiffPlot <- function() {
        data <- getData()
        x <- data$x
        y <- data$y

        diff <- x - y

        simple.bincount <- function(x, breaks) {
            nx <- length(diff)
            nbreaks <- length(breaks)
            counts <- integer(nbreaks - 1)
            for (i in 1:nx) {
                lo <- 1
                hi <- nbreaks
                if (breaks[lo] <= x[i] && x[i] <= breaks[hi]) {
                    while (hi - lo >= 2) {
                        new <- (hi + lo) %/% 2
                        if (x[i] > breaks[new]) {
                            lo <- new
                        } else {
                            hi <- new
                        }
                    }
                    counts[lo] <- counts[lo] + 1
                }
            }
            return(counts)
        }

        nclass <- nclass.FD(diff)
        breaks <- pretty(diff, nclass)
        counts <- simple.bincount(diff, breaks)
        counts.max <- max(counts)

        h <- hist(diff,
            las = 1, breaks = "FD", xlab = "Red vertical line shows the mean of the differences.",
            ylim = c(0, counts.max * 1.2), main = "", col = "cyan"
        )
        rug(diff)
        abline(v = mean(diff, na.rm = T), col = "red", lwd = 2)
        xfit <- seq(min(diff, na.rm = T), max(diff, na.rm = T))
        yfit <- dnorm(xfit, mean = mean(diff, na.rm = T), sd = sd(diff, na.rm = T))
        yfit <- yfit * diff(h$mids[1:2]) * length(diff)
        lines(xfit, yfit, col = "blue", lwd = 2)
    }

    output$distdiffPlot <- renderPlot({
        print(makedistdiffPlot())
    })



    diffnorm <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y

        diff <- x - y

        diff.1ks <- ks.test(scale(diff), "pnorm")
        diff.1sh <- shapiro.test(diff)

        return(list(Diff.Score = diff.1ks, "Diff.Score (n < 50 w/ Outliers)" = diff.1sh))
    })



    t <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y

        t.test(x, y, paired = TRUE)
    })



    es <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y

        m1 <- mean(x)
        sd1 <- sd(x)
        n1 <- length(x)

        m2 <- mean(y)
        sd2 <- sd(y)
        n2 <- length(y)

        diff <- x - y

        pooledSD <- sd(diff)
        sediff <- pooledSD / sqrt(length(x))
        paired.d <- abs((m1 - m2) / pooledSD)
        dCI <- d.ci(paired.d, n1 = n1, alpha = .05)

        d <- abs(mean(diff) / (sd(diff) / sqrt(2 * (1 - cor(x, y)))))
        var.d <- ((1 / n1) + ((d^2) / (2 * n1))) * (2 * (1 - cor(x, y)))

        df <- n1 - 1
        j <- 1 - (3 / (4 * df - 1))
        g <- j * d
        var.g <- j^2 * var.d

        # a <- ((n1 + n2)^2)/(n1 * n2)
        # r <- d/sqrt((d^2) + a)
        # var.r <- (a^2 * var.d)/(d^2 + a)^3

        alpha <- 0.05
        crit <- qt(alpha / 2, df, lower.tail = FALSE)

        lower.d <- d - crit * sqrt(var.d)
        upper.d <- d + crit * sqrt(var.d)

        # lower.d2 <- d - 1.96 * sqrt(var.d)
        # upper.d2 <- d + 1.96 * sqrt(var.d)

        lower.g <- g - crit * sqrt(var.g)
        upper.g <- g + crit * sqrt(var.g)

        # lower.g2 <- d - 1.96 * sqrt(var.d)
        # upper.g2 <- d + 1.96 * sqrt(var.d)

        cat("=======================================================", "\n")
        cat(
            " paired.t / sqrt(n):", "\n",
            "(Based on Larkens, 2013, p. 4)", "\n",
            "\n",
            "  d [95% CI] =", dCI[2], "[", dCI[1], ",", dCI[3], "]", "\n",
            "\n"
        )



        cat("=======================================================", "\n")
        cat(
            " Mean difference / within-groups SD:", "\n",
            "(Based on Borenstein et al., 2021, p. 29) ", "\n",
            "\n",
            "  d [95% CI] =", d, "[", lower.d, ",", upper.d, "]", "\n",
            "   var(d) =", var.d, "\n",
            "\n",
            "  g [95% CI] =", g, "[", lower.g, ",", upper.g, "]", "\n",
            "   var(g) =", var.g, "\n",
            "\n"
        )



        delta <- abs(mean(diff) / sd(x))

        cat("=======================================================", "\n")
        cat(
            " Mean difference / SD of Data 1 (e.g., pretest):", "\n",
            "\n",
            "  Delta =", delta, "\n",
            "\n"
        )



        result.dependent <- t.test(x, y, paired = TRUE)
        paired.t <- result.dependent$statistic
        r2 <- sqrt(paired.t^2 / (paired.t^2 + df))
        r2 <- r2[[1]]

        cat("=======================================================", "\n")
        cat(
            " r --- sqrt(paired.t^2/(paired.t^2+df)):", "\n",
            "\n",
            "  r =", r2, "\n",
            "\n"
        )
    })



    wsr <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y

        result <- wilcox.test(x, y, paired = TRUE, correct = FALSE)

        pval <- result$p.value
        z <- qnorm(1 - (pval / 2))

        r1 <- z / sqrt(length(x * 2))

        r2 <- z / sqrt(length(x) - sum((y - x == 0)))

        print(result)

        cat(
            " z-value =", z, "\n",
            "effect size r =", r1, "\n",
            "effect size r (without considering ties) =", r2, "\n"
        )
    })



    power <- reactive({
        data <- getData()
        x <- data$x
        y <- data$y

        diff <- x - y
        d <- mean(diff) / sd(diff)
        n <- length(x)

        posthoc <- power.t.test(n = n, delta = d, sig.level = .05, type = "paired", strict = TRUE)$power

        future <- ceiling(power.t.test(
            power = 0.8, delta = d, sig.level = 0.05,
            type = "paired", strict = TRUE, alternative = "two.sided"
        )$n)

        cat(
            " Post hoc (observed) power =", round(posthoc, 3), "\n",
            "\n",
            "  Note: According to Cumming (2012), post hoc power is 'illegitimate'", "\n",
            "        and we should NEVER calculate or report it.", "\n",
            "\n",
            "\n",
            "Sample size needed for future experiment:", " n =", future, "\n",
            "  Power = 0.8, sig.level = 0.05, alternative = two.sided, d =", round(d, 2), "\n",
            "\n",
            "  Note: This is true only PROVIDED that the population effect size is", "\n",
            "        equal to the observed sample effect size (i.e., it is unrealistic).", "\n",
            "\n",
            "\n",
            "POWER ANALYSIS SHOULD BE CONDUCTED PRIOR TO THE EXPERIMENT!", "\n"
        )
    })



    info <- reactive({
        info1 <- paste("This analysis was conducted with ", strsplit(R.version$version.string, " \\(")[[1]][1], ".", sep = "")
        info2 <- paste("It was executed on ", date(), ".", sep = "")
        cat(sprintf(info1), "\n")
        cat(sprintf(info2), "\n")
    })

    output$info.out <- renderPrint({
        info()
    })



    output$textarea.out <- renderPrint({
        bs()
    })

    output$testnorm.out <- renderPrint({
        testnorm()
    })

    output$diffnorm.out <- renderPrint({
        diffnorm()
    })

    output$t.out <- renderPrint({
        t()
    })

    output$es.out <- renderPrint({
        es()
    })

    output$wsr.out <- renderPrint({
        wsr()
    })

    output$power.out <- renderPrint({
        power()
    })
})

library(shiny)
library(shinyAce)

shinyUI(pageWithSidebar(
    headerPanel("Comparing Paired Samples"),
    sidebarPanel(
        p("Please input data with tab delimiter. You can copy & paste from Excel."),
        checkboxInput("colname", label = strong("First row contains variable names"), value = TRUE),
        aceEditor("text",
            value = "Test.A\tTest.B\n70\t69\n50\t59\n80\t80\n70\t99\n25\t40\n39\t72\n40\t39\n39\t37\n90\t90\n77\t87\n39\t76\n66\t60\n70\t55\n89\t78\n69\t72\n88\t92\n49\t55\n52\t47\n36\t66\n56\t77\n67\t65\n57\t50",
            mode = "r", theme = "chrome", height = "400px"
        )
    ),
    mainPanel(
        tabsetPanel(
            tabPanel(
                "Main",
                h3("Basic statistics"),
                verbatimTextOutput("textarea.out"),
                br(),
                h3("Overlayed histograms"),
                plotOutput("distPlot"),
                h3("Box plots with individual data points"),
                plotOutput("boxPlot", width = "80%"),
                br(),
                h3("Changes of the individual data"),
                plotOutput("indvPlot", width = "70%"),
                br(),
                h3("Scatterplot"),
                plotOutput("correlPlot", width = "70%", height = "500px"),
                br(),
                h3("Test of normality (Each data)"),
                verbatimTextOutput("testnorm.out"),
                br(),
                h3("Distribution of the difference scores"),
                plotOutput("distdiffPlot", width = "70%"),
                br(),
                h3("Checking the normality of difference scores"),
                verbatimTextOutput("diffnorm.out"),
                br(),
                h3("Paired t-test"),
                verbatimTextOutput("t.out"),
                br(),
                h3("Effect size indices"),
                verbatimTextOutput("es.out"),
                p(
                    "Note:", "See", a("Larkens (2013)", href = "https://www.frontiersin.org/articles/10.3389/fpsyg.2013.00863/full", target = "_blank"),
                    "and", a("Borenstein et al. (2021)", href = "https://books.google.co.jp/books?id=pdQnEAAAQBAJ&pg=PA28&lpg=PA28&dq=%22when+working+with+a+matched+study%22&source=bl&ots=WEVovSfIR4&sig=ACfU3U2npV7DO3ZDCiAuIxnVJNmgz9PL-w&hl=ja&sa=X&ved=2ahUKEwizjrHqsZDzAhVKCN4KHQFgBuMQ6AF6BAgDEAM#v=onepage&q=%22when%20working%20with%20a%20matched%20study%22&f=false", target = "_blank"), "for formulas.", br(),
                    "The formula [paired.t / sqrt(n)] is used in", a("G*Power,", href = "https://www.psychologie.hhu.de/arbeitsgruppen/allgemeine-psychologie-und-arbeitspsychologie/gpower", target = "_blank"), a("JASP,", href = "https://jasp-stats.org/", target = "_blank"), "and", a("jamovi.", href = "https://www.jamovi.org/", target = "_blank"), br(),
                    "[Mean difference / within-groups SD] is used in", a("Comprehensive Meta-Analysis,", href = "https://www.meta-analysis.com/", target = "_blank")
                ),
                br(),
                h3("Wilcoxon signed-rank test"),
                verbatimTextOutput("wsr.out"),
                br(),
                h3("Power analysis (Just for a reference)"),
                verbatimTextOutput("power.out"),
                br(),
                br(),
                strong("R session info"),
                verbatimTextOutput("info.out")
            ),
            tabPanel(
                "About",
                strong("Note"),
                p(
                    "This web application is developed with",
                    a("Shiny.", href = "http://www.rstudio.com/shiny/", target = "_blank"),
                    ""
                ),
                br(),
                strong("List of Packages Used"), br(),
                code("library(shiny)"), br(),
                code("library(shinyAce)"), br(),
                code("library(psych)"), br(),
                code("library(car)"), br(),
                code("library(compute.es)"), br(),
                code("library(pwr)"), br(),
                code("library(lattice)"), br(),
                code("library(latticeExtra)"), br(),
                code("library(beeswarm)"), br(),
                br(),
                strong("Code"),
                p(
                    "Source code for this application is based on",
                    a('"The handbook of Research in Foreign Language Learning and Teaching" (Takeuchi & Mizumoto, 2012).', href = "http://mizumot.com/handbook/", target = "_blank")
                ),
                p(
                    "The code for this web application is available at",
                    a("GitHub.", href = "https://github.com/mizumot/paired", target = "_blank")
                ),
                p(
                    "If you want to run this code on your computer (in a local R session), run the code below:",
                    br(),
                    code("library(shiny)"), br(),
                    code('runGitHub("paired","mizumot")')
                ),
                br(),
                strong("Citation in Publications"),
                p("Mizumoto, A. (2015). Langtest (Version 1.0) [Web application]. Retrieved from http://langtest.jp"),
                br(),
                strong("Article"),
                p("Mizumoto, A., & Plonsky, L. (2015).", a("R as a lingua franca: Advantages of using R for quantitative research in applied linguistics.", href = "http://applij.oxfordjournals.org/content/early/2015/06/24/applin.amv025.abstract", target = "_blank"), em("Applied Linguistics,"), "Advance online publication. doi:10.1093/applin/amv025"),
                br(),
                strong("Author"),
                p(
                    a("Atsushi MIZUMOTO,", href = "http://mizumot.com", target = "_blank"), " Ph.D.", br(),
                    "Professor of Applied Linguistics", br(),
                    "Faculty of Foreign Language Studies /", br(),
                    "Graduate School of Foreign Language Education and Research,", br(),
                    "Kansai University, Osaka, Japan"
                ),
                br(),
                a(img(src = "http://i.creativecommons.org/p/mark/1.0/80x15.png"), target = "_blank", href = "http://creativecommons.org/publicdomain/mark/1.0/")
            )
        )
    )
))

Code license: GPL-3

Comparing Paired Samples

Basic statistics

Overlayed histograms

Box plots with individual data points

Changes of the individual data

Scatterplot

Test of normality (Each data)

Distribution of the difference scores

Checking the normality of difference scores

Paired t-test

Effect size indices

Wilcoxon signed-rank test

Power analysis (Just for a reference)

Code for "Comparing Paired Samples" by Atsushi Mizumoto

Code for "Comparing Paired Samples"
by Atsushi Mizumoto